├── LICENSE ├── data └── NIRsoil.RData ├── tests ├── testthat.R └── testthat │ ├── test-movav.R │ ├── test-binning.R │ ├── test-gapDer.R │ ├── test-blockNorm.R │ ├── test-detrend.R │ ├── test-savitzkyGolay.R │ ├── test-standardNormalVariate.R │ ├── test-resample.R │ ├── test-continuumRemoval.R │ ├── test-baseline.R │ ├── test-resample2.R │ ├── test-msc.R │ ├── test-honigs.R │ ├── test-shenkWest.R │ ├── test-blockScale.R │ ├── test-duplex.R │ ├── test-naes.R │ └── test-kenStone.R ├── vignettes ├── logo.jpg └── prospectr.bib ├── man ├── figures │ ├── logo.png │ ├── lifecycle-stable.svg │ ├── lifecycle-defunct.svg │ ├── lifecycle-archived.svg │ ├── lifecycle-maturing.svg │ ├── lifecycle-deprecated.svg │ ├── lifecycle-superseded.svg │ ├── lifecycle-experimental.svg │ ├── lifecycle-questioning.svg │ └── lifecycle-soft-deprecated.svg ├── sqrtSm.Rd ├── get_nircal_response.Rd ├── get_nircal_indices.Rd ├── bitAND.Rd ├── convCppM.Rd ├── get_nircal_spectra.Rd ├── get_nircal_lengthspc.Rd ├── pkg_info.Rd ├── get_nircal_comments.Rd ├── get_nircal_description.Rd ├── get_msc_coeff.Rd ├── get_nircal_metadata.Rd ├── Cul.Rd ├── fastDist.Rd ├── resample_fwhm.Rd ├── fastDistV.Rd ├── movav.Rd ├── resample.Rd ├── blockNorm.Rd ├── e2m.Rd ├── blockScale.Rd ├── standardNormalVariate.Rd ├── NIRsoil.Rd ├── binning.Rd ├── spliceCorrection.Rd ├── read_nircal.Rd ├── baseline.Rd ├── resample2.Rd ├── detrend.Rd ├── readASD.Rd ├── continuumRemoval.Rd ├── prospectr-package.Rd ├── msc.Rd ├── savitzkyGolay.Rd ├── gapDer.Rd ├── shenkWest.Rd ├── cochranTest.Rd ├── honigs.Rd ├── naes.Rd ├── duplex.Rd ├── puchwein.Rd └── kenStone.Rd ├── CRAN-SUBMISSION ├── .Rbuildignore ├── .gitignore ├── R ├── pkg_info.R ├── Cul.R ├── AAA.R ├── NIRsoil.R ├── standardNormalVariate.R ├── blockScale.R ├── movav.R ├── resample.R ├── blockNorm.R ├── e2m.R ├── resample2.R ├── RcppExports.R ├── detrend.R ├── binning.R ├── msc.R ├── baseline.R ├── spliceCorrection.R ├── prospectr.R ├── continuumRemoval.R ├── savitzkyGolay.R ├── shenkWest.R ├── gapDer.R ├── cochranTest.R ├── honigs.R └── naes.R ├── .travis.yml ├── src ├── Makevars.win ├── Makevars └── RcppExports.cpp ├── inst └── CITATION ├── NAMESPACE ├── appveyor.yml ├── DESCRIPTION ├── cran-comments.md ├── README.md └── README.Rmd /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2014 - 2022 2 | COPYRIGHT HOLDER: Leonardo Ramirez Lopez 3 | -------------------------------------------------------------------------------- /data/NIRsoil.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/l-ramirez-lopez/prospectr/HEAD/data/NIRsoil.RData -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(prospectr) 3 | 4 | test_check("prospectr") 5 | -------------------------------------------------------------------------------- /vignettes/logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/l-ramirez-lopez/prospectr/HEAD/vignettes/logo.jpg -------------------------------------------------------------------------------- /man/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/l-ramirez-lopez/prospectr/HEAD/man/figures/logo.png -------------------------------------------------------------------------------- /CRAN-SUBMISSION: -------------------------------------------------------------------------------- 1 | Version: 0.2.8 2 | Date: 2025-03-05 22:54:12 UTC 3 | SHA: e274cccf3222b7577a72f2bc7cad064d7ca7a545 4 | -------------------------------------------------------------------------------- /tests/testthat/test-movav.R: -------------------------------------------------------------------------------- 1 | context("test-movav") 2 | 3 | test_that("movav works", { 4 | nirdata <- data("NIRsoil") 5 | 6 | X_movav <- movav(NIRsoil$spc, 5) 7 | 8 | expect_is(X_movav, "matrix") 9 | expect_true(round(max(X_movav[1, ]), 5) == 0.37237) 10 | }) 11 | -------------------------------------------------------------------------------- /tests/testthat/test-binning.R: -------------------------------------------------------------------------------- 1 | context("test-binning") 2 | 3 | test_that("binning works", { 4 | nirdata <- data("NIRsoil") 5 | 6 | X_binning <- binning(NIRsoil$spc) 7 | 8 | expect_is(X_binning, "matrix") 9 | expect_true(round(max(X_binning[1, ]), 5) == 0.37257) 10 | }) 11 | -------------------------------------------------------------------------------- /tests/testthat/test-gapDer.R: -------------------------------------------------------------------------------- 1 | context("test-gapDer") 2 | 3 | test_that("gapDer works", { 4 | nirdata <- data("NIRsoil") 5 | 6 | X_gapDer <- gapDer(NIRsoil$spc, m = 1, w = 3) 7 | 8 | expect_is(X_gapDer, "matrix") 9 | expect_true(round(max(abs(X_gapDer[1, ])), 5) == 0.00517) 10 | }) 11 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | .gitignore 4 | .Rhistory 5 | README.Rmd 6 | .travis.yml 7 | ^appveyor\.yml$ 8 | cran-comments.md 9 | ^my-comments\.md$ 10 | code_ex.R 11 | ^Rscript* 12 | ^packrat/ 13 | ^\.Rprofile$c 14 | TODO 15 | epo.R 16 | NEWS.html 17 | 18 | ^CRAN-SUBMISSION$ 19 | -------------------------------------------------------------------------------- /tests/testthat/test-blockNorm.R: -------------------------------------------------------------------------------- 1 | context("test-blockNorm") 2 | 3 | test_that("blockNorm works", { 4 | nirdata <- data("NIRsoil") 5 | 6 | X_blockNorm <- blockNorm(NIRsoil$spc) 7 | 8 | expect_is(X_blockNorm, "list") 9 | expect_true(round(max(X_blockNorm$Xscaled[1, ]), 5) == 0.00146) 10 | }) 11 | -------------------------------------------------------------------------------- /tests/testthat/test-detrend.R: -------------------------------------------------------------------------------- 1 | context("test-detrend") 2 | 3 | test_that("detrend works", { 4 | nirdata <- data("NIRsoil") 5 | 6 | X_detrend <- detrend(NIRsoil$spc, wav = as.numeric(colnames(NIRsoil$spc))) 7 | 8 | expect_is(X_detrend, "matrix") 9 | expect_true(round(max(X_detrend[1, ]), 5) == 2.57863) 10 | }) 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | src/*.o 5 | src/*.so 6 | src/*.dll 7 | .Rbuildignore 8 | prospectr\.Rproj 9 | todo.md 10 | src-i386/ 11 | src-x64/ 12 | figure/ 13 | prospectr-intro\.Rmd 14 | prospectr-intro\.bib 15 | prospectr-intro\.html 16 | prospectr-intro\.md 17 | doc 18 | Meta 19 | vignettes/prospectr.html 20 | -------------------------------------------------------------------------------- /man/sqrtSm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/e2m.R 3 | \name{sqrtSm} 4 | \alias{sqrtSm} 5 | \title{Square root of (square) symetric matrices} 6 | \usage{ 7 | sqrtSm(X, method = c("svd", "eigen")) 8 | } 9 | \description{ 10 | Square root of (square) symetric matrices 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /tests/testthat/test-savitzkyGolay.R: -------------------------------------------------------------------------------- 1 | context("test-savitzkyGolay") 2 | 3 | test_that("savitzkyGolay works", { 4 | nirdata <- data("NIRsoil") 5 | 6 | X_savitzkyGolay <- savitzkyGolay(NIRsoil$spc, m = 1, p = 1, w = 3) 7 | 8 | expect_is(X_savitzkyGolay, "matrix") 9 | expect_true(round(max(abs(X_savitzkyGolay[1, ])), 5) == 0.00528) 10 | }) 11 | -------------------------------------------------------------------------------- /man/get_nircal_response.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/read_nircal.R 3 | \name{get_nircal_response} 4 | \alias{get_nircal_response} 5 | \title{get the response variables in the nircal file} 6 | \usage{ 7 | get_nircal_response(x, n) 8 | } 9 | \description{ 10 | internal 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /R/pkg_info.R: -------------------------------------------------------------------------------- 1 | #' @title Get the package version info 2 | #' @description returns package info. 3 | #' @param pkg the package name i.e "prospectr" 4 | #' @keywords internal 5 | pkg_info <- function(pkg = "prospectr") { 6 | fld <- c("Version", "Config/VersionName", "URL") 7 | pinfo <- read.dcf(system.file("DESCRIPTION", package = pkg), fields = fld) 8 | pinfo 9 | } -------------------------------------------------------------------------------- /man/get_nircal_indices.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/read_nircal.R 3 | \name{get_nircal_indices} 4 | \alias{get_nircal_indices} 5 | \title{get the positions of relevant data witihi the nircal file} 6 | \usage{ 7 | get_nircal_indices(x) 8 | } 9 | \description{ 10 | internal 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/bitAND.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{bitAND} 4 | \alias{bitAND} 5 | \title{bitwise operations} 6 | \usage{ 7 | bitAND(aa, bb) 8 | } 9 | \arguments{ 10 | \item{aa}{integer} 11 | 12 | \item{bb}{integer} 13 | } 14 | \description{ 15 | bitwise operations in C++ 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /man/convCppM.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{convCppM} 4 | \alias{convCppM} 5 | \title{Convolve} 6 | \usage{ 7 | convCppM(X, f) 8 | } 9 | \arguments{ 10 | \item{X}{matrix to convolve} 11 | 12 | \item{f}{filter} 13 | } 14 | \description{ 15 | Convolution, written in C++ 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /man/get_nircal_spectra.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/read_nircal.R 3 | \name{get_nircal_spectra} 4 | \alias{get_nircal_spectra} 5 | \title{get the spectra in the nircal file} 6 | \usage{ 7 | get_nircal_spectra(x, values_s, spctra_start, speclength, n) 8 | } 9 | \description{ 10 | internal 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /tests/testthat/test-standardNormalVariate.R: -------------------------------------------------------------------------------- 1 | context("test-standardNormalVariate") 2 | 3 | test_that("standardNormalVariate works", { 4 | nirdata <- data("NIRsoil") 5 | 6 | X_standardNormalVariate <- standardNormalVariate(NIRsoil$spc) 7 | 8 | expect_is(X_standardNormalVariate, "matrix") 9 | expect_true(round(max(X_standardNormalVariate[1, ]), 5) == 2.63444) 10 | }) 11 | -------------------------------------------------------------------------------- /man/get_nircal_lengthspc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/read_nircal.R 3 | \name{get_nircal_lengthspc} 4 | \alias{get_nircal_lengthspc} 5 | \title{get the number of spectral variables in the nircaa file} 6 | \usage{ 7 | get_nircal_lengthspc(connection, from, to) 8 | } 9 | \description{ 10 | internal 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /tests/testthat/test-resample.R: -------------------------------------------------------------------------------- 1 | context("test-resample") 2 | 3 | test_that("resample works", { 4 | nirdata <- data("NIRsoil") 5 | 6 | X_resample <- resample( 7 | NIRsoil$spc, as.numeric(colnames(NIRsoil$spc)), 8 | seq(1100, 2500, 10) 9 | ) 10 | 11 | expect_is(X_resample, "matrix") 12 | 13 | expect_true(round(max(abs(X_resample[1, ])), 5) == 0.37288) 14 | }) 15 | -------------------------------------------------------------------------------- /man/pkg_info.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pkg_info.R 3 | \name{pkg_info} 4 | \alias{pkg_info} 5 | \title{Get the package version info} 6 | \usage{ 7 | pkg_info(pkg = "prospectr") 8 | } 9 | \arguments{ 10 | \item{pkg}{the package name i.e "prospectr"} 11 | } 12 | \description{ 13 | returns package info. 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /tests/testthat/test-continuumRemoval.R: -------------------------------------------------------------------------------- 1 | context("test-continuumRemoval") 2 | 3 | test_that("continuumRemoval works", { 4 | nirdata <- data("NIRsoil") 5 | 6 | X_continuumRemoval <- continuumRemoval(NIRsoil$spc, 7 | wav = as.numeric(colnames(NIRsoil$spc)) 8 | ) 9 | 10 | expect_is(X_continuumRemoval, "matrix") 11 | expect_true(round(min(X_continuumRemoval[1, ]), 5) == 0.80512) 12 | }) 13 | -------------------------------------------------------------------------------- /man/get_nircal_comments.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/read_nircal.R 3 | \name{get_nircal_comments} 4 | \alias{get_nircal_comments} 5 | \title{get the comments of the spectra in the nircal file} 6 | \usage{ 7 | get_nircal_comments(connection, metanumbers, begin_s, comment_s, comment_f, n) 8 | } 9 | \description{ 10 | internal 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/get_nircal_description.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/read_nircal.R 3 | \name{get_nircal_description} 4 | \alias{get_nircal_description} 5 | \title{get the description of the spectra in the nircal file} 6 | \usage{ 7 | get_nircal_description(x, begin_s, spcinfo, comment_s, comment_f, n) 8 | } 9 | \description{ 10 | internal 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /tests/testthat/test-baseline.R: -------------------------------------------------------------------------------- 1 | context("test-baseline") 2 | 3 | test_that("baseline works", { 4 | nirdata <- data("NIRsoil") 5 | 6 | X_baselined <- baseline(round(NIRsoil$spc, 6), 7 | wav = as.numeric(colnames(NIRsoil$spc)) 8 | ) 9 | 10 | expect_is(X_baselined, "matrix") 11 | expect_is(attr(X_baselined, "baselines"), "matrix") 12 | expect_true(round(mean(X_baselined), 6) == 0.005746) 13 | }) 14 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # Sample .travis.yml for R projects 2 | 3 | language: r 4 | warnings_are_errors: true 5 | sudo: required 6 | 7 | env: 8 | global: 9 | - NOT_CRAN = true 10 | before_install: echo "options(repos = c(CRAN='http://cran.rstudio.com'))" > ~/.Rprofile 11 | 12 | notifications: 13 | email: 14 | on_success: change 15 | on_failure: change 16 | 17 | r_packages: 18 | - Rcpp 19 | - RcppArmadillo 20 | - foreach 21 | - iterators 22 | -------------------------------------------------------------------------------- /tests/testthat/test-resample2.R: -------------------------------------------------------------------------------- 1 | context("test-resample2") 2 | 3 | test_that("resample2 works", { 4 | nirdata <- data("NIRsoil") 5 | 6 | new_wav <- c(1650, 2165, 2205, 2260, 2330, 2395) 7 | fwhm <- c(100, 40, 40, 50, 70, 70) 8 | X_resample <- resample2( 9 | NIRsoil$spc, as.numeric(colnames(NIRsoil$spc)), 10 | new_wav, fwhm 11 | ) 12 | 13 | expect_is(X_resample, "matrix") 14 | 15 | expect_true(round(max(abs(X_resample[1, ])), 5) == 0.34966) 16 | }) 17 | -------------------------------------------------------------------------------- /man/get_msc_coeff.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{get_msc_coeff} 4 | \alias{get_msc_coeff} 5 | \title{get_msc_coeff} 6 | \usage{ 7 | get_msc_coeff(X, ref_spectrum) 8 | } 9 | \arguments{ 10 | \item{X}{matrix} 11 | 12 | \item{ref_spectrum}{a matrix of one row and same columns as in X} 13 | } 14 | \description{ 15 | Coefficients for multiplicative Scatter Correction written in C++ 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /tests/testthat/test-msc.R: -------------------------------------------------------------------------------- 1 | context("test-msc") 2 | 3 | test_that("msc", { 4 | nirdata <- data("NIRsoil") 5 | 6 | X_msc <- msc(NIRsoil$spc) 7 | 8 | expect_is(X_msc, "matrix") 9 | expect_true(round(max(X_msc[1, ]), 5) == 0.37394) 10 | expect_true(round(min(X_msc[1, ]), 5) == 0.29474) 11 | 12 | X_mscb <- msc(NIRsoil$spc, apply(NIRsoil$spc, 2, median)) 13 | expect_true(round(max(X_mscb[1, ]), 5) == 0.34816) 14 | expect_true(round(min(X_mscb[1, ]), 5) == 0.26749) 15 | }) 16 | -------------------------------------------------------------------------------- /src/Makevars.win: -------------------------------------------------------------------------------- 1 | PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) $(SHLIB_OPENMP_CXXFLAGS) 2 | 3 | PKG_CPPFLAGS = -I../inst/include -I. 4 | PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) 5 | 6 | ## WARNING: although the following lines strip symbols to make the installation 7 | ## smaller, CRAN policies do not allow this 8 | #strippedLib: $(SHLIB) 9 | # if test -e "/usr/bin/strip" & test -e "/bin/uname" & [[ `uname` == "Linux" ]]; then /usr/bin/strip --strip-debug $(SHLIB); fi 10 | #.phony: strippedLib -------------------------------------------------------------------------------- /man/get_nircal_metadata.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/read_nircal.R 3 | \name{get_nircal_metadata} 4 | \alias{get_nircal_metadata} 5 | \title{get the metadata of the samples in the nircal file} 6 | \usage{ 7 | get_nircal_metadata( 8 | connection, 9 | n, 10 | spctra_start, 11 | spcinfo, 12 | progress, 13 | pb, 14 | progress.start, 15 | progress.steps 16 | ) 17 | } 18 | \description{ 19 | internal 20 | } 21 | \keyword{internal} 22 | -------------------------------------------------------------------------------- /tests/testthat/test-honigs.R: -------------------------------------------------------------------------------- 1 | context("test-honigs") 2 | 3 | test_that("honigs works", { 4 | nirdata <- data("NIRsoil") 5 | 6 | X_honigs <- honigs(NIRsoil$spc, k = 30, type = "A") 7 | X_honigs$model 8 | 9 | expect_is(X_honigs, "list") 10 | 11 | sel_samples <- c( 12 | 410, 619, 386, 141, 618, 687, 377, 37, 789, 592, 431, 402, 13 | 572, 333, 186, 519, 710, 610, 342, 282, 330, 290, 614, 279, 14 | 315, 284, 248, 147, 749, 823 15 | ) 16 | expect_true(!any(!sel_samples == X_honigs$model)) 17 | }) 18 | -------------------------------------------------------------------------------- /tests/testthat/test-shenkWest.R: -------------------------------------------------------------------------------- 1 | context("test-shenkWest") 2 | 3 | test_that("shenkWest works", { 4 | nirdata <- data("NIRsoil") 5 | 6 | X_shenkWest <- shenkWest(NIRsoil$spc, pc = .99, d.min = .3, rm.outlier = FALSE) 7 | X_shenkWest$model 8 | 9 | expect_is(X_shenkWest, "list") 10 | 11 | sel_samples <- c( 12 | 112, 200, 309, 690, 297, 452, 345, 608, 225, 595, 63, 732, 13 | 824, 126, 706, 294, 313, 528, 154, 585, 612, 617 14 | ) 15 | expect_true(!any(!sel_samples == X_shenkWest$model)) 16 | }) 17 | -------------------------------------------------------------------------------- /tests/testthat/test-blockScale.R: -------------------------------------------------------------------------------- 1 | context("test-blockScale") 2 | 3 | test_that("blockScale works", { 4 | nirdata <- data("NIRsoil") 5 | 6 | X_blockScale <- blockScale(NIRsoil$spc, type = "hard", sigma2 = 1) 7 | X_blockScale_soft <- blockScale(NIRsoil$spc, type = "soft", sigma2 = 1) 8 | 9 | expect_is(X_blockScale, "list") 10 | expect_is(X_blockScale_soft, "list") 11 | expect_true(round(max(X_blockScale$Xscaled[1, ]), 5) == 0.15734) 12 | expect_true(round(max(X_blockScale_soft$Xscaled[1, ]), 5) == 0.80929) 13 | }) 14 | -------------------------------------------------------------------------------- /tests/testthat/test-duplex.R: -------------------------------------------------------------------------------- 1 | context("test-duplex") 2 | 3 | test_that("duplex works", { 4 | nirdata <- data("NIRsoil") 5 | 6 | X_duplex <- duplex(NIRsoil$spc, k = 30, metric = "mahal", pc = .99) 7 | X_duplex$model 8 | 9 | expect_is(X_duplex, "list") 10 | 11 | sel_samples <- c( 12 | 789, 619, 39, 617, 594, 310, 737, 822, 683, 614, 204, 287, 13 | 825, 218, 701, 268, 717, 350, 615, 702, 687, 653, 186, 479, 14 | 421, 282, 178, 728, 254, 613 15 | ) 16 | expect_true(!any(!sel_samples == X_duplex$model)) 17 | }) 18 | -------------------------------------------------------------------------------- /tests/testthat/test-naes.R: -------------------------------------------------------------------------------- 1 | context("test-naes") 2 | 3 | test_that("naes works", { 4 | nirdata <- data("NIRsoil") 5 | skip_on_cran() 6 | skip_on_travis() 7 | 8 | set.seed(140920) 9 | X_naes <- naes(NIRsoil$spc, k = 30) 10 | X_naes$model 11 | 12 | expect_is(X_naes, "list") 13 | 14 | sel_samples <- c( 15 | 7, 715, 380, 215, 483, 588, 530, 211, 253, 389, 57, 540, 166, 16 | 30, 338, 370, 654, 620, 551, 770, 667, 4, 629, 439, 486, 687, 17 | 10, 149, 797, 649 18 | ) 19 | expect_true(!any(!sel_samples == X_naes$model)) 20 | }) 21 | -------------------------------------------------------------------------------- /R/Cul.R: -------------------------------------------------------------------------------- 1 | #' @title Cochran C critical value 2 | #' 3 | #' @description 4 | #' Upper limit critical value Cul for one-sided test on balanced design 5 | #' @usage 6 | #' Cul(a,n,N) 7 | #' @param a significance level. 8 | #' @param n number of points per series. 9 | #' @param N number of data series. 10 | #' @keywords internal 11 | #' @author Antoine Stevens 12 | #' @references 13 | #' 14 | Cul <- function(a, n, N) { 15 | Fc <- qf(a / N, n - 1, (n - 1) * (N - 1), lower.tail = FALSE) 16 | value <- 1 / (1 + ((N - 1) / Fc)) 17 | return(value) 18 | } 19 | -------------------------------------------------------------------------------- /man/Cul.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Cul.R 3 | \name{Cul} 4 | \alias{Cul} 5 | \title{Cochran C critical value} 6 | \usage{ 7 | Cul(a,n,N) 8 | } 9 | \arguments{ 10 | \item{a}{significance level.} 11 | 12 | \item{n}{number of points per series.} 13 | 14 | \item{N}{number of data series.} 15 | } 16 | \description{ 17 | Upper limit critical value Cul for one-sided test on balanced design 18 | } 19 | \references{ 20 | \url{https://en.wikipedia.org/wiki/Cochran's_C_test} 21 | } 22 | \author{ 23 | Antoine Stevens 24 | } 25 | \keyword{internal} 26 | -------------------------------------------------------------------------------- /man/fastDist.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{fastDist} 4 | \alias{fastDist} 5 | \title{A fast distance algorithm for two matrices written in C++} 6 | \usage{ 7 | fastDist(X,Y,method) 8 | } 9 | \arguments{ 10 | \item{X}{a \code{matrix}} 11 | 12 | \item{Y}{a \code{matrix}} 13 | 14 | \item{method}{a \code{string} with possible values "euclid", "cor", "cosine"} 15 | } 16 | \value{ 17 | a distance \code{matrix} 18 | } 19 | \description{ 20 | A fast distance algorithm for two matrices written in C++ 21 | } 22 | \author{ 23 | Antoine Stevens and Leonardo Ramirez-Lopez 24 | } 25 | \keyword{internal} 26 | -------------------------------------------------------------------------------- /man/resample_fwhm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{resample_fwhm} 4 | \alias{resample_fwhm} 5 | \title{Resample to given band position and fwhm} 6 | \usage{ 7 | resample_fwhm(X, wav, new_wav, fwhm) 8 | } 9 | \arguments{ 10 | \item{X}{matrix to resample} 11 | 12 | \item{wav}{a numeric \code{vector} giving the original band positions} 13 | 14 | \item{new_wav}{a numeric \code{vector} giving the new band positions} 15 | 16 | \item{fwhm}{numeric \code{vector} giving the full width half maximums of the new band positions.} 17 | } 18 | \description{ 19 | Resample, written in C++ 20 | } 21 | \keyword{internal} 22 | -------------------------------------------------------------------------------- /man/fastDistV.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{fastDistV} 4 | \alias{fastDistV} 5 | \title{A fast distance algorithm for a matrix and a vector written in C++} 6 | \usage{ 7 | fastDistV(X,Y,method) 8 | } 9 | \arguments{ 10 | \item{X}{a \code{matrix}} 11 | 12 | \item{Y}{a \code{vector}} 13 | 14 | \item{method}{a \code{string} with possible values "euclid", "cor", "cosine"} 15 | } 16 | \value{ 17 | a distance \code{vector} 18 | } 19 | \description{ 20 | A fast distance algorithm for a matrix and a vector written in C++ 21 | } 22 | \author{ 23 | Antoine Stevens and Leonardo Ramirez-Lopez 24 | } 25 | \keyword{internal} 26 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | citHeader("To cite package 'prospectr' in publications use:") 2 | 3 | year <- sub(".*(2[[:digit:]]{3})-.*", "\\1", meta$Date, perl = TRUE) 4 | vers <- paste("R package version", meta$Version) 5 | 6 | bibentry(bibtype = "Manual", 7 | title = "An introduction to the prospectr package", 8 | author = c(as.person("Antoine Stevens"), 9 | as.person("Leornardo Ramirez-Lopez")), 10 | publication = "R package Vignette", 11 | year = year, 12 | note = vers, 13 | 14 | textVersion = 15 | paste("Antoine Stevens and Leornardo Ramirez-Lopez (", 16 | year, 17 | "). An introduction to the prospectr package. R package Vignette ", 18 | vers, ".", sep="")) 19 | -------------------------------------------------------------------------------- /src/Makevars: -------------------------------------------------------------------------------- 1 | # deprecated: PKG_LIBS = `$(R_HOME)/bin/Rscript -e "Rcpp:::LdFlags()"` $(LAPACK_LIBS) $(BLAS_LIBS) $(SHLIB_OPENMP_CXXFLAGS) 2 | PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) $(SHLIB_OPENMP_CXXFLAGS) 3 | 4 | PKG_CPPFLAGS = -I../inst/include -I. 5 | 6 | PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) 7 | 8 | 9 | #F77 = $(GCCBASE)/bin/gfortran 10 | #FC = $F77 11 | #FLIBS = -L$(GCCBASE)/bin/gfortran 12 | 13 | ## WARNING: although the following lines strip symbols to make the installation 14 | ## smaller, CRAN policies do not allow this 15 | #strippedLib: $(SHLIB) 16 | # if test -e "/usr/bin/strip" & test -e "/bin/uname" & [[ `uname` == "Linux" ]]; then /usr/bin/strip --strip-debug $(SHLIB); fi 17 | #.phony: strippedLib -------------------------------------------------------------------------------- /R/AAA.R: -------------------------------------------------------------------------------- 1 | # .PROSPECTR_CACHE <- new.env(FALSE, parent = globalenv()) 2 | 3 | .onAttach <- function(lib, pkg) { 4 | # assign("gpclib", FALSE, envir=.prospectr_CACHE) 5 | # prospectr_v <- read.dcf( 6 | # file = system.file("DESCRIPTION", package = pkg), 7 | # fields = "Version" 8 | # ) 9 | pkg_v <- pkg_info() 10 | 11 | mss <- paste0( 12 | "\033[34m", 13 | pkg, " version ", 14 | paste(pkg_v[1:2], collapse = " \U002D\U002D "), 15 | "\033[39m" 16 | ) 17 | mss2 <- paste0( 18 | "\033[34mcheck the package repository at: ", 19 | pkg_v[, "URL"], 20 | "\033[39m" 21 | ) 22 | packageStartupMessage(mss) 23 | packageStartupMessage(mss2) 24 | } 25 | 26 | # .onUnload <- function(libpath) { 27 | # rm(.prospectr_CACHE) 28 | # } 29 | -------------------------------------------------------------------------------- /man/figures/lifecycle-stable.svg: -------------------------------------------------------------------------------- 1 | lifecyclelifecyclestablestable -------------------------------------------------------------------------------- /man/figures/lifecycle-defunct.svg: -------------------------------------------------------------------------------- 1 | lifecyclelifecycledefunctdefunct -------------------------------------------------------------------------------- /man/figures/lifecycle-archived.svg: -------------------------------------------------------------------------------- 1 | lifecyclelifecyclearchivedarchived -------------------------------------------------------------------------------- /man/figures/lifecycle-maturing.svg: -------------------------------------------------------------------------------- 1 | lifecyclelifecyclematuringmaturing -------------------------------------------------------------------------------- /man/figures/lifecycle-deprecated.svg: -------------------------------------------------------------------------------- 1 | lifecyclelifecycledeprecateddeprecated -------------------------------------------------------------------------------- /man/figures/lifecycle-superseded.svg: -------------------------------------------------------------------------------- 1 | lifecyclelifecyclesupersededsuperseded -------------------------------------------------------------------------------- /man/figures/lifecycle-experimental.svg: -------------------------------------------------------------------------------- 1 | lifecyclelifecycleexperimentalexperimental -------------------------------------------------------------------------------- /man/figures/lifecycle-questioning.svg: -------------------------------------------------------------------------------- 1 | lifecyclelifecyclequestioningquestioning -------------------------------------------------------------------------------- /man/figures/lifecycle-soft-deprecated.svg: -------------------------------------------------------------------------------- 1 | lifecyclelifecyclesoft-deprecatedsoft-deprecated -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(baseline) 4 | export(binning) 5 | export(blockNorm) 6 | export(blockScale) 7 | export(cochranTest) 8 | export(continuumRemoval) 9 | export(detrend) 10 | export(duplex) 11 | export(gapDer) 12 | export(honigs) 13 | export(kenStone) 14 | export(movav) 15 | export(msc) 16 | export(naes) 17 | export(puchwein) 18 | export(readASD) 19 | export(read_nircal) 20 | export(resample) 21 | export(resample2) 22 | export(savitzkyGolay) 23 | export(shenkWest) 24 | export(spliceCorrection) 25 | export(standardNormalVariate) 26 | import(Rcpp) 27 | import(foreach) 28 | import(iterators) 29 | import(lifecycle) 30 | import(mathjaxr) 31 | importFrom(grDevices,chull) 32 | importFrom(graphics,legend) 33 | importFrom(graphics,matplot) 34 | importFrom(lifecycle,deprecate_soft) 35 | importFrom(stats,aggregate) 36 | importFrom(stats,approx) 37 | importFrom(stats,cov) 38 | importFrom(stats,kmeans) 39 | importFrom(stats,lm) 40 | importFrom(stats,prcomp) 41 | importFrom(stats,qf) 42 | importFrom(stats,sd) 43 | importFrom(stats,splinefun) 44 | importFrom(stats,var) 45 | importFrom(utils,download.file) 46 | importFrom(utils,read.table) 47 | importFrom(utils,setTxtProgressBar) 48 | importFrom(utils,txtProgressBar) 49 | useDynLib(prospectr) 50 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | # DO NOT CHANGE the "init" and "install" sections below 2 | 3 | # Download script file from GitHub 4 | init: 5 | ps: | 6 | $ErrorActionPreference = "Stop" 7 | Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "..\appveyor-tool.ps1" 8 | Import-Module '..\appveyor-tool.ps1' 9 | 10 | install: 11 | ps: Bootstrap 12 | 13 | cache: 14 | - C:\RLibrary 15 | 16 | environment: 17 | NOT_CRAN: true 18 | # env vars that may need to be set, at least temporarily, from time to time 19 | # see https://github.com/krlmlr/r-appveyor#readme for details 20 | # USE_RTOOLS: true 21 | # R_REMOTES_STANDALONE: true 22 | 23 | # Adapt as necessary starting from here 24 | 25 | build_script: 26 | - travis-tool.sh install_deps 27 | 28 | test_script: 29 | - travis-tool.sh run_tests 30 | 31 | on_failure: 32 | - 7z a failure.zip *.Rcheck\* 33 | - appveyor PushArtifact failure.zip 34 | 35 | artifacts: 36 | - path: '*.Rcheck\**\*.log' 37 | name: Logs 38 | 39 | - path: '*.Rcheck\**\*.out' 40 | name: Logs 41 | 42 | - path: '*.Rcheck\**\*.fail' 43 | name: Logs 44 | 45 | - path: '*.Rcheck\**\*.Rout' 46 | name: Logs 47 | 48 | - path: '\*_*.tar.gz' 49 | name: Bits 50 | 51 | - path: '\*_*.zip' 52 | name: Bits 53 | -------------------------------------------------------------------------------- /man/movav.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/movav.R 3 | \name{movav} 4 | \alias{movav} 5 | \title{Moving average} 6 | \usage{ 7 | movav(X, w) 8 | } 9 | \arguments{ 10 | \item{X}{a numeric matrix or vector to process (optionally a data frame that can 11 | be coerced to a numerical matrix).} 12 | 13 | \item{w}{filter length.} 14 | } 15 | \value{ 16 | a matrix or vector with the filtered signal(s) 17 | } 18 | \description{ 19 | A simple moving average of a matrix or vector using a convolution 20 | function written in C++/Rcpp for fast computing 21 | } 22 | \examples{ 23 | data(NIRsoil) 24 | wav <- as.numeric(colnames(NIRsoil$spc)) 25 | # adding some noise 26 | NIRsoil$spc_noise <- NIRsoil$spc + rnorm(length(NIRsoil$spc), 0, 0.001) 27 | matplot(wav, 28 | t(NIRsoil$spc_noise[1:10, ]), 29 | type = "l", 30 | lty = 1, 31 | xlab = "Wavelength /nm", 32 | ylab = "Absorbance", 33 | col = "grey" 34 | ) 35 | 36 | # window size of 11 bands 37 | NIRsoil$spc_mov <- movav(NIRsoil$spc_noise, w = 15) 38 | # smoothed data 39 | matlines(as.numeric(colnames(NIRsoil$spc_mov)), 40 | t(NIRsoil$spc_mov[1:10, ]), 41 | type = "l", 42 | lty = 1 43 | ) 44 | } 45 | \seealso{ 46 | \code{\link{savitzkyGolay}}, \code{\link{gapDer}}, 47 | \code{\link{binning}}, \code{\link{continuumRemoval}} 48 | } 49 | \author{ 50 | Antoine Stevens 51 | } 52 | -------------------------------------------------------------------------------- /man/resample.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/resample.R 3 | \name{resample} 4 | \alias{resample} 5 | \title{Resample spectral data} 6 | \usage{ 7 | resample(X, wav, new.wav, interpol = "spline", ...) 8 | } 9 | \arguments{ 10 | \item{X}{numeric matrix or vector to resample (optionally a data frame that 11 | can be coerced to a numerical matrix).} 12 | 13 | \item{wav}{a numeric vector giving the original band positions.} 14 | 15 | \item{new.wav}{a numeric vector giving the new band positions.} 16 | 17 | \item{interpol}{the interpolation method: 'linear' or 'spline' (default).} 18 | 19 | \item{...}{additional arguments to be passed to the \code{\link{splinefun}} 20 | function when \code{interpol = 'spline'}.} 21 | } 22 | \value{ 23 | a matrix or vector with resampled values. 24 | } 25 | \description{ 26 | Resample a data matrix or vector to new coordinates (e.g. 27 | band positions) using spline or linear interpolation. This function is a 28 | simple wrapper around \code{\link{approx}} and \code{\link{splinefun}} in 29 | \pkg{base}. 30 | } 31 | \examples{ 32 | data(NIRsoil) 33 | wav <- as.numeric(colnames(NIRsoil$spc)) 34 | # increase spectral resolution by 2 35 | NIRsoil$spc_resampled <- resample(NIRsoil$spc, wav, seq(1100, 2498, 2)) 36 | dim(NIRsoil$spc) 37 | dim(NIRsoil$spc_resampled) 38 | } 39 | \seealso{ 40 | \code{\link{resample2}} 41 | } 42 | \author{ 43 | Antoine Stevens and \href{https://orcid.org/0000-0002-5369-5120}{Leonardo Ramirez-Lopez} 44 | } 45 | -------------------------------------------------------------------------------- /man/blockNorm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/blockNorm.R 3 | \name{blockNorm} 4 | \alias{blockNorm} 5 | \title{Sum of squares block weighting} 6 | \usage{ 7 | blockNorm(X, targetnorm = 1) 8 | } 9 | \arguments{ 10 | \item{X}{a numeric matrix to transform (optionally a data frame that can 11 | be coerced to a numerical matrix).} 12 | 13 | \item{targetnorm}{desired sum of squares for a block of variables 14 | (default = 1)} 15 | } 16 | \value{ 17 | a list with components \code{Xscaled}, the scaled matrix and \code{f}, the 18 | scaling factor 19 | } 20 | \description{ 21 | Sum of squares block weighting: allows to scale blocks of variables, 22 | but keeping the relative weights of the variables inside a block. 23 | } 24 | \details{ 25 | The function computes a scaling factor, which, multiplied by the 26 | input matrix, 27 | produces a matrix with a pre--determined sum of squares. 28 | } 29 | \note{ 30 | This is a \R port of the \file{MBnorm.m} function of the MB matlab toolbox 31 | by Fran van den Berg. 32 | } 33 | \examples{ 34 | X <- matrix(rnorm(100), ncol = 10) 35 | # Block normalize to sum of square equals to 1 36 | res <- blockNorm(X, targetnorm = 1) 37 | sum(res$Xscaled^2) # check 38 | } 39 | \references{ 40 | Eriksson, L., Johansson, E., Kettaneh, N., Trygg, J., 41 | Wikstrom, C., and Wold, S., 2006. Multi- and Megavariate Data Analysis. 42 | MKS Umetrics AB. 43 | } 44 | \seealso{ 45 | \code{\link{blockScale}}, \code{\link{standardNormalVariate}}, 46 | \code{\link{detrend}} 47 | } 48 | \author{ 49 | Antoine Stevens 50 | } 51 | -------------------------------------------------------------------------------- /tests/testthat/test-kenStone.R: -------------------------------------------------------------------------------- 1 | context("test-kenStone") 2 | 3 | test_that("kenStone works", { 4 | data("NIRsoil") 5 | 6 | X_kenStone <- kenStone(NIRsoil$spc, k = 50, metric = "mahal", pc = 3) 7 | X_kenStone$model 8 | 9 | expect_is(X_kenStone, "list") 10 | 11 | sel_samples <- c( 12 | 687, 377, 410, 619, 87, 618, 611, 283, 317, 666, 789, 635, 13 | 147, 822, 285, 313, 737, 803, 819, 383, 823, 204, 591, 252, 14 | 825, 272, 402, 39, 330, 590, 286, 608, 363, 234, 701, 718, 15 | 287, 270, 571, 192, 614, 1, 386, 615, 126, 755, 734, 428, 16 | 466, 426 17 | ) 18 | expect_true(!any(!sel_samples == X_kenStone$model)) 19 | 20 | X_kenStone_b <- kenStone( 21 | NIRsoil$spc, 22 | k = 50, 23 | metric = "mahal", 24 | pc = 3, 25 | init = X_kenStone$model[1:10] 26 | ) 27 | 28 | expect_true(!any(!sel_samples == X_kenStone_b$model)) 29 | }) 30 | 31 | test_that("kenStone with Mahalanobis on 1 single variable", { 32 | data("NIRsoil") 33 | expect_true(is.list(kenStone(NIRsoil$spc, k = 3, metric = "mahal", pc = 1))) 34 | 35 | }) 36 | 37 | 38 | test_that("kenStone works with groups", { 39 | data("NIRsoil") 40 | x <- NIRsoil$spc 41 | n_per_group <- 5 42 | my_groups <- rep(1:floor(nrow(x) / n_per_group), each = n_per_group) 43 | if (length(my_groups) != nrow(x)) { 44 | my_groups <- c(rep(nrow(x), nrow(x) - length(my_groups)), my_groups) 45 | } 46 | 47 | X_kenStone <- kenStone(x, 48 | k = 30, 49 | pc = 2, 50 | group = as.factor(my_groups) 51 | ) 52 | 53 | expect_true(all(diff(X_kenStone$model[1:n_per_group]) == 1)) 54 | }) 55 | -------------------------------------------------------------------------------- /man/e2m.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/e2m.R 3 | \name{e2m} 4 | \alias{e2m} 5 | \title{A function for transforming a matrix from its Euclidean space to its Mahalanobis space} 6 | \usage{ 7 | e2m(X, sm.method = c("svd", "eigen")) 8 | } 9 | \description{ 10 | A function for transforming a matrix from its Euclidean space to its Mahalanobis space 11 | } 12 | \examples{ 13 | # test data 14 | \dontrun{ 15 | X <- matrix(rnorm(500), ncol = 5) 16 | # Normal way to compute the Mahalanobis distance 17 | md1 <- sqrt(mahalanobis(X, center = colMeans(X), cov = cov(X))) 18 | # Projection approach for computing the Mahalanobis distance 19 | # 1. Projecting from the Euclidean to the Mahalanobis space 20 | Xm <- e2m(X, sm.method = "svd") 21 | # 2. Use the normal Euclidean distance on the Mahalanobis space 22 | md2 <- sqrt(rowSums((sweep(Xm, 2, colMeans(Xm), "-"))^2)) 23 | # Plot the results of both methods 24 | plot(md1, md2) 25 | # Test on a real dataset 26 | # Mahalanobis in the spectral space 27 | data(NIRsoil) 28 | X <- NIRsoil$spc 29 | Xm <- e2m(X, sm.method = "svd") 30 | md2 <- sqrt(rowSums((sweep(Xm, 2, colMeans(Xm), "-"))^2)) 31 | 32 | md1 <- sqrt(mahalanobis(X, center = colMeans(X), cov = cov(X))) # does not work 33 | # Mahalanobis in the PC space 34 | pc <- 20 35 | pca <- prcomp(X, center = TRUE, scale = TRUE) 36 | X <- pca$x[, 1:pc] 37 | X2 <- sweep(pca$x[, 1:pc, drop = FALSE], 2, pca$sdev[1:pc], "/") 38 | md4 <- sqrt(rowSums((sweep(Xm, 2, colMeans(Xm), "-"))^2)) 39 | md5 <- sqrt(rowSums((sweep(X2, 2, colMeans(X2), "-"))^2)) 40 | md3 <- sqrt(mahalanobis(X, center = colMeans(X), cov = cov(X))) # does work 41 | } 42 | } 43 | \keyword{internal} 44 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: prospectr 2 | Type: Package 3 | Title: Miscellaneous Functions for Processing 4 | and Sample Selection of Spectroscopic Data 5 | Version: 0.2.9 6 | Date: 2025-03-17 7 | Authors@R: c( 8 | person(given = "Antoine", 9 | family = "Stevens", 10 | role = c("aut"), 11 | comment = c(ORCID = "0000-0002-1588-7519")), 12 | person(given = "Leonardo", 13 | family = "Ramirez-Lopez", 14 | role = c("aut", "cre"), 15 | email = "ramirez.lopez.leo@gmail.com", 16 | comment = c(ORCID = "0000-0002-5369-5120")) 17 | ) 18 | Author: 19 | Antoine Stevens [aut] (), 20 | Leonardo Ramirez-Lopez [aut, cre] () 21 | Maintainer: 22 | Leonardo Ramirez-Lopez 23 | BugReports: 24 | https://github.com/l-ramirez-lopez/prospectr/issues 25 | Description: Functions to preprocess spectroscopic data 26 | and conduct (representative) sample selection/calibration sampling. 27 | License: MIT + file LICENSE 28 | URL: https://github.com/l-ramirez-lopez/prospectr 29 | VignetteBuilder: knitr 30 | Suggests: 31 | knitr, 32 | rmarkdown, 33 | formatR, 34 | testthat, 35 | bookdown 36 | LinkingTo: 37 | Rcpp, 38 | RcppArmadillo 39 | Depends: 40 | R (>= 3.5.0), 41 | Imports: 42 | foreach, 43 | iterators, 44 | Rcpp (>= 1.0.1), 45 | mathjaxr (>= 1.0), 46 | lifecycle (>= 0.2.0) 47 | RdMacros: mathjaxr 48 | RoxygenNote: 7.3.2 49 | NeedsCompilation: yes 50 | LazyData: true 51 | LazyDataCompression: xz 52 | Repository: CRAN 53 | Encoding: UTF-8 54 | Roxygen: list(markdown = TRUE) 55 | Config/VersionName: hallo-galo 56 | -------------------------------------------------------------------------------- /man/blockScale.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/blockScale.R 3 | \name{blockScale} 4 | \alias{blockScale} 5 | \title{Hard or soft block scaling} 6 | \usage{ 7 | blockScale(X, type = 'hard', sigma2 = 1) 8 | } 9 | \arguments{ 10 | \item{X}{a numeric matrix or vector to process (optionally a data frame that 11 | can be coerced to a numerical matrix).} 12 | 13 | \item{type}{the type of block scaling: 'hard' or 'soft'.} 14 | 15 | \item{sigma2}{the desired total variance of a block (ie sum of the variances 16 | of all variables, default = 1), applicable when \code{type = 'hard'}.} 17 | } 18 | \value{ 19 | a \code{list} with \code{Xscaled}, the scaled matrix and \code{f}, the scaling 20 | factor. 21 | } 22 | \description{ 23 | Hard or soft block scaling of a spectral matrix to constant group variance. 24 | In multivariate calibration, block scaling is used to down-weight variables, 25 | when one block of variables dominates other blocks. 26 | With hard block scaling, the variables in a block are scaled so that the sum 27 | of their variances equals 1. When soft block scaling is used, the variables 28 | are scaled such that the sum of variable variances is equal to the square 29 | root of the number of variables in a particular block. 30 | } 31 | \examples{ 32 | X <- matrix(rnorm(100), ncol = 10) 33 | # Hard block scaling 34 | res <- blockScale(X) 35 | # sum of column variances == 1 36 | apply(res$Xscaled, 2, var) 37 | } 38 | \references{ 39 | Eriksson, L., Johansson, E., Kettaneh, N., Trygg, J., 40 | Wikstrom, C., and Wold, S., 2006. Multi- and Megavariate Data Analysis. 41 | MKS Umetrics AB. 42 | } 43 | \seealso{ 44 | \code{\link{blockNorm}}, \code{\link{standardNormalVariate}}, 45 | \code{\link{detrend}} 46 | } 47 | \author{ 48 | Antoine Stevens 49 | } 50 | -------------------------------------------------------------------------------- /R/NIRsoil.R: -------------------------------------------------------------------------------- 1 | #' @docType data 2 | #' @name NIRsoil 3 | #' @aliases NIRsoil 4 | #' @title NIRSoil 5 | #' @format A `data.frame` of 825 observations and 5 variables (where the spectral 6 | #' data is embedded in one variable `NIRSoil$spc`). 7 | #' @usage 8 | #' data(NIRsoil) 9 | #' @description 10 | #' Soil spectral library of the \sQuote{Chimiometrie 2006} challenge. 11 | #' The database contains absorbance spectra of dried and sieved soil samples 12 | #' measured between 1100 nm and 2498 nm at 2 nm interval. The soil samples come 13 | #' from agricultural fields collected from all over the Walloon region in Belgium. 14 | #' Three parameters are associated with the spectral library: Nt (Total Nitrogen 15 | #' in g/Kg of dry soil), CEC (Cation Exchange Capacity in meq/100 g of dry soil) 16 | #' and Ciso (Carbon in g/100 g of dry soil). Carbon content has been measured 17 | #' following the ISO14235 method. 18 | #' @details 19 | #' The dataset includes 618 training and 207 test samples with 5 variables: 20 | #' \itemize{ 21 | #' \item{Nt (Total Nitrogen).} 22 | #' \item{Ciso (Carbon).} 23 | #' \item{CEC (Cation Exchange Capacity).} 24 | #' \item{train (binary vector indicating training (1) and validation (0) samples).} 25 | #' \item{and spc (a matrix of spectral NIR absorbance values, where the band/wavelength positions are stored as `colnames`).} 26 | #' } 27 | #' Nt, Ciso and CEC have respectively 22 \%, 11 \% and 46 \% of the observations 28 | #' with missing values. 29 | #' @source Pierre Dardenne from Walloon Agricultural Research Centre, Belgium. 30 | #' @references 31 | #' Fernandez Pierna, J.A., and Dardenne, P., 2008. Soil parameter quantification 32 | #' by NIRS as a Chemometric challenge at 'Chimiometrie 2006'. Chemometrics and 33 | #' Intelligent Laboratory Systems 91, 94-98. 34 | #' @keywords datasets 35 | NULL 36 | -------------------------------------------------------------------------------- /man/standardNormalVariate.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/standardNormalVariate.R 3 | \name{standardNormalVariate} 4 | \alias{standardNormalVariate} 5 | \title{Standard normal variate transformation} 6 | \usage{ 7 | standardNormalVariate(X) 8 | } 9 | \arguments{ 10 | \item{X}{a numeric matrix of spectral data (optionally a data frame that can 11 | be coerced to a numerical matrix).} 12 | } 13 | \value{ 14 | a matrix of normalized spectral data. 15 | } 16 | \description{ 17 | \loadmathjax 18 | This function normalizes each row of an input matrix by 19 | subtracting each row by its mean and dividing it by its standard deviation 20 | } 21 | \details{ 22 | SNV is simple way for normalizing spectral data that intends to correct for 23 | light scatter. 24 | It operates row-wise: 25 | 26 | \mjdeqn{SNV_i = \frac{x_i - \bar{x}_i}{s_i}}{SNV_i = \frac{x_i - \bar{x}_i}{s_i}} 27 | 28 | where \mjeqn{x_i}{x_i} is the signal of the \mjeqn{i}{i}th observation, 29 | \mjeqn{\bar{x}_i}{\bar{x}_i} is its mean and \mjeqn{s_i}{s_i} its standard 30 | deviation. 31 | } 32 | \examples{ 33 | data(NIRsoil) 34 | NIRsoil$spc_snv <- standardNormalVariate(X = NIRsoil$spc) 35 | # 10 first snv spectra 36 | matplot( 37 | x = as.numeric(colnames(NIRsoil$spc_snv)), 38 | y = t(NIRsoil$spc_snv[1:10, ]), 39 | type = "l", 40 | xlab = "wavelength, nm", 41 | ylab = "snv" 42 | ) 43 | \dontrun{ 44 | apply(NIRsoil$spc_snv, 1, sd) # check 45 | } 46 | 47 | } 48 | \references{ 49 | Barnes RJ, Dhanoa MS, Lister SJ. 1989. Standard normal variate 50 | transformation and de-trending of near-infrared diffuse reflectance spectra. 51 | Applied spectroscopy, 43(5): 772-777. 52 | } 53 | \seealso{ 54 | \code{\link{msc}}, \code{\link{detrend}}, \code{\link{blockScale}}, 55 | \code{\link{blockNorm}} 56 | } 57 | \author{ 58 | Antoine Stevens 59 | } 60 | -------------------------------------------------------------------------------- /man/NIRsoil.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/NIRsoil.R 3 | \docType{data} 4 | \name{NIRsoil} 5 | \alias{NIRsoil} 6 | \title{NIRSoil} 7 | \format{ 8 | A \code{data.frame} of 825 observations and 5 variables (where the spectral 9 | data is embedded in one variable \code{NIRSoil$spc}). 10 | } 11 | \source{ 12 | Pierre Dardenne from Walloon Agricultural Research Centre, Belgium. 13 | } 14 | \usage{ 15 | data(NIRsoil) 16 | } 17 | \description{ 18 | Soil spectral library of the \sQuote{Chimiometrie 2006} challenge. 19 | The database contains absorbance spectra of dried and sieved soil samples 20 | measured between 1100 nm and 2498 nm at 2 nm interval. The soil samples come 21 | from agricultural fields collected from all over the Walloon region in Belgium. 22 | Three parameters are associated with the spectral library: Nt (Total Nitrogen 23 | in g/Kg of dry soil), CEC (Cation Exchange Capacity in meq/100 g of dry soil) 24 | and Ciso (Carbon in g/100 g of dry soil). Carbon content has been measured 25 | following the ISO14235 method. 26 | } 27 | \details{ 28 | The dataset includes 618 training and 207 test samples with 5 variables: 29 | \itemize{ 30 | \item{Nt (Total Nitrogen).} 31 | \item{Ciso (Carbon).} 32 | \item{CEC (Cation Exchange Capacity).} 33 | \item{train (binary vector indicating training (1) and validation (0) samples).} 34 | \item{and spc (a matrix of spectral NIR absorbance values, where the band/wavelength positions are stored as \code{colnames}).} 35 | } 36 | Nt, Ciso and CEC have respectively 22 \\%, 11 \\% and 46 \\% of the observations 37 | with missing values. 38 | } 39 | \references{ 40 | Fernandez Pierna, J.A., and Dardenne, P., 2008. Soil parameter quantification 41 | by NIRS as a Chemometric challenge at 'Chimiometrie 2006'. Chemometrics and 42 | Intelligent Laboratory Systems 91, 94-98. 43 | } 44 | \keyword{datasets} 45 | -------------------------------------------------------------------------------- /man/binning.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/binning.R 3 | \name{binning} 4 | \alias{binning} 5 | \title{Signal binning} 6 | \usage{ 7 | binning(X, bins, bin.size) 8 | } 9 | \arguments{ 10 | \item{X}{a numeric matrix or vector to process (optionally a data frame that 11 | can be coerced to a numerical matrix).} 12 | 13 | \item{bins}{the number of bins.} 14 | 15 | \item{bin.size}{the desired size of the bins.} 16 | } 17 | \value{ 18 | a matrix or vector with average values per bin. 19 | } 20 | \description{ 21 | Compute average values of a signal in pre-determined bins (col-wise subsets). 22 | The bin size can be determined either directly or by specifying the number of 23 | bins. Sometimes called boxcar transformation in signal processing 24 | } 25 | \examples{ 26 | data(NIRsoil) 27 | wav <- as.numeric(colnames(NIRsoil$spc)) 28 | 29 | # 5 first spectra 30 | matplot(wav, t(NIRsoil$spc[1:5, ]), 31 | type = "l", 32 | xlab = "Wavelength /nm", 33 | ylab = "Absorbance" 34 | ) 35 | 36 | NIRsoil$spc_binned <- binning(NIRsoil$spc, bin.size = 20) 37 | 38 | # bin means 39 | matpoints(as.numeric(colnames(NIRsoil$spc_binned)), 40 | t(NIRsoil$spc_binned[1:5, ]), 41 | pch = 1:5 42 | ) 43 | 44 | NIRsoil$spc_binned <- binning(NIRsoil$spc, bins = 20) 45 | dim(NIRsoil$spc_binned) # 20 bins 46 | 47 | # 5 first spectra 48 | matplot(wav, 49 | t(NIRsoil$spc[1:5, ]), 50 | type = "l", 51 | xlab = "Wavelength /nm", 52 | ylab = "Absorbance" 53 | ) 54 | 55 | # bin means 56 | matpoints(as.numeric(colnames(NIRsoil$spc_binned)), 57 | t(NIRsoil$spc_binned[1:5, ]), 58 | pch = 1:5 59 | ) 60 | } 61 | \seealso{ 62 | \code{\link{savitzkyGolay}}, \code{\link{movav}}, 63 | \code{\link{gapDer}}, \code{\link{continuumRemoval}} 64 | } 65 | \author{ 66 | Antoine Stevens & \href{https://orcid.org/0000-0002-5369-5120}{Leonardo Ramirez-Lopez} 67 | } 68 | -------------------------------------------------------------------------------- /man/spliceCorrection.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/spliceCorrection.R 3 | \name{spliceCorrection} 4 | \alias{spliceCorrection} 5 | \title{Splice correction of a spectral matrix acquired with an ASD spectrometer} 6 | \usage{ 7 | spliceCorrection(X, wav, splice = c(1000, 1830), interpol.bands = 10) 8 | } 9 | \arguments{ 10 | \item{X}{a numeric matrix or vector to transform (optionally a data frame that can 11 | be coerced to a numerical matrix).} 12 | 13 | \item{wav}{a numeric vector with band positions.} 14 | 15 | \item{splice}{a numeric vector of length 1 or 2 with the positions of the 16 | splice(s). Default: 17 | \code{c(1000, 1830)} (as for the ASD FieldSpec Pro spectrometer of Malvern 18 | Panalytical). See details.} 19 | 20 | \item{interpol.bands}{the number of interpolation bands.} 21 | } 22 | \value{ 23 | a matrix with the splice corrected data. 24 | } 25 | \description{ 26 | Corrects steps in an input spectral matrix by linear interpolation of the 27 | values of the edges of the middle sensor 28 | } 29 | \details{ 30 | This function uses by default the positions for the ASD FieldSpec Pro 31 | spectroradiometer (Malvern Panalytical) which usually exhibit 32 | steps at the splice of the three built-in detectors, 33 | positioned at 1000 nm (end of VNIR detector) and 1830 nm (end of SWIR1 detector). 34 | The data corresponding to the spectral region after the first step is used as 35 | reference for correcting the first region and the laste region (if 2 steps 36 | are supplied). 37 | Other typical examples of splice artifacts caused by concatenating data 38 | captured by different detectors inside the spectrometer: 39 | \itemize{ 40 | \item{XDS (FOSS): 1100 nm} 41 | \item{ProxiMate (BUCHI Labortechnik): 900 nm} 42 | } 43 | } 44 | \author{ 45 | Antoine Stevens and \href{https://orcid.org/0000-0002-5369-5120}{Leonardo Ramirez-Lopez} 46 | } 47 | -------------------------------------------------------------------------------- /R/standardNormalVariate.R: -------------------------------------------------------------------------------- 1 | #' @title Standard normal variate transformation 2 | #' 3 | #' @description 4 | #' \loadmathjax 5 | #' This function normalizes each row of an input matrix by 6 | #' subtracting each row by its mean and dividing it by its standard deviation 7 | #' @usage 8 | #' standardNormalVariate(X) 9 | #' @param X a numeric matrix of spectral data (optionally a data frame that can 10 | #' be coerced to a numerical matrix). 11 | #' @author Antoine Stevens 12 | #' @examples 13 | #' data(NIRsoil) 14 | #' NIRsoil$spc_snv <- standardNormalVariate(X = NIRsoil$spc) 15 | #' # 10 first snv spectra 16 | #' matplot( 17 | #' x = as.numeric(colnames(NIRsoil$spc_snv)), 18 | #' y = t(NIRsoil$spc_snv[1:10, ]), 19 | #' type = "l", 20 | #' xlab = "wavelength, nm", 21 | #' ylab = "snv" 22 | #' ) 23 | #' \dontrun{ 24 | #' apply(NIRsoil$spc_snv, 1, sd) # check 25 | #' } 26 | #' 27 | #' @return a matrix of normalized spectral data. 28 | #' @details 29 | #' SNV is simple way for normalizing spectral data that intends to correct for 30 | #' light scatter. 31 | #' It operates row-wise: 32 | #' 33 | #' \mjdeqn{SNV_i = \frac{x_i - \bar{x}_i}{s_i}}{SNV_i = \frac{x_i - \bar{x}_i}{s_i}} 34 | #' 35 | #' where \mjeqn{x_i}{x_i} is the signal of the \mjeqn{i}{i}th observation, 36 | #' \mjeqn{\bar{x}_i}{\bar{x}_i} is its mean and \mjeqn{s_i}{s_i} its standard 37 | #' deviation. 38 | #' @seealso \code{\link{msc}}, \code{\link{detrend}}, \code{\link{blockScale}}, 39 | #' \code{\link{blockNorm}} 40 | #' @references Barnes RJ, Dhanoa MS, Lister SJ. 1989. Standard normal variate 41 | #' transformation and de-trending of near-infrared diffuse reflectance spectra. 42 | #' Applied spectroscopy, 43(5): 772-777. 43 | #' @export 44 | #' 45 | standardNormalVariate <- function(X) { 46 | if (!any(class(X) %in% c("matrix", "data.frame"))) { 47 | stop("X must be a matrix or optionally a data.frame") 48 | } 49 | X <- sweep(X, 1, rowMeans(X, na.rm = TRUE), "-") 50 | X <- sweep(X, 1, apply(X, 1, sd, na.rm = TRUE), "/") 51 | as.matrix(X) 52 | } 53 | -------------------------------------------------------------------------------- /R/blockScale.R: -------------------------------------------------------------------------------- 1 | #' @title Hard or soft block scaling 2 | #' 3 | #' @description 4 | #' Hard or soft block scaling of a spectral matrix to constant group variance. 5 | #' In multivariate calibration, block scaling is used to down-weight variables, 6 | #' when one block of variables dominates other blocks. 7 | #' With hard block scaling, the variables in a block are scaled so that the sum 8 | #' of their variances equals 1. When soft block scaling is used, the variables 9 | #' are scaled such that the sum of variable variances is equal to the square 10 | #' root of the number of variables in a particular block. 11 | #' @usage 12 | #' blockScale(X, type = 'hard', sigma2 = 1) 13 | #' @param X a numeric matrix or vector to process (optionally a data frame that 14 | #' can be coerced to a numerical matrix). 15 | #' @param type the type of block scaling: 'hard' or 'soft'. 16 | #' @param sigma2 the desired total variance of a block (ie sum of the variances 17 | #' of all variables, default = 1), applicable when `type = 'hard'`. 18 | #' @return a `list` with `Xscaled`, the scaled matrix and `f`, the scaling 19 | #' factor. 20 | #' @author Antoine Stevens 21 | #' @examples 22 | #' X <- matrix(rnorm(100), ncol = 10) 23 | #' # Hard block scaling 24 | #' res <- blockScale(X) 25 | #' # sum of column variances == 1 26 | #' apply(res$Xscaled, 2, var) 27 | #' @seealso 28 | #' \code{\link{blockNorm}}, \code{\link{standardNormalVariate}}, 29 | #' \code{\link{detrend}} 30 | #' @references Eriksson, L., Johansson, E., Kettaneh, N., Trygg, J., 31 | #' Wikstrom, C., and Wold, S., 2006. Multi- and Megavariate Data Analysis. 32 | #' MKS Umetrics AB. 33 | #' @export 34 | #' 35 | blockScale <- function(X, type = "hard", sigma2 = 1) { 36 | if (!any(class(X) %in% c("matrix", "data.frame"))) { 37 | stop("X must be either a matrix or data.frame") 38 | } 39 | 40 | if (is.data.frame(X)) { 41 | X <- as.matrix(X) 42 | } 43 | 44 | w <- apply(X, 2, sd) 45 | if (type == "soft") { 46 | f <- w * (length(w)^0.25) 47 | } else { 48 | f <- w * (length(w)^0.5) / sigma2^0.5 49 | } 50 | list(Xscaled = t(t(X) / f), f = f) 51 | } 52 | -------------------------------------------------------------------------------- /man/read_nircal.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/read_nircal.R 3 | \name{read_nircal} 4 | \alias{read_nircal} 5 | \title{Import BUCHI NIRCal files} 6 | \usage{ 7 | read_nircal(file, response = TRUE, spectra = TRUE, 8 | metadata = TRUE, progress = TRUE, verbose = TRUE) 9 | } 10 | \arguments{ 11 | \item{file}{the name of the NIRCal (.nir) file which the data are to be read 12 | from. For URLs a temporary file is first downloaded and is then read.} 13 | 14 | \item{response}{a logical indicating if the data of the response variables 15 | must be returned (default is \code{TRUE}).} 16 | 17 | \item{spectra}{a logical indicating if the spectral data must be returned 18 | (default is \code{TRUE}).} 19 | 20 | \item{metadata}{a logical indicating if the metadada must be returned 21 | (default is \code{TRUE}).} 22 | 23 | \item{progress}{a logical indicating if a progress bar must be printed 24 | (default is \code{TRUE}).} 25 | 26 | \item{verbose}{a logical indicating if the number of spectra and response 27 | variables (an also the ID's of the spectra without gain and/or temperature 28 | information) must be printed (default is \code{TRUE}).} 29 | } 30 | \value{ 31 | a data.frame containing the metadata, response variables (if 32 | \code{response = TRUE}) and spectra (if \code{spectra = TRUE}, embedded in the 33 | \code{data.frame} as a matrix named \code{...$spc}). 34 | } 35 | \description{ 36 | \ifelse{html}{\out{Maturing lifecycle}}{\strong{Maturing}} 37 | 38 | This function imports .nir files generated by BUCHI NIRCal software. 39 | } 40 | \details{ 41 | The extension of the BUCHI NIRCal files is .nir. These files are used to 42 | store spectra generated by BUCHI N-500 and BUCHI NIRMaster FT-NIR sensors. 43 | See 44 | \href{https://assets.buchi.com/image/upload/v1605790933/pdf/Technical-Datasheet/TDS_11593569_NIRCal.pdf}{NIRCal technical data sheet.} 45 | } 46 | \author{ 47 | \href{https://orcid.org/0000-0002-5369-5120}{Leonardo Ramirez-Lopez} 48 | } 49 | -------------------------------------------------------------------------------- /man/baseline.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/baseline.R 3 | \name{baseline} 4 | \alias{baseline} 5 | \title{baseline} 6 | \usage{ 7 | baseline(X, wav) 8 | } 9 | \arguments{ 10 | \item{X}{a numeric matrix or vector to process (optionally a data frame that 11 | can be coerced to a numerical matrix).} 12 | 13 | \item{wav}{optional. A numeric vector of band positions.} 14 | } 15 | \value{ 16 | a matrix or vector with the baselined spectra. The resulting matrix 17 | is output with an attribute called \code{baselines} which contain the spectra 18 | of the fitted baselines. 19 | 20 | This function is similar to \code{\link{continuumRemoval}} and it might 21 | replace some of its functionality in the future. 22 | } 23 | \description{ 24 | \ifelse{html}{\out{Maturing lifecycle}}{\strong{Maturing}} 25 | 26 | Fits a baseline to each spectrum in a matrix and removes it from the 27 | corresponding input spectrum. A vector can also be passed to this function. 28 | } 29 | \details{ 30 | The baseline function find points lying on the convex hull 31 | of a spectrum, connects the points by linear interpolation and 32 | subtracts the interpolated line (baseline) from the corresponding spectrum. 33 | } 34 | \examples{ 35 | data(NIRsoil) 36 | wav <- as.numeric(colnames(NIRsoil$spc)) 37 | # plot of the 5 first absorbance spectra 38 | matplot(wav, 39 | t(NIRsoil$spc[1:5, ]), 40 | type = "l", 41 | ylim = c(0, .6), 42 | xlab = "Wavelength /nm", 43 | ylab = "Absorbance" 44 | ) 45 | 46 | bs <- baseline(NIRsoil$spc, wav) 47 | matlines(wav, t(bs[1:5, ])) 48 | 49 | fitted_baselines <- attr(bs, "baselines") 50 | matlines(wav, t(fitted_baselines[1:5, ])) 51 | title("Original spectra, baselines and baselined spectra") 52 | } 53 | \seealso{ 54 | \code{\link{savitzkyGolay}}, \code{\link{movav}}, 55 | \code{\link{gapDer}}, \code{\link{binning}}, \code{\link{continuumRemoval}} 56 | } 57 | \author{ 58 | \href{https://orcid.org/0000-0002-5369-5120}{Leonardo Ramirez-Lopez} 59 | with contributions from Mervin Manalili 60 | } 61 | -------------------------------------------------------------------------------- /man/resample2.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/resample2.R 3 | \name{resample2} 4 | \alias{resample2} 5 | \title{Resample a high resolution signal to a low resolution signal using full 6 | width half maximum (FWHM) values} 7 | \usage{ 8 | resample2(X, wav, new.wav, fwhm) 9 | } 10 | \arguments{ 11 | \item{X}{a numeric matrix or vector to resample (optionally a data frame that can 12 | be coerced to a numerical matrix).} 13 | 14 | \item{wav}{a numeric vector giving the original band positions.} 15 | 16 | \item{new.wav}{a numeric vector giving the new band positions.} 17 | 18 | \item{fwhm}{a numeric vector giving the full width half maximums of the new 19 | band positions. If no value is specified, it is assumed that the fwhm is 20 | equal to the sampling interval (i.e. band spacing). If only one value is 21 | specified, the fwhm is assumed to be constant over the spectral range.} 22 | } 23 | \value{ 24 | a matrix or vector with resampled values 25 | } 26 | \description{ 27 | Resample a data matrix or vector to match the response of another instrument 28 | using full width half maximum (FWHM) values 29 | } 30 | \details{ 31 | The function uses gaussian models defined by fwhm values to resample the high 32 | resolution data to new band positions and resolution. 33 | It assumes that band spacing and fwhm of the input data is constant over the 34 | spectral range. 35 | The interpolated values are set to 0 if input data fall outside by 3 standard 36 | deviations of the gaussian densities defined by fwhm. 37 | } 38 | \examples{ 39 | data(NIRsoil) 40 | wav <- as.numeric(colnames(NIRsoil$spc)) 41 | # Plot 10 first spectra 42 | matplot(wav, t(NIRsoil$spc[1:10, ]), 43 | type = "l", xlab = "Wavelength /nm", 44 | ylab = "Absorbance" 45 | ) 46 | # ASTER SWIR bands (nm) 47 | new_wav <- c(1650, 2165, 2205, 2260, 2330, 2395) # positions 48 | fwhm <- c(100, 40, 40, 50, 70, 70) # fwhm's 49 | # Resample NIRsoil to ASTER band positions 50 | aster <- resample2(NIRsoil$spc, wav, new_wav, fwhm) 51 | matpoints(as.numeric(colnames(aster)), t(aster[1:10, ]), pch = 1:5) 52 | } 53 | \seealso{ 54 | \code{\link{resample}} 55 | } 56 | \author{ 57 | Antoine Stevens 58 | } 59 | -------------------------------------------------------------------------------- /R/movav.R: -------------------------------------------------------------------------------- 1 | #' @title Moving average 2 | #' @description 3 | #' A simple moving average of a matrix or vector using a convolution 4 | #' function written in C++/Rcpp for fast computing 5 | #' @usage 6 | #' movav(X, w) 7 | #' @param X a numeric matrix or vector to process (optionally a data frame that can 8 | #' be coerced to a numerical matrix). 9 | #' @param w filter length. 10 | #' @author Antoine Stevens 11 | #' @examples 12 | #' data(NIRsoil) 13 | #' wav <- as.numeric(colnames(NIRsoil$spc)) 14 | #' # adding some noise 15 | #' NIRsoil$spc_noise <- NIRsoil$spc + rnorm(length(NIRsoil$spc), 0, 0.001) 16 | #' matplot(wav, 17 | #' t(NIRsoil$spc_noise[1:10, ]), 18 | #' type = "l", 19 | #' lty = 1, 20 | #' xlab = "Wavelength /nm", 21 | #' ylab = "Absorbance", 22 | #' col = "grey" 23 | #' ) 24 | #' 25 | #' # window size of 11 bands 26 | #' NIRsoil$spc_mov <- movav(NIRsoil$spc_noise, w = 15) 27 | #' # smoothed data 28 | #' matlines(as.numeric(colnames(NIRsoil$spc_mov)), 29 | #' t(NIRsoil$spc_mov[1:10, ]), 30 | #' type = "l", 31 | #' lty = 1 32 | #' ) 33 | #' @return a matrix or vector with the filtered signal(s) 34 | #' @seealso \code{\link{savitzkyGolay}}, \code{\link{gapDer}}, 35 | #' \code{\link{binning}}, \code{\link{continuumRemoval}} 36 | #' @export 37 | #' 38 | movav <- function(X, w) { 39 | if (is.data.frame(X)) { 40 | X <- as.matrix(X) 41 | } 42 | if (missing(w)) { 43 | stop("filter length w should be specified") 44 | } 45 | if (w < 1) { 46 | stop("filter length w should be > 0") 47 | } 48 | if (w == 1) { 49 | return(X) 50 | } 51 | 52 | f <- rep(1, w) / w # filter 53 | 54 | if (is.matrix(X)) { 55 | if (w >= ncol(X)) { 56 | stop("filter length w should be lower than ncol(X)") 57 | } 58 | output <- convCppM(X, f) # Convolution 59 | g <- ceiling((w - 1) / 2) 60 | colnames(output) <- colnames(X)[((g + w %% 2):(ncol(X) - g))] 61 | rownames(output) <- rownames(X) 62 | } 63 | 64 | if (is.vector(X)) { 65 | if (w >= length(X)) { 66 | stop("filter length w should be lower than length(X)") 67 | } 68 | output <- convCppV(X, f) # Convolution 69 | g <- (w - 1) / 2 70 | names(output) <- names(X)[((g + w %% 2):(length(X) - g))] 71 | } 72 | 73 | return(output) 74 | } 75 | -------------------------------------------------------------------------------- /R/resample.R: -------------------------------------------------------------------------------- 1 | #' @title Resample spectral data 2 | #' @description 3 | #' Resample a data matrix or vector to new coordinates (e.g. 4 | #' band positions) using spline or linear interpolation. This function is a 5 | #' simple wrapper around \code{\link{approx}} and \code{\link{splinefun}} in 6 | #' \pkg{base}. 7 | #' @usage 8 | #' resample(X, wav, new.wav, interpol = "spline", ...) 9 | #' @param X numeric matrix or vector to resample (optionally a data frame that 10 | #' can be coerced to a numerical matrix). 11 | #' @param wav a numeric vector giving the original band positions. 12 | #' @param new.wav a numeric vector giving the new band positions. 13 | #' @param interpol the interpolation method: 'linear' or 'spline' (default). 14 | #' @param ... additional arguments to be passed to the \code{\link{splinefun}} 15 | #' function when \code{interpol = 'spline'}. 16 | #' @author Antoine Stevens and \href{https://orcid.org/0000-0002-5369-5120}{Leonardo Ramirez-Lopez} 17 | #' @examples 18 | #' data(NIRsoil) 19 | #' wav <- as.numeric(colnames(NIRsoil$spc)) 20 | #' # increase spectral resolution by 2 21 | #' NIRsoil$spc_resampled <- resample(NIRsoil$spc, wav, seq(1100, 2498, 2)) 22 | #' dim(NIRsoil$spc) 23 | #' dim(NIRsoil$spc_resampled) 24 | #' @return 25 | #' a matrix or vector with resampled values. 26 | #' @seealso 27 | #' \code{\link{resample2}} 28 | #' @export 29 | #' 30 | resample <- function(X, wav, new.wav, interpol = "spline", ...) { 31 | if (is.data.frame(X)) { 32 | X <- as.matrix(X) 33 | } 34 | if (missing(wav)) { 35 | stop("wav argument must be specified") 36 | } 37 | 38 | if (!interpol %in% c("linear", "spline")) { 39 | stop("Argument 'interpol' must be either 'linear or 'spline'") 40 | } 41 | 42 | resfun <- function(x, interpol) { 43 | if (interpol == "linear") { 44 | approx(x = wav, y = x, xout = new.wav, method = "linear")$y 45 | } else { 46 | splinefun(x = wav, y = x, ...)(new.wav) 47 | } 48 | } 49 | 50 | if (is.matrix(X)) { 51 | if (length(wav) != ncol(X)) { 52 | stop("length(wav) must be equal to ncol(X)") 53 | } 54 | 55 | output <- t(apply(X, 1, resfun, interpol)) 56 | rownames(output) <- rownames(X) 57 | colnames(output) <- new.wav 58 | } else { 59 | if (length(wav) != length(X)) { 60 | stop("length(wav) must be equal to length(X)") 61 | } 62 | output <- resfun(X, interpol) 63 | names(output) <- new.wav 64 | } 65 | 66 | return(output) 67 | } 68 | -------------------------------------------------------------------------------- /man/detrend.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/detrend.R 3 | \name{detrend} 4 | \alias{detrend} 5 | \title{Detrending spectral data} 6 | \usage{ 7 | detrend(X, wav, p = 2) 8 | } 9 | \arguments{ 10 | \item{X}{a numeric matrix or vector to process (optionally a data frame that 11 | can be coerced to a numerical matrix)} 12 | 13 | \item{wav}{the wavelengths/ band centers.} 14 | 15 | \item{p}{an integer larger than 1 indicating the polynomial order (default is 16 | 2, as in the original paper of Barnes et al., 1989).} 17 | } 18 | \value{ 19 | a matrix or vector with the detrended data. 20 | } 21 | \description{ 22 | \loadmathjax 23 | Normalizes each row of an input matrix by applying a SNV transformation 24 | followed by fitting a second order linear model and returning the fitted 25 | residuals. 26 | } 27 | \details{ 28 | The detrend is a row-wise transformation that allows to correct for 29 | wavelength-dependent scattering effects (variations in curvilinearity). A 30 | \mjeqn{p}{p} order polynomial is fit for each spectrum (\mjeqn{x_i}{x_i}) 31 | using the vector of bands (\mjeqn{\lambda}{\lambda}, e.g. wavelengths) as 32 | explanatory variable as follows: 33 | 34 | \mjdeqn{x_i = a\lambda^p + ... + b\lambda + c + e_i}{x_i = a\lambda^p + ... + b\lambda + c + e_i} 35 | 36 | were a, b, c are estimated by least squares, and \mjeqn{e_i}{e_i} are the 37 | spectral residuals of the least square fit. The residuals of the \mjeqn{i}{i}th 38 | correspond to the \mjeqn{i}{i}th detrended spectrum. 39 | } 40 | \examples{ 41 | data(NIRsoil) 42 | wav <- as.numeric(colnames(NIRsoil$spc)) 43 | # conversion to reflectance 44 | opar <- par(no.readonly = TRUE) 45 | par(mfrow = c(2, 1), mar = c(4, 4, 2, 2)) 46 | # plot of the 10 first spectra 47 | matplot(wav, t(NIRsoil$spc[1:10, ]), 48 | type = "l", 49 | xlab = "", 50 | ylab = "Absorbance" 51 | ) 52 | mtext("Raw spectra") 53 | det <- detrend(NIRsoil$spc, wav) 54 | matplot(wav, t(det[1:10, ]), 55 | type = "l", 56 | xlab = "Wavelength /nm", 57 | ylab = "Absorbance" 58 | ) 59 | mtext("Detrend spectra") 60 | par(opar) 61 | } 62 | \references{ 63 | Barnes RJ, Dhanoa MS, Lister SJ. 1989. Standard normal variate 64 | transformation and de-trending of near-infrared diffuse reflectance spectra. 65 | Applied spectroscopy, 43(5): 772-777. 66 | } 67 | \seealso{ 68 | \code{\link{standardNormalVariate}}, \code{\link{blockScale}}, 69 | \code{\link{blockNorm}} 70 | } 71 | \author{ 72 | Antoine Stevens and \href{https://orcid.org/0000-0002-5369-5120}{Leonardo Ramirez-Lopez} 73 | } 74 | -------------------------------------------------------------------------------- /man/readASD.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/readASD.R 3 | \name{readASD} 4 | \alias{readASD} 5 | \title{Read ASD FieldSpec Pro binary and ASCII files} 6 | \usage{ 7 | readASD(fnames, in_format, out_format) 8 | } 9 | \arguments{ 10 | \item{fnames}{a character vector of the name(s) (with absolute path) of the file(s) to read.} 11 | 12 | \item{in_format}{the format of the input file: \code{'binary'} or \code{'txt'}.} 13 | 14 | \item{out_format}{the format of the output: 'matrix' (default) or 'list' (see below).} 15 | } 16 | \value{ 17 | if \code{out_format} = \code{'matrix'}, reflectance values of the input file(s) in a single matrix. 18 | 19 | if \code{out_format} = \code{'list'}, a \code{list} of the input file(s) data consisting of a list with components: 20 | \itemize{ 21 | \item{\code{Name}: name of the file imported} 22 | \item{\code{datetime}: date and time of acquisition in \code{POSIXct} format} 23 | \item{\code{header}: list with information from the header file} 24 | \item{\code{radiance}: if applicable, a numeric vector of radiance values} 25 | \item{\code{reference}: if applicable, a numeric vector of radiance values of the white reference} 26 | \item{\code{reflectance}: numeric vector of reflectance values} 27 | \item{\code{wavelength}: numeric vector of the band positions} 28 | } 29 | } 30 | \description{ 31 | \ifelse{html}{\out{Maturing lifecycle}}{\strong{Maturing}} 32 | 33 | Read single or multiple binary and ASCII files acquired with an ASD FieldSpec 34 | Pro (\href{https://www.malvernpanalytical.com/en/products/product-range/asd-range}{ASDi}, 35 | Boulder, CO) spectroradiometer 36 | } 37 | \note{ 38 | There is a \R port of the \file{importasd.m} function from the 39 | \sQuote{FSFPostProcessing} Matlab toolbox by Iain Robinson 40 | (University of Edinburgh), which is based on some Java code provided 41 | by Andreas Hunei (University of Zurich). 42 | 43 | It seems that ASD file format has changed quite a lot with file versions. The 44 | function will possibly not work as expected for 45 | all versions. Please report any bugs to the package maintainer. 46 | } 47 | \references{ 48 | Robinson, I., and A. MacArthur. 2011. The Field Spectroscopy Facility Post 49 | Processing Toolbox User Guide. Post processing spectral data in MATLAB, 50 | University of Edinburgh, Edinburgh, UK. 51 | } 52 | \author{ 53 | Antoine Stevens (\R port), Iain Robinson (matlab function) & Leonardo Ramirez-Lopez (\R port) 54 | } 55 | -------------------------------------------------------------------------------- /man/continuumRemoval.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/continuumRemoval.R 3 | \name{continuumRemoval} 4 | \alias{continuumRemoval} 5 | \title{Continuum Removal} 6 | \usage{ 7 | continuumRemoval(X, wav, type = c("R", "A"), 8 | interpol = c("linear", "spline"), 9 | method = c("division", "substraction")) 10 | } 11 | \arguments{ 12 | \item{X}{a numeric matrix or vector to process (optionally a data frame that can 13 | be coerced to a numerical matrix).} 14 | 15 | \item{wav}{optional. A numeric vector of band positions.} 16 | 17 | \item{type}{the type of data: 'R' for reflectance (default), 'A' for 18 | absorbance.} 19 | 20 | \item{interpol}{the interpolation method between points on the convex hull: 21 | 'linear' (default) or 'spline'.} 22 | 23 | \item{method}{normalization method: 'division' (default) or 'subtraction' 24 | (see details section).} 25 | } 26 | \value{ 27 | a matrix or vector with the filtered spectra. 28 | } 29 | \description{ 30 | \ifelse{html}{\out{Maturing lifecycle}}{\strong{Maturing}} 31 | 32 | Compute the continuum removed values of a data matrix or vector 33 | } 34 | \details{ 35 | The continuum removal technique was introduced by Clark and Roush (1984) 36 | as a method to highlight energy absorption features of minerals. 37 | It can be viewed as a way to perform albedo normalization. 38 | The algorithm find points lying on the convex hull (local maxima or envelope) 39 | of a spectrum, connects the points by linear or spline interpolation and 40 | normalizes the spectrum by dividing (or subtracting) the input data by the 41 | interpolated line. 42 | } 43 | \examples{ 44 | data(NIRsoil) 45 | wav <- as.numeric(colnames(NIRsoil$spc)) 46 | # plot of the 10 first abs spectra 47 | matplot(wav, 48 | t(NIRsoil$spc[1:10, ]), 49 | type = "l", 50 | ylim = c(0, .6), 51 | xlab = "Wavelength /nm", 52 | ylab = "Abs" 53 | ) 54 | # # type = 'A' is used for absorbance spectra 55 | cr <- continuumRemoval(NIRsoil$spc, wav, type = "A") 56 | matlines(wav, t(cr[1:10, ])) 57 | } 58 | \references{ 59 | Clark, R.N., and Roush, T.L., 1984. Reflectance Spectroscopy: Quantitative 60 | Analysis Techniques for Remote Sensing Applications. J. Geophys. Res. 89, 61 | 6329-6340. 62 | } 63 | \seealso{ 64 | \code{\link{savitzkyGolay}}, \code{\link{movav}}, 65 | \code{\link{gapDer}}, \code{\link{binning}} 66 | } 67 | \author{ 68 | Antoine Stevens & \href{https://orcid.org/0000-0002-5369-5120}{Leonardo Ramirez-Lopez} 69 | } 70 | -------------------------------------------------------------------------------- /R/blockNorm.R: -------------------------------------------------------------------------------- 1 | #' @title Sum of squares block weighting 2 | #' @description 3 | #' Sum of squares block weighting: allows to scale blocks of variables, 4 | #' but keeping the relative weights of the variables inside a block. 5 | #' @usage 6 | #' blockNorm(X, targetnorm = 1) 7 | #' @param X a numeric matrix to transform (optionally a data frame that can 8 | #' be coerced to a numerical matrix). 9 | #' @param targetnorm desired sum of squares for a block of variables 10 | #' (default = 1) 11 | #' @return a list with components `Xscaled`, the scaled matrix and `f`, the 12 | #' scaling factor 13 | #' @author Antoine Stevens 14 | #' @examples 15 | #' X <- matrix(rnorm(100), ncol = 10) 16 | #' # Block normalize to sum of square equals to 1 17 | #' res <- blockNorm(X, targetnorm = 1) 18 | #' sum(res$Xscaled^2) # check 19 | #' @seealso 20 | #' \code{\link{blockScale}}, \code{\link{standardNormalVariate}}, 21 | #' \code{\link{detrend}} 22 | #' @references 23 | #' Eriksson, L., Johansson, E., Kettaneh, N., Trygg, J., 24 | #' Wikstrom, C., and Wold, S., 2006. Multi- and Megavariate Data Analysis. 25 | #' MKS Umetrics AB. 26 | #' @details 27 | #' The function computes a scaling factor, which, multiplied by the 28 | #' input matrix, 29 | #' produces a matrix with a pre--determined sum of squares. 30 | #' @note 31 | #' This is a \R port of the \file{MBnorm.m} function of the MB matlab toolbox 32 | #' by Fran van den Berg. 33 | #' @export 34 | blockNorm <- function(X, targetnorm = 1) { 35 | if (!any(class(X) %in% c("matrix", "data.frame"))) { 36 | stop("X must be either a matrix or optionally a data.frame") 37 | } 38 | 39 | if (is.data.frame(X)) { 40 | X <- as.matrix(X) 41 | } 42 | 43 | if (targetnorm == 1) { 44 | f <- sum(X^2, na.rm = TRUE)^0.5 45 | } else { 46 | fmax <- sum(X^2, na.rm = TRUE) 47 | fmaxn <- sum((X / fmax)^2, na.rm = TRUE) 48 | if (fmaxn > targetnorm) { 49 | fmin <- fmax 50 | fminn <- fmaxn 51 | while (fminn > targetnorm) { 52 | fmin <- fmin * 10 53 | fminn <- sum((X / fmin)^2, na.rm = TRUE) 54 | } 55 | } else { 56 | fmin <- fmax 57 | fminn <- fmaxn 58 | while (fmaxn < targetnorm) { 59 | fmax <- fmax / 10 60 | fmaxn <- sum((X / fmax)^2, na.rm = TRUE) 61 | } 62 | } 63 | n <- fmaxn 64 | while ((targetnorm - n)^2 > 1e-12) { 65 | f <- (fmin + fmax) / 2 66 | n <- sum((X / f)^2, na.rm = TRUE) 67 | if (n > targetnorm) { 68 | fmax <- f 69 | } else { 70 | fmin <- f 71 | } 72 | } 73 | } 74 | Xn <- X / f 75 | return(list(Xscaled = Xn, f = f)) 76 | } 77 | -------------------------------------------------------------------------------- /man/prospectr-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/prospectr.R 3 | \docType{package} 4 | \name{prospectr-package} 5 | \alias{prospectr-package} 6 | \alias{prospectr} 7 | \title{Overview of the functions in the prospectr package} 8 | \description{ 9 | \ifelse{html}{\out{Stable lifecycle}}{\strong{Stable}} 10 | 11 | Misc functions for spectral data 12 | \if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}} 13 | 14 | This package implements a number of functions useful for 15 | pre-processing spectral data well as for selecting representative samples/spectra. 16 | The functions included here are particularly useful in Near-Infrared and Infrared 17 | Spectroscopy applications. 18 | } 19 | \details{ 20 | This is the version 21 | 0.2.9 -- hallo-galo of the package. 22 | The main functionality is listed here. 23 | 24 | Currently, the following preprocessing functions are available: 25 | 26 | \itemize{ 27 | \item{\code{\link{resample}}} 28 | \item{\code{\link{resample2}}} 29 | \item{\code{\link{movav}}} 30 | \item{\code{\link{standardNormalVariate}}} 31 | \item{\code{\link{msc}}} 32 | \item{\code{\link{detrend}}} 33 | \item{\code{\link{baseline}}} 34 | \item{\code{\link{blockScale}}} 35 | \item{\code{\link{blockNorm}}} 36 | \item{\code{\link{binning}}} 37 | \item{\code{\link{savitzkyGolay}}} 38 | \item{\code{\link{gapDer}}} 39 | \item{\code{\link{continuumRemoval}}} 40 | } 41 | 42 | For the selection of representative samples/observations for calibrating 43 | spectral models the following functions ca be used: 44 | 45 | \itemize{ 46 | \item{\code{\link{naes}}} 47 | \item{\code{\link{honigs}}} 48 | \item{\code{\link{shenkWest}}} 49 | \item{\code{\link{kenStone}}} 50 | \item{\code{\link{duplex}}} 51 | \item{\code{\link{puchwein}}} 52 | } 53 | 54 | Other useful functions are also available: 55 | 56 | \itemize{ 57 | \item{\code{\link{read_nircal}}} 58 | \item{\code{\link{readASD}}} 59 | \item{\code{\link{spliceCorrection}}} 60 | \item{\code{\link{cochranTest}}} 61 | } 62 | } 63 | \seealso{ 64 | Useful links: 65 | \itemize{ 66 | \item \url{https://github.com/l-ramirez-lopez/prospectr} 67 | \item Report bugs at \url{https://github.com/l-ramirez-lopez/prospectr/issues} 68 | } 69 | } 70 | \author{ 71 | \strong{Maintainer}: Leonardo Ramirez-Lopez \email{ramirez.lopez.leo@gmail.com} 72 | 73 | Authors: 74 | \itemize{ 75 | \item Antoine Stevens (\href{https://orcid.org/0000-0002-1588-7519}{ORCID}) 76 | 77 | \item Leonardo Ramirez-Lopez (\href{https://orcid.org/0000-0002-5369-5120}{ORCID}) 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /R/e2m.R: -------------------------------------------------------------------------------- 1 | #' @title A function for transforming a matrix from its Euclidean space to its Mahalanobis space 2 | #' @examples 3 | #' # test data 4 | #' \dontrun{ 5 | #' X <- matrix(rnorm(500), ncol = 5) 6 | #' # Normal way to compute the Mahalanobis distance 7 | #' md1 <- sqrt(mahalanobis(X, center = colMeans(X), cov = cov(X))) 8 | #' # Projection approach for computing the Mahalanobis distance 9 | #' # 1. Projecting from the Euclidean to the Mahalanobis space 10 | #' Xm <- e2m(X, sm.method = "svd") 11 | #' # 2. Use the normal Euclidean distance on the Mahalanobis space 12 | #' md2 <- sqrt(rowSums((sweep(Xm, 2, colMeans(Xm), "-"))^2)) 13 | #' # Plot the results of both methods 14 | #' plot(md1, md2) 15 | #' # Test on a real dataset 16 | #' # Mahalanobis in the spectral space 17 | #' data(NIRsoil) 18 | #' X <- NIRsoil$spc 19 | #' Xm <- e2m(X, sm.method = "svd") 20 | #' md2 <- sqrt(rowSums((sweep(Xm, 2, colMeans(Xm), "-"))^2)) 21 | #' 22 | #' md1 <- sqrt(mahalanobis(X, center = colMeans(X), cov = cov(X))) # does not work 23 | #' # Mahalanobis in the PC space 24 | #' pc <- 20 25 | #' pca <- prcomp(X, center = TRUE, scale = TRUE) 26 | #' X <- pca$x[, 1:pc] 27 | #' X2 <- sweep(pca$x[, 1:pc, drop = FALSE], 2, pca$sdev[1:pc], "/") 28 | #' md4 <- sqrt(rowSums((sweep(Xm, 2, colMeans(Xm), "-"))^2)) 29 | #' md5 <- sqrt(rowSums((sweep(X2, 2, colMeans(X2), "-"))^2)) 30 | #' md3 <- sqrt(mahalanobis(X, center = colMeans(X), cov = cov(X))) # does work 31 | #' } 32 | #' @keywords internal 33 | e2m <- function(X, sm.method = c("svd", "eigen")) { 34 | nms <- dimnames(X) 35 | 36 | if (ncol(X) > nrow(X)) { 37 | stop("In order to project the matrix to a Mahalanobis space, the number of observations of the input matrix must greater than its number of variables") 38 | } 39 | 40 | if (ncol(X) == 1) { 41 | ms_x <- X / sd(X) 42 | } else { 43 | sm_method <- match.arg(sm.method) 44 | 45 | X <- as.matrix(X) 46 | vcv <- cov(X) 47 | sq_vcv <- sqrtSm(vcv, method = sm_method) 48 | sq_S <- solve(sq_vcv) 49 | ms_x <- X %*% sq_S 50 | dimnames(ms_x) <- nms 51 | } 52 | return(ms_x) 53 | } 54 | 55 | 56 | #' @title Square root of (square) symetric matrices 57 | #' @keywords internal 58 | sqrtSm <- function(X, method = c("svd", "eigen")) { 59 | if (!isSymmetric(X)) { 60 | stop("X must be a square symmetric matrix") 61 | } 62 | method <- match.arg(method) 63 | 64 | if (method == "svd") { 65 | out <- svd(X) 66 | D <- diag(out$d) 67 | U <- out$v 68 | } 69 | 70 | if (method == "eigen") { 71 | out <- eigen(X) 72 | D <- diag(out$values) 73 | U <- out$vectors 74 | } 75 | 76 | # if(method == 'Schur'){ require(geigen) out <- Schur(X) D <- diag(out$EValues) U <- out$Q } 77 | return(U %*% sqrt(D) %*% t(U)) 78 | } 79 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "cran-comments.md" 3 | author: "Leo Ramirez Lopez" 4 | date: "13 3 2020" 5 | --- 6 | 7 | 8 | ## version 0.2.3 9 | ## Test environments 10 | 18.02.2022 11 | 12 | Check: installed package size 13 | Result: NOTE 14 | installed size is 6.3Mb 15 | sub-directories of 1Mb or more: 16 | data 2.1Mb 17 | libs 3.4Mb 18 | Flavors: r-release-macos-arm64, r-release-macos-x86_64, r-oldrel-macos-x86_64 19 | 20 | 21 | The checks were conducted in the following platforms through rhub: 22 | 23 | - "debian-clang-devel" 24 | 25 | - "debian-gcc-devel" 26 | 27 | - "fedora-gcc-devel" 28 | 29 | - "debian-gcc-devel-nold" 30 | 31 | - "debian-gcc-patched" 32 | 33 | - "debian-gcc-release" 34 | 35 | - "linux-x86_64-rocker-gcc-san" 36 | 37 | - "macos-highsierra-release-cran" 38 | 39 | - "solaris-x86-patched-ods" 40 | 41 | - "ubuntu-gcc-release" 42 | 43 | - "windows-x86_64-devel" 44 | 45 | - Running under: Windows Server x64 (build 17763) (appveyor) R 3.6.3 46 | 47 | - ubuntu 16.04.6 (on travis-ci), R version 4.0.2 48 | 49 | - CRAN win-builders 50 | 51 | ## R CMD check results 52 | There were no ERRORs or WARNINGs or NOTEs. 53 | 54 | 55 | ## version 0.2.1 56 | ## Test environments 57 | 24.10.2020 58 | 59 | The checks were conducted in the following platforms through rhub: 60 | 61 | - "debian-clang-devel" 62 | 63 | - "debian-gcc-devel" 64 | 65 | - "fedora-gcc-devel" 66 | 67 | - "debian-gcc-devel-nold" 68 | 69 | - "debian-gcc-patched" 70 | 71 | - "debian-gcc-release" 72 | 73 | - "linux-x86_64-rocker-gcc-san" 74 | 75 | - "macos-highsierra-release-cran" 76 | 77 | - "solaris-x86-patched-ods" 78 | 79 | - "ubuntu-gcc-release" 80 | 81 | - "windows-x86_64-devel" 82 | 83 | - Running under: Windows Server x64 (build 17763) (appveyor) R 3.6.3 84 | 85 | - ubuntu 16.04.6 (on travis-ci), R version 4.0.2 86 | 87 | - CRAN win-builders 88 | 89 | ## R CMD check results 90 | There were no ERRORs or WARNINGs or NOTEs. 91 | 92 | 93 | 94 | ## version 0.2.0 95 | ## Test environments 96 | * local x86_64-w64-mingw32 (64-bit) install, R version 3.6.1/R devel 2020-03-12 r77936 97 | * Running under: Windows Server x64 (build 17763) (appveyor) R 3.6.3 98 | * ubuntu 16.04.6 (on travis-ci), R version 3.6.2 99 | * win-builder 100 | 101 | ## R CMD check results 102 | There were no ERRORs or WARNINGs. 103 | 104 | There was 1 NOTE (in all test environments): 105 | 106 | * checking installed package size ... NOTE 107 | installed size is 6.0Mb 108 | sub-directories of 1Mb or more: 109 | data 4.0Mb 110 | libs 1.8Mb 111 | 112 | ## Recommenations CRAN submission 113 | For the future: Add some reference about the method in 114 | the Description field in the form Authors (year) ? -------------------------------------------------------------------------------- /man/msc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/msc.R 3 | \name{msc} 4 | \alias{msc} 5 | \title{Multiplicative Scatter Correction (msc)} 6 | \usage{ 7 | msc(X, ref_spectrum = colMeans(X)) 8 | } 9 | \arguments{ 10 | \item{X}{a numeric matrix of spectral data.} 11 | 12 | \item{ref_spectrum}{a numeric vector corresponding to an "ideal" reference 13 | spectrum (e.g. free of scattering effects). By default the function uses the 14 | mean spectrum of the input \code{X}. See details. Note that this argument was 15 | previously named as \code{reference_spc}, however, it has been renamed to 16 | \code{ref_spectrum} to emphasize that this argument is a vector and not a 17 | matrix of spectra.} 18 | } 19 | \value{ 20 | a matrix of normalized spectral data with an attribute which indicates the 21 | reference spectrum used. 22 | } 23 | \description{ 24 | \loadmathjax 25 | \ifelse{html}{\out{Maturing lifecycle}}{\strong{Maturing}} 26 | 27 | This function implements the multiplicative scatter correction method 28 | which attempts to remove physical light scatter by accounting for additive 29 | and multiplicative effects (Geladi et al., 1985). 30 | } 31 | \details{ 32 | The Multiplicative Scatter Correction (MSC) is a normalization method that 33 | attempts to account for additive and multiplicative effects by aligning each 34 | spectrum (\mjeqn{x_i}{x_i}) to an ideal reference one (\mjeqn{x_r}{x_r}) as 35 | follows: 36 | 37 | \mjdeqn{x_i = m_i x_r + a_i}{x_i = m_i x_r + a_i} 38 | \mjdeqn{MSC(x_i) = \frac{a_i - x_i}{m_i}}{MSC(x_i) = {a_i - x_i}/{m_i}} 39 | 40 | where \mjeqn{a_i}{a_i} and \mjeqn{m_i}{m_i} are the additive and 41 | multiplicative terms respectively. 42 | } 43 | \examples{ 44 | data(NIRsoil) 45 | NIRsoil$msc_spc <- msc(X = NIRsoil$spc) 46 | 47 | # 10 first msc spectra 48 | matplot( 49 | x = as.numeric(colnames(NIRsoil$msc_spc)), 50 | y = t(NIRsoil$msc_spc[1:10, ]), 51 | type = "l", 52 | xlab = "wavelength, nm", 53 | ylab = "msc" 54 | ) 55 | 56 | # another example 57 | spectra_a <- NIRsoil$spc[1:40, ] 58 | spectra_b <- NIRsoil$spc[-(1:40), ] 59 | 60 | spectra_a_msc <- msc(spectra_a, colMeans(spectra_a)) 61 | 62 | # correct spectra_a based on the reference spectrum used to correct 63 | # spectra_a 64 | 65 | spectra_b_msc <- msc( 66 | spectra_b, 67 | ref_spectrum = attr(spectra_a_msc, "Reference spectrum") 68 | ) 69 | } 70 | \references{ 71 | Geladi, P., MacDougall, D., and Martens, H. 1985. Linearization and 72 | Scatter-Correction for Near-Infrared Reflectance Spectra of Meat. 73 | Applied Spectroscopy, 39(3):491-500. 74 | } 75 | \seealso{ 76 | \code{\link{standardNormalVariate}}, \code{\link{detrend}}, 77 | \code{\link{blockScale}}, \code{\link{blockNorm}} 78 | } 79 | \author{ 80 | \href{https://orcid.org/0000-0002-5369-5120}{Leonardo Ramirez-Lopez} and Guillaume Hans 81 | } 82 | -------------------------------------------------------------------------------- /man/savitzkyGolay.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/savitzkyGolay.R 3 | \name{savitzkyGolay} 4 | \alias{savitzkyGolay} 5 | \title{Savitzky-Golay smoothing and differentiation} 6 | \usage{ 7 | savitzkyGolay(X, m, p, w, delta.wav) 8 | } 9 | \arguments{ 10 | \item{X}{a numeric matrix or vector to process (optionally a data frame that 11 | can be coerced to a numerical matrix).} 12 | 13 | \item{m}{an integer indcating the differentiation order.} 14 | 15 | \item{p}{an integer indicating the polynomial order.} 16 | 17 | \item{w}{an integer indicating the window size (must be odd).} 18 | 19 | \item{delta.wav}{(optional) sampling interval.} 20 | } 21 | \description{ 22 | \loadmathjax 23 | Savitzky-Golay smoothing and derivative of a data matrix or vector. 24 | } 25 | \details{ 26 | The Savitzky-Golay algorithm fits a local polynomial regression on the signal. 27 | It requires evenly spaced data points. Mathematically, it operates simply as 28 | a weighted sum over a given window: 29 | 30 | \mjdeqn{ x_j\ast = \frac{1}{N}\sum_{h=-k}^{k}{c_hx_{j+h}}}{ x_j ast = 1/N \sum_{h=-k}^{k} c_hx_{j+h}} 31 | 32 | where \mjeqn{x_j\ast}{x_j ast} is the new value, \mjeqn{N}{N} is a 33 | normalizing coefficient, \mjeqn{k}{k} is the gap size on each side of 34 | \mjeqn{j}{j} and \mjeqn{c_h}{c_h} are pre-computed coefficients, that depends 35 | on the chosen polynomial order and degree. 36 | 37 | The sampling interval specified with the \code{delta.wav} argument is used for 38 | scaling and get numerically correct derivatives. 39 | 40 | The convolution function is written in C++/Rcpp for faster computations. 41 | } 42 | \examples{ 43 | data(NIRsoil) 44 | opar <- par(no.readonly = TRUE) 45 | par(mfrow = c(2, 1), mar = c(4, 4, 2, 2)) 46 | 47 | # plot of the 10 first spectra 48 | matplot(as.numeric(colnames(NIRsoil$spc)), 49 | t(NIRsoil$spc[1:10, ]), 50 | type = "l", 51 | xlab = "", 52 | ylab = "Absorbance" 53 | ) 54 | 55 | mtext("Raw spectra") 56 | NIRsoil$spc_sg <- savitzkyGolay( 57 | X = NIRsoil$spc, 58 | m = 1, 59 | p = 3, 60 | w = 11, 61 | delta.wav = 2 62 | ) 63 | 64 | matplot(as.numeric(colnames(NIRsoil$spc_sg)), 65 | t(NIRsoil$spc_sg[1:10, ]), 66 | type = "l", 67 | xlab = "Wavelength /nm", 68 | ylab = "1st derivative" 69 | ) 70 | 71 | mtext("1st derivative spectra") 72 | par(opar) 73 | } 74 | \references{ 75 | Luo, J., Ying, K., He, P., & Bai, J. (2005). Properties of Savitzky–Golay 76 | digital differentiators. Digital Signal Processing, 15(2), 122-136. 77 | 78 | Savitzky, A., and Golay, M.J.E., 1964. Smoothing and 79 | differentiation of data by simplified least squares procedures. 80 | Anal. Chem. 36, 1627-1639. 81 | 82 | Schafer, R. W. (2011). What is a Savitzky-Golay filter? (lecture notes). IEEE 83 | Signal processing magazine, 28(4), 111-117. 84 | 85 | Wentzell, P.D., and Brown, C.D., 2000. Signal processing in analytical 86 | chemistry. Encyclopedia of Analytical Chemistry, 9764-9800. 87 | } 88 | \author{ 89 | Antoine Stevens and \href{https://orcid.org/0000-0002-5369-5120}{Leonardo Ramirez-Lopez} 90 | } 91 | -------------------------------------------------------------------------------- /man/gapDer.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gapDer.R 3 | \name{gapDer} 4 | \alias{gapDer} 5 | \title{Gap-Segment derivative} 6 | \usage{ 7 | gapDer(X, m = 1, w = 1, s = 1, delta.wav) 8 | } 9 | \arguments{ 10 | \item{X}{a numeric matrix or vector` to transform (optionally a data frame 11 | that can be coerced to a numerical matrix).} 12 | 13 | \item{m}{an integer indicating the order of the derivative. 14 | Note that this function allows for high order derivatives (e.g. m = 6). If 0 15 | is passed, the function will just smooth out the signal(s).} 16 | 17 | \item{w}{an integer indicating the gap size (must be odd and >=1), i.e. the spacing 18 | between points over which the derivative is computed.} 19 | 20 | \item{s}{an integer indicating the segment size (must be odd and >=1), i.e. 21 | the range over which the points are averaged (default = 1, i.e. no 22 | smoothing corresponding to Norris-Gap Derivative).} 23 | 24 | \item{delta.wav}{the sampling interval (or band spacing).} 25 | } 26 | \value{ 27 | a matrix or vector with the filtered signal(s) 28 | } 29 | \description{ 30 | Gap-Segment derivatives of a data matrix or vector 31 | } 32 | \details{ 33 | In this type of derivatives, the gap size denotes the length of the x 34 | interval that separates the two segments that are averaged. A detailed 35 | explanation of gap segment derivatives can be found in Hopkins (2001). 36 | 37 | The sampling interval specified with the \code{delta.wav} argument is used for 38 | scaling and get numerically correct derivatives. 39 | 40 | The convolution function is written in C++/Rcpp for faster computations. 41 | } 42 | \examples{ 43 | data(NIRsoil) 44 | opar <- par(no.readonly = TRUE) 45 | par(mfrow = c(2, 2), mar = c(4, 4, 2, 2)) 46 | # plot of the 10 first spectra 47 | matplot(as.numeric(colnames(NIRsoil$spc)), 48 | t(NIRsoil$spc[1:10, ]), 49 | type = "l", 50 | xlab = "", 51 | ylab = "Absorbance" 52 | ) 53 | mtext("Raw spectra") 54 | 55 | der <- gapDer(NIRsoil$spc, m = 1, w = 1, s = 1, delta.wav = 2) 56 | matplot(as.numeric(colnames(der)), 57 | t(der[1:10, ]), 58 | type = "l", 59 | xlab = "Wavelength /nm", 60 | ylab = "gap derivative" 61 | ) 62 | 63 | mtext("1st derivative spectra") 64 | der <- gapDer(NIRsoil$spc, m = 1, w = 11, s = 1, delta.wav = 2) 65 | matplot(as.numeric(colnames(der)), t(der[1:10, ]), 66 | type = "l", 67 | xlab = "Wavelength /nm", 68 | ylab = "gap derivative" 69 | ) 70 | 71 | mtext("1st derivative spectra with a window size = 11 nm") 72 | der <- gapDer(NIRsoil$spc, m = 1, w = 11, s = 5, delta.wav = 2) 73 | matplot(as.numeric(colnames(der)), t(der[1:10, ]), 74 | type = "l", 75 | xlab = "Wavelength /nm", 76 | ylab = "gap derivative" 77 | ) 78 | mtext("1st derivative spectra with: window size: 11 nm, smoothing: 5 nm") 79 | par(opar) 80 | } 81 | \references{ 82 | Hopkins, D. W. (2001). What is a Norris derivative?. NIR news, 12(3), 3-5. 83 | } 84 | \seealso{ 85 | \code{\link{savitzkyGolay}}, \code{\link{movav}}, 86 | \code{\link{binning}}, \code{\link{continuumRemoval}} 87 | } 88 | \author{ 89 | Antoine Stevens and \href{https://orcid.org/0000-0002-5369-5120}{Leonardo Ramirez-Lopez} 90 | } 91 | -------------------------------------------------------------------------------- /R/resample2.R: -------------------------------------------------------------------------------- 1 | #' @title Resample a high resolution signal to a low resolution signal using full 2 | #' width half maximum (FWHM) values 3 | #' @description 4 | #' Resample a data matrix or vector to match the response of another instrument 5 | #' using full width half maximum (FWHM) values 6 | #' @usage 7 | #' resample2(X, wav, new.wav, fwhm) 8 | #' @param X a numeric matrix or vector to resample (optionally a data frame that can 9 | #' be coerced to a numerical matrix). 10 | #' @param wav a numeric vector giving the original band positions. 11 | #' @param new.wav a numeric vector giving the new band positions. 12 | #' @param fwhm a numeric vector giving the full width half maximums of the new 13 | #' band positions. If no value is specified, it is assumed that the fwhm is 14 | #' equal to the sampling interval (i.e. band spacing). If only one value is 15 | #' specified, the fwhm is assumed to be constant over the spectral range. 16 | #' @author Antoine Stevens 17 | #' @details 18 | #' The function uses gaussian models defined by fwhm values to resample the high 19 | #' resolution data to new band positions and resolution. 20 | #' It assumes that band spacing and fwhm of the input data is constant over the 21 | #' spectral range. 22 | #' The interpolated values are set to 0 if input data fall outside by 3 standard 23 | #' deviations of the gaussian densities defined by fwhm. 24 | #' @examples 25 | #' data(NIRsoil) 26 | #' wav <- as.numeric(colnames(NIRsoil$spc)) 27 | #' # Plot 10 first spectra 28 | #' matplot(wav, t(NIRsoil$spc[1:10, ]), 29 | #' type = "l", xlab = "Wavelength /nm", 30 | #' ylab = "Absorbance" 31 | #' ) 32 | #' # ASTER SWIR bands (nm) 33 | #' new_wav <- c(1650, 2165, 2205, 2260, 2330, 2395) # positions 34 | #' fwhm <- c(100, 40, 40, 50, 70, 70) # fwhm's 35 | #' # Resample NIRsoil to ASTER band positions 36 | #' aster <- resample2(NIRsoil$spc, wav, new_wav, fwhm) 37 | #' matpoints(as.numeric(colnames(aster)), t(aster[1:10, ]), pch = 1:5) 38 | #' @return 39 | #' a matrix or vector with resampled values 40 | #' @seealso 41 | #' \code{\link{resample}} 42 | #' @export 43 | #' 44 | resample2 <- function(X, wav, new.wav, fwhm) { 45 | if (is.data.frame(X)) { 46 | X <- as.matrix(X) 47 | } 48 | if (missing(wav)) { 49 | stop("wav argument must be specified") 50 | } 51 | if (missing(new.wav)) { 52 | stop("new.wav argument must be specified") 53 | } 54 | if (missing(fwhm)) { 55 | fwhm <- c(new.wav[2] - new.wav[1], diff(new.wav, 2) / 2, new.wav[length(new.wav)] - new.wav[length(new.wav) - 1]) 56 | } 57 | if (length(fwhm) == 1) { 58 | fwhm <- rep(fwhm, length(new.wav)) 59 | } 60 | if (length(new.wav) != length(fwhm)) { 61 | stop("length(fwhm) must be equal to length(new.wav)") 62 | } 63 | 64 | if (is.matrix(X)) { 65 | if (length(wav) != ncol(X)) { 66 | stop("length(wav) must be equal to ncol(X)") 67 | } 68 | output <- resample_fwhm(X, wav, new.wav, fwhm) 69 | rownames(output) <- rownames(X) 70 | colnames(output) <- new.wav 71 | } else { 72 | if (length(wav) != length(X)) { 73 | stop("length(wav) must be equal to length(X)") 74 | } 75 | output <- resample_fwhm_vec(X, wav, new.wav, fwhm) 76 | names(output) <- new.wav 77 | } 78 | return(output) 79 | } 80 | -------------------------------------------------------------------------------- /R/RcppExports.R: -------------------------------------------------------------------------------- 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #' @title bitwise operations 5 | #' @description 6 | #' bitwise operations in C++ 7 | #' @param aa integer 8 | #' @param bb integer 9 | #' @keywords internal 10 | #' @useDynLib prospectr 11 | bitAND <- function(aa, bb) { 12 | .Call('_prospectr_bitAND', PACKAGE = 'prospectr', aa, bb) 13 | } 14 | 15 | bitSR <- function(a, b) { 16 | .Call('_prospectr_bitSR', PACKAGE = 'prospectr', a, b) 17 | } 18 | 19 | #' @title Convolve 20 | #' @description 21 | #' Convolution, written in C++ 22 | #' @param X matrix to convolve 23 | #' @param f filter 24 | #' @keywords internal 25 | #' @useDynLib prospectr 26 | convCppM <- function(X, f) { 27 | .Call('_prospectr_convCppM', PACKAGE = 'prospectr', X, f) 28 | } 29 | 30 | convCppV <- function(X, f) { 31 | .Call('_prospectr_convCppV', PACKAGE = 'prospectr', X, f) 32 | } 33 | 34 | #' @title A fast distance algorithm for two matrices written in C++ 35 | #' @usage 36 | #' fastDist(X,Y,method) 37 | #' @param X a \code{matrix} 38 | #' @param Y a \code{matrix} 39 | #' @param method a \code{string} with possible values "euclid", "cor", "cosine" 40 | #' @return a distance \code{matrix} 41 | #' @keywords internal 42 | #' @useDynLib prospectr 43 | #' @author Antoine Stevens and Leonardo Ramirez-Lopez 44 | fastDist <- function(X, Y, method) { 45 | .Call('_prospectr_fastDist', PACKAGE = 'prospectr', X, Y, method) 46 | } 47 | 48 | #' @title A fast distance algorithm for a matrix and a vector written in C++ 49 | #' @usage 50 | #' fastDistV(X,Y,method) 51 | #' @param X a \code{matrix} 52 | #' @param Y a \code{vector} 53 | #' @param method a \code{string} with possible values "euclid", "cor", "cosine" 54 | #' @return a distance \code{vector} 55 | #' @author Antoine Stevens and Leonardo Ramirez-Lopez 56 | #' @keywords internal 57 | #' @useDynLib prospectr 58 | fastDistV <- function(X, Y, method) { 59 | .Call('_prospectr_fastDistV', PACKAGE = 'prospectr', X, Y, method) 60 | } 61 | 62 | #' @title get_msc_coeff 63 | #' @description 64 | #' Coefficients for multiplicative Scatter Correction written in C++ 65 | #' @param X matrix 66 | #' @param ref_spectrum a matrix of one row and same columns as in X 67 | #' @keywords internal 68 | #' @useDynLib prospectr 69 | get_msc_coeff <- function(X, ref_spectrum) { 70 | .Call('_prospectr_get_msc_coeff', PACKAGE = 'prospectr', X, ref_spectrum) 71 | } 72 | 73 | #' @title Resample to given band position and fwhm 74 | #' @description 75 | #' Resample, written in C++ 76 | #' @param X matrix to resample 77 | #' @param wav a numeric \code{vector} giving the original band positions 78 | #' @param new_wav a numeric \code{vector} giving the new band positions 79 | #' @param fwhm numeric \code{vector} giving the full width half maximums of the new band positions. 80 | #' @keywords internal 81 | #' @useDynLib prospectr 82 | resample_fwhm <- function(X, wav, new_wav, fwhm) { 83 | .Call('_prospectr_resample_fwhm', PACKAGE = 'prospectr', X, wav, new_wav, fwhm) 84 | } 85 | 86 | resample_fwhm_vec <- function(X, wav, new_wav, fwhm) { 87 | .Call('_prospectr_resample_fwhm_vec', PACKAGE = 'prospectr', X, wav, new_wav, fwhm) 88 | } 89 | 90 | residLm <- function(Yr, Xr) { 91 | .Call('_prospectr_residLm', PACKAGE = 'prospectr', Yr, Xr) 92 | } 93 | 94 | -------------------------------------------------------------------------------- /R/detrend.R: -------------------------------------------------------------------------------- 1 | #' @title Detrending spectral data 2 | #' @description 3 | #' \loadmathjax 4 | #' Normalizes each row of an input matrix by applying a SNV transformation 5 | #' followed by fitting a second order linear model and returning the fitted 6 | #' residuals. 7 | #' @usage 8 | #' detrend(X, wav, p = 2) 9 | #' @param X a numeric matrix or vector to process (optionally a data frame that 10 | #' can be coerced to a numerical matrix) 11 | #' @param wav the wavelengths/ band centers. 12 | #' @param p an integer larger than 1 indicating the polynomial order (default is 13 | #' 2, as in the original paper of Barnes et al., 1989). 14 | #' @author Antoine Stevens and \href{https://orcid.org/0000-0002-5369-5120}{Leonardo Ramirez-Lopez} 15 | #' @examples 16 | #' data(NIRsoil) 17 | #' wav <- as.numeric(colnames(NIRsoil$spc)) 18 | #' # conversion to reflectance 19 | #' opar <- par(no.readonly = TRUE) 20 | #' par(mfrow = c(2, 1), mar = c(4, 4, 2, 2)) 21 | #' # plot of the 10 first spectra 22 | #' matplot(wav, t(NIRsoil$spc[1:10, ]), 23 | #' type = "l", 24 | #' xlab = "", 25 | #' ylab = "Absorbance" 26 | #' ) 27 | #' mtext("Raw spectra") 28 | #' det <- detrend(NIRsoil$spc, wav) 29 | #' matplot(wav, t(det[1:10, ]), 30 | #' type = "l", 31 | #' xlab = "Wavelength /nm", 32 | #' ylab = "Absorbance" 33 | #' ) 34 | #' mtext("Detrend spectra") 35 | #' par(opar) 36 | #' @details The detrend is a row-wise transformation that allows to correct for 37 | #' wavelength-dependent scattering effects (variations in curvilinearity). A 38 | #' \mjeqn{p}{p} order polynomial is fit for each spectrum (\mjeqn{x_i}{x_i}) 39 | #' using the vector of bands (\mjeqn{\lambda}{\lambda}, e.g. wavelengths) as 40 | #' explanatory variable as follows: 41 | #' 42 | #' \mjdeqn{x_i = a\lambda^p + ... + b\lambda + c + e_i}{x_i = a\lambda^p + ... + b\lambda + c + e_i} 43 | #' 44 | #' were a, b, c are estimated by least squares, and \mjeqn{e_i}{e_i} are the 45 | #' spectral residuals of the least square fit. The residuals of the \mjeqn{i}{i}th 46 | #' correspond to the \mjeqn{i}{i}th detrended spectrum. 47 | #' 48 | #' @seealso \code{\link{standardNormalVariate}}, \code{\link{blockScale}}, 49 | #' \code{\link{blockNorm}} 50 | #' @references Barnes RJ, Dhanoa MS, Lister SJ. 1989. Standard normal variate 51 | #' transformation and de-trending of near-infrared diffuse reflectance spectra. 52 | #' Applied spectroscopy, 43(5): 772-777. 53 | #' @return a matrix or vector with the detrended data. 54 | #' @export 55 | 56 | detrend <- function(X, wav, p = 2) { 57 | if (missing(wav)) { 58 | stop("argument wav must be specified") 59 | } 60 | 61 | if (is.data.frame(X)) { 62 | X <- as.matrix(X) 63 | } 64 | 65 | was_vec <- is.vector(X) 66 | if (p < 1) { 67 | stop("'p' must be an integer larger than 0") 68 | } 69 | 70 | if (p != round(p)) { 71 | stop("'p' must be an integer") 72 | } 73 | 74 | if (is.vector(X)) { 75 | nms <- names(X) 76 | X <- matrix(X, ncol = length(X)) 77 | } 78 | 79 | xpoly <- stats::poly(wav, p) 80 | # SNV transformation 81 | X <- sweep(X, 1, rowMeans(X), "-") 82 | X <- sweep(X, 1, apply(X, 1, sd), "/") 83 | 84 | # get the residuals output <- t(apply(X, 1, function(y) lm.fit(x= xpoly,y)$residuals)) 85 | output <- residLm(X, xpoly) # using Rcpp ... 86 | 87 | if (was_vec) { 88 | output <- as.vector(output) 89 | names(output) <- nms 90 | } else { 91 | dimnames(output) <- list(rownames(X), colnames(X)) 92 | } 93 | return(output) 94 | } 95 | -------------------------------------------------------------------------------- /man/shenkWest.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/shenkWest.R 3 | \name{shenkWest} 4 | \alias{shenkWest} 5 | \title{SELECT algorithm for calibration sampling} 6 | \usage{ 7 | shenkWest(X, 8 | d.min = 0.6, 9 | pc = 0.95, 10 | rm.outlier = FALSE, 11 | .center = TRUE, 12 | .scale = FALSE) 13 | } 14 | \arguments{ 15 | \item{X}{a numeric matrix (optionally a data frame that can 16 | be coerced to a numerical matrix).} 17 | 18 | \item{d.min}{a minimum distance (default = 0.6).} 19 | 20 | \item{pc}{the number of principal components retained in the computation 21 | distance in the standardized Principal Component space (Mahalanobis distance). 22 | If \code{pc < 1}, the number of principal components kept corresponds to the 23 | number of components explaining at least (\code{pc * 100}) percent of the total 24 | variance (default = 0.95).} 25 | 26 | \item{rm.outlier}{logical. If \code{TRUE}, remove observations with a standardized 27 | mahalanobis distance to the center of the data greater than 3 28 | (default = \code{FALSE}).} 29 | 30 | \item{.center}{logical. Indicates whether the input matrix should be centered 31 | before Principal Component Analysis. Default set to \code{TRUE}.} 32 | 33 | \item{.scale}{logical. Indicates whether the input matrix should be scaled 34 | before Principal Component Analysis. Default set to \code{FALSE}.} 35 | } 36 | \value{ 37 | a \code{list} with components: 38 | \itemize{ 39 | \item{'\code{model}': numeric vector giving the row indices of the input data 40 | selected for calibration} 41 | \item{'\code{test}': numeric vector giving the row indices of the remaining 42 | observations} 43 | \item{'\code{pc}': a numeric matrix of the scaled pc scores} 44 | } 45 | } 46 | \description{ 47 | Select calibration samples from a large multivariate data using the SELECT 48 | algorithm as described in Shenk and Westerhaus (1991). 49 | } 50 | \details{ 51 | The SELECT algorithm is an iterative procedure based on the standardized 52 | Mahalanobis distance between observations. 53 | First, the observation having the highest number of neighbours within a given 54 | minimum distance is selected and its neighbours are discarded. The procedure 55 | is repeated until there is no observation left. 56 | 57 | If the \code{rm.outlier} argument is set to \code{TRUE}, outliers will be removed 58 | before running the SELECT algorithm, using the CENTER algorithm of 59 | Shenk and Westerhaus (1991), i.e. samples with a standardized Mahalanobis 60 | distance \verb{>3} are removed. 61 | } 62 | \examples{ 63 | data(NIRsoil) 64 | # reduce data size 65 | NIRsoil$spc <- binning(X = NIRsoil$spc, bin.size = 5) 66 | sel <- shenkWest(NIRsoil$spc, pc = .99, d.min = .3, rm.outlier = FALSE) 67 | plot(sel$pc[, 1:2], xlab = "PC1", ylab = "PC2") 68 | # points selected for calibration 69 | points(sel$pc[sel$model, 1:2], pch = 19, col = 2) 70 | # without outliers 71 | sel <- shenkWest(NIRsoil$spc, pc = .99, d.min = .3, rm.outlier = TRUE) 72 | plot(sel$pc[, 1:2], xlab = "PC1", ylab = "PC2") 73 | # points selected for calibration 74 | points(sel$pc[sel$model, 1:2], pch = 15, col = 3) 75 | } 76 | \references{ 77 | Shenk, J.S., and Westerhaus, M.O., 1991. Population Definition, 78 | Sample Selection, and Calibration Procedures for Near Infrared Reflectance 79 | Spectroscopy. Crop Science 31, 469-474. 80 | } 81 | \seealso{ 82 | \code{\link{kenStone}}, \code{\link{duplex}}, \code{\link{puchwein}} 83 | } 84 | \author{ 85 | Antoine Stevens 86 | } 87 | -------------------------------------------------------------------------------- /R/binning.R: -------------------------------------------------------------------------------- 1 | #' @title Signal binning 2 | #' @description 3 | #' Compute average values of a signal in pre-determined bins (col-wise subsets). 4 | #' The bin size can be determined either directly or by specifying the number of 5 | #' bins. Sometimes called boxcar transformation in signal processing 6 | #' @usage 7 | #' binning(X, bins, bin.size) 8 | #' @param X a numeric matrix or vector to process (optionally a data frame that 9 | #' can be coerced to a numerical matrix). 10 | #' @param bins the number of bins. 11 | #' @param bin.size the desired size of the bins. 12 | #' @author Antoine Stevens & \href{https://orcid.org/0000-0002-5369-5120}{Leonardo Ramirez-Lopez} 13 | #' @examples 14 | #' data(NIRsoil) 15 | #' wav <- as.numeric(colnames(NIRsoil$spc)) 16 | #' 17 | #' # 5 first spectra 18 | #' matplot(wav, t(NIRsoil$spc[1:5, ]), 19 | #' type = "l", 20 | #' xlab = "Wavelength /nm", 21 | #' ylab = "Absorbance" 22 | #' ) 23 | #' 24 | #' NIRsoil$spc_binned <- binning(NIRsoil$spc, bin.size = 20) 25 | #' 26 | #' # bin means 27 | #' matpoints(as.numeric(colnames(NIRsoil$spc_binned)), 28 | #' t(NIRsoil$spc_binned[1:5, ]), 29 | #' pch = 1:5 30 | #' ) 31 | #' 32 | #' NIRsoil$spc_binned <- binning(NIRsoil$spc, bins = 20) 33 | #' dim(NIRsoil$spc_binned) # 20 bins 34 | #' 35 | #' # 5 first spectra 36 | #' matplot(wav, 37 | #' t(NIRsoil$spc[1:5, ]), 38 | #' type = "l", 39 | #' xlab = "Wavelength /nm", 40 | #' ylab = "Absorbance" 41 | #' ) 42 | #' 43 | #' # bin means 44 | #' matpoints(as.numeric(colnames(NIRsoil$spc_binned)), 45 | #' t(NIRsoil$spc_binned[1:5, ]), 46 | #' pch = 1:5 47 | #' ) 48 | #' @return 49 | #' a matrix or vector with average values per bin. 50 | #' @seealso 51 | #' \code{\link{savitzkyGolay}}, \code{\link{movav}}, 52 | #' \code{\link{gapDer}}, \code{\link{continuumRemoval}} 53 | #' @importFrom stats aggregate 54 | #' @export 55 | #' 56 | 57 | binning <- function(X, bins, bin.size) { 58 | if (is.data.frame(X)) { 59 | X <- as.matrix(X) 60 | } 61 | if (!missing(bins) & !missing(bin.size)) { 62 | stop("either 'bins' or 'bin.size' must be specified") 63 | } 64 | if (missing(bins) & missing(bin.size)) { 65 | return(X) 66 | } 67 | 68 | if (is.matrix(X)) { 69 | nv <- ncol(X) 70 | } else { 71 | nv <- length(X) 72 | } 73 | 74 | 75 | if (missing(bins) & !missing(bin.size)) { 76 | b <- findInterval( 77 | 1:nv, 78 | seq(1, nv, bin.size), 79 | left.open = FALSE 80 | ) 81 | } else { 82 | bins <- bins + 1 83 | b <- findInterval( 84 | 1:nv, 85 | round(seq(1, nv, length.out = bins), 3), # round to 3 to avoid the famous floating math imprecision bug of R 86 | rightmost.closed = TRUE, 87 | left.open = FALSE 88 | ) 89 | } 90 | 91 | n_classes <- max(b) 92 | 93 | if (is.matrix(X)) { 94 | output <- matrix(0, nrow(X), n_classes) 95 | 96 | # for (i in seq_len(n_classes)) { 97 | # output[, i] <- rowMeans(X[, b == i, drop = F]) 98 | # } 99 | 100 | output <- aggregate(t(X), by = list(bin = b), FUN = mean) 101 | output <- t(output[order(output[, 1]), -1]) 102 | colnames(output) <- colnames(X)[ceiling(tapply(b, b, function(x) mean(which(b == x[1]), na.rm = TRUE)))] # find colnames 103 | rownames(output) <- rownames(X) 104 | } else { 105 | output <- tapply(X, b, mean) 106 | names(output) <- names(X)[ceiling(tapply(b, b, function(x) mean(which(b == x[1]), na.rm = TRUE)))] 107 | } 108 | 109 | return(output) 110 | } 111 | -------------------------------------------------------------------------------- /man/cochranTest.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cochranTest.R 3 | \name{cochranTest} 4 | \alias{cochranTest} 5 | \title{Cochran \emph{C} Test} 6 | \usage{ 7 | cochranTest(X, id, fun = 'sum', alpha = 0.05) 8 | } 9 | \arguments{ 10 | \item{X}{a a numeric matrix (optionally a data frame that can 11 | be coerced to a numerical matrix).} 12 | 13 | \item{id}{factor of the replicate identifiers.} 14 | 15 | \item{fun}{function to aggregate data: 'sum' (default), 'mean', 'PC1' or 'PC2'.} 16 | 17 | \item{alpha}{\emph{p}-value of the Cochran \emph{C} test.} 18 | } 19 | \value{ 20 | a list with components: 21 | \itemize{ 22 | \item{'\code{X}': input matrix from which outlying observations (rows) have 23 | been removed} 24 | \item{'\code{outliers}': numeric vector giving the row indices of the input 25 | data that have been flagged as outliers} 26 | } 27 | } 28 | \description{ 29 | \loadmathjax 30 | Detects and removes replicate outliers in data series based on the Cochran 31 | \emph{C} test for homogeneity in variance. 32 | } 33 | \details{ 34 | The Cochran \emph{C} test is test whether a single estimate of variance is 35 | significantly larger than a a group of variances. 36 | It can be computed as: 37 | 38 | \mjdeqn{RMSD = \sqrt{\frac{1}{n} \sum_{i=1}^n {(y_i - \ddot{y}_i)^2}}}{RMSD = sqrt{{1}/{n} sum (y_i - ddot{y}_i)^2}} 39 | 40 | where \mjeqn{y_i}{y_i} is the value of the side variable of the \mjeqn{i}{i}th sample, 41 | \mjeqn{\ddot{y}_i}{\ddot{y}_i} is the value of the side variable of the nearest neighbor 42 | of the \mjeqn{i}{i}th sample and \mjeqn{n}{n} is the total number of observations. 43 | 44 | For multivariate data, the variance \mjeqn{S_i^2}{S_i^2} can be computed on aggregated 45 | data, using a summary function (\code{fun} argument) 46 | such as \code{sum}, \code{mean}, or first principal components ('PC1' and 'PC2'). 47 | 48 | An observation is considered to have an outlying variance if the Cochran \emph{C} 49 | statistic is higher than an upper limit critical value \mjeqn{C_{UL}}{C_{UL}} 50 | which can be evaluated with ('t Lam, 2010): 51 | 52 | \mjdeqn{C_{UL}(\alpha, n, N) = 1 + [\frac{N-1}{F_{c}(\alpha/N,(n-1),(N-1)(n-1))}]^{-1} }{C_{UL}(\alpha, n, N) = 1 + [\frac{N-1}{F_{c}(\alpha/N,(n-1),(N-1)(n-1))}]^{-1}} 53 | 54 | where \mjeqn{\alpha}{\alpha} is the \emph{p}-value of the test, \mjeqn{n}{n} is the (average) 55 | number of replicates and \mjeqn{F_c}{F_c} is the critical value of the Fisher's \mjeqn{F}{F} ratio. 56 | 57 | The replicates with outlying variance are removed and the test can be applied 58 | iteratively until no outlying variance is detected under the given \emph{p}-value. 59 | Such iterative procedure is implemented in \code{cochranTest}, allowing the user 60 | to specify whether a set of replicates must be removed or not from the 61 | dataset by graphical inspection of the outlying replicates. The user has then 62 | the possibility to (i) remove all replicates at once, (ii) remove one or more 63 | replicates by giving their indices or (iii) remove nothing. 64 | } 65 | \note{ 66 | The test assumes a balanced design (i.e. data series have the same 67 | number of replicates). 68 | } 69 | \references{ 70 | Centner, V., Massart, D.L., and De Noord, O.E., 1996. Detection of 71 | inhomogeneities in sets of NIR spectra. Analytica Chimica Acta 330, 1-17. 72 | 73 | R.U.E. 't Lam (2010). Scrutiny of variance results for outliers: Cochran's 74 | test optimized. Analytica Chimica Acta 659, 68-84. 75 | 76 | \url{https://en.wikipedia.org/wiki/Cochran's_C_test} 77 | } 78 | \author{ 79 | Antoine Stevens 80 | } 81 | -------------------------------------------------------------------------------- /R/msc.R: -------------------------------------------------------------------------------- 1 | #' @title Multiplicative Scatter Correction (msc) 2 | #' 3 | #' @description 4 | #' \loadmathjax 5 | #' \ifelse{html}{\out{Maturing lifecycle}}{\strong{Maturing}} 6 | #' 7 | #' This function implements the multiplicative scatter correction method 8 | #' which attempts to remove physical light scatter by accounting for additive 9 | #' and multiplicative effects (Geladi et al., 1985). 10 | #' 11 | #' @usage 12 | #' msc(X, ref_spectrum = colMeans(X)) 13 | #' 14 | #' @param X a numeric matrix of spectral data. 15 | #' @param ref_spectrum a numeric vector corresponding to an "ideal" reference 16 | #' spectrum (e.g. free of scattering effects). By default the function uses the 17 | #' mean spectrum of the input \code{X}. See details. Note that this argument was 18 | #' previously named as `reference_spc`, however, it has been renamed to 19 | #' `ref_spectrum` to emphasize that this argument is a vector and not a 20 | #' matrix of spectra. 21 | #' 22 | #' @details 23 | #' The Multiplicative Scatter Correction (MSC) is a normalization method that 24 | #' attempts to account for additive and multiplicative effects by aligning each 25 | #' spectrum (\mjeqn{x_i}{x_i}) to an ideal reference one (\mjeqn{x_r}{x_r}) as 26 | #' follows: 27 | #' 28 | #' \mjdeqn{x_i = m_i x_r + a_i}{x_i = m_i x_r + a_i} 29 | #' \mjdeqn{MSC(x_i) = \frac{x_i - a_i}{m_i}}{MSC(x_i) = {x_i - a_i}/{m_i}} 30 | #' 31 | #' where \mjeqn{a_i}{a_i} and \mjeqn{m_i}{m_i} are the additive and 32 | #' multiplicative terms respectively. 33 | #' @return 34 | #' a matrix of normalized spectral data with an attribute which indicates the 35 | #' reference spectrum used. 36 | #' @author 37 | #' \href{https://orcid.org/0000-0002-5369-5120}{Leonardo Ramirez-Lopez} and Guillaume Hans 38 | #' 39 | #' @references 40 | #' Geladi, P., MacDougall, D., and Martens, H. 1985. Linearization and 41 | #' Scatter-Correction for Near-Infrared Reflectance Spectra of Meat. 42 | #' Applied Spectroscopy, 39(3):491-500. 43 | #' 44 | #' @seealso \code{\link{standardNormalVariate}}, \code{\link{detrend}}, 45 | #' \code{\link{blockScale}}, \code{\link{blockNorm}} 46 | #' 47 | #' @examples 48 | #' data(NIRsoil) 49 | #' NIRsoil$msc_spc <- msc(X = NIRsoil$spc) 50 | #' 51 | #' # 10 first msc spectra 52 | #' matplot( 53 | #' x = as.numeric(colnames(NIRsoil$msc_spc)), 54 | #' y = t(NIRsoil$msc_spc[1:10, ]), 55 | #' type = "l", 56 | #' xlab = "wavelength, nm", 57 | #' ylab = "msc" 58 | #' ) 59 | #' 60 | #' # another example 61 | #' spectra_a <- NIRsoil$spc[1:40, ] 62 | #' spectra_b <- NIRsoil$spc[-(1:40), ] 63 | #' 64 | #' spectra_a_msc <- msc(spectra_a, colMeans(spectra_a)) 65 | #' 66 | #' # correct spectra_a based on the reference spectrum used to correct 67 | #' # spectra_a 68 | #' 69 | #' spectra_b_msc <- msc( 70 | #' spectra_b, 71 | #' ref_spectrum = attr(spectra_a_msc, "Reference spectrum") 72 | #' ) 73 | #' @export 74 | 75 | 76 | msc <- function(X, ref_spectrum = colMeans(X)) { 77 | X <- as.matrix(X) 78 | 79 | if (!is.vector(ref_spectrum)) { 80 | stop("'ref_spectrum' must be a vector") 81 | } 82 | 83 | if (ncol(X) != length(ref_spectrum)) { 84 | stop("The number of column in X must be equal to the length of 'ref_spectrum'") 85 | } 86 | offsets_slopes <- get_msc_coeff(X, ref_spectrum) 87 | Xz <- sweep(X, MARGIN = 1, STATS = offsets_slopes[1, ], FUN = "-", check.margin = FALSE) 88 | Xz <- sweep(Xz, MARGIN = 1, STATS = offsets_slopes[2, ], FUN = "/", check.margin = FALSE) 89 | attr(Xz, "Reference spectrum:") <- ref_spectrum 90 | Xz 91 | } 92 | -------------------------------------------------------------------------------- /R/baseline.R: -------------------------------------------------------------------------------- 1 | #' @title baseline 2 | #' @description 3 | #' 4 | #' \ifelse{html}{\out{Maturing lifecycle}}{\strong{Maturing}} 5 | #' 6 | #' Fits a baseline to each spectrum in a matrix and removes it from the 7 | #' corresponding input spectrum. A vector can also be passed to this function. 8 | #' @usage 9 | #' baseline(X, wav) 10 | #' @param X a numeric matrix or vector to process (optionally a data frame that 11 | #' can be coerced to a numerical matrix). 12 | #' @param wav optional. A numeric vector of band positions. 13 | #' @author \href{https://orcid.org/0000-0002-5369-5120}{Leonardo Ramirez-Lopez} 14 | #' with contributions from Mervin Manalili 15 | #' @return a matrix or vector with the baselined spectra. The resulting matrix 16 | #' is output with an attribute called \code{baselines} which contain the spectra 17 | #' of the fitted baselines. 18 | #' 19 | #' This function is similar to \code{\link{continuumRemoval}} and it might 20 | #' replace some of its functionality in the future. 21 | #' 22 | #' @examples 23 | #' data(NIRsoil) 24 | #' wav <- as.numeric(colnames(NIRsoil$spc)) 25 | #' # plot of the 5 first absorbance spectra 26 | #' matplot(wav, 27 | #' t(NIRsoil$spc[1:5, ]), 28 | #' type = "l", 29 | #' ylim = c(0, .6), 30 | #' xlab = "Wavelength /nm", 31 | #' ylab = "Absorbance" 32 | #' ) 33 | #' 34 | #' bs <- baseline(NIRsoil$spc, wav) 35 | #' matlines(wav, t(bs[1:5, ])) 36 | #' 37 | #' fitted_baselines <- attr(bs, "baselines") 38 | #' matlines(wav, t(fitted_baselines[1:5, ])) 39 | #' title("Original spectra, baselines and baselined spectra") 40 | #' @seealso 41 | #' \code{\link{savitzkyGolay}}, \code{\link{movav}}, 42 | #' \code{\link{gapDer}}, \code{\link{binning}}, \code{\link{continuumRemoval}} 43 | #' @details 44 | #' The baseline function find points lying on the convex hull 45 | #' of a spectrum, connects the points by linear interpolation and 46 | #' subtracts the interpolated line (baseline) from the corresponding spectrum. 47 | #' @export 48 | 49 | baseline <- function(X, wav) { 50 | if (is.data.frame(X)) { 51 | X <- as.matrix(X) 52 | } 53 | 54 | if (missing(wav)) { 55 | wav <- 1:ncol(X) 56 | } 57 | 58 | wav <- c( 59 | wav[1] - diff(wav[1:2]), 60 | wav, 61 | wav[length(wav)] + diff(wav[(length(wav) - 1):length(wav)]) 62 | ) 63 | 64 | # make sure the edges will be well above any peak 65 | edges <- abs(apply(X, 1, "max")) + abs(apply(X, 1, "min")) 66 | edges <- edges * 2 67 | 68 | X <- cbind(edges, X, edges) 69 | colnames(X) <- wav 70 | 71 | ## simple baseline function 72 | simple_bs <- function(x, wav) { 73 | id <- sort(chull(wav, x)) 74 | id <- id[-c(1, length(id))] 75 | hull_line <- approx(x = wav[id], y = x[id], xout = wav, method = "linear")$y 76 | return(hull_line) 77 | } 78 | 79 | if (is.matrix(X)) { 80 | if (missing(wav)) { 81 | wav <- seq_len(ncol(X)) 82 | } 83 | if (length(wav) != ncol(X)) { 84 | stop("length(wav) must be equal to ncol(X)") 85 | } 86 | hull_line <- t(apply(X, 1, function(x) simple_bs(x, wav))) 87 | } else { 88 | hull_line <- simple_bs(X, wav) 89 | } 90 | 91 | hull_line <- hull_line[, -c(1, ncol(hull_line))] 92 | X <- X[, -c(1, ncol(X))] 93 | 94 | baselined <- X - hull_line 95 | 96 | wav <- wav[-c(1, length(wav))] 97 | 98 | if (is.matrix(X)) { 99 | colnames(hull_line) <- colnames(baselined) <- wav 100 | rownames(hull_line) <- rownames(baselined) <- rownames(X) 101 | } else { 102 | hull_line <- as.vector(hull_line) 103 | names(hull_line) <- names(baselined) <- wav 104 | } 105 | attr(baselined, "baselines") <- hull_line 106 | return(baselined) 107 | } 108 | -------------------------------------------------------------------------------- /man/honigs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/honigs.R 3 | \name{honigs} 4 | \alias{honigs} 5 | \title{Honigs algorithm for calibration sampling} 6 | \usage{ 7 | honigs(X, k, type) 8 | } 9 | \arguments{ 10 | \item{X}{a numeric matrix with absorbance or continuum-removed reflectance 11 | values (optionally a data frame that can be coerced to a numerical matrix).} 12 | 13 | \item{k}{the number of samples to select for calibration.} 14 | 15 | \item{type}{type of data: 'A' for absorbance (default), 'R' for reflectance, 16 | 'CR' for continuum-removed reflectance} 17 | } 18 | \value{ 19 | a \code{list} with components: 20 | \itemize{ 21 | \item{'\code{model}': numeric vector giving the row indices of the input data 22 | selected for calibration} 23 | \item{'\code{test}': numeric vector giving the row indices of the remaining 24 | observations} 25 | \item{'\code{bands}': indices of the columns used during the selection procedure} 26 | } 27 | } 28 | \description{ 29 | Select calibration samples from a data matrix using the Honings et al. (1985) 30 | method 31 | } 32 | \details{ 33 | The Honigs algorithm is a simple method to select calibration samples based 34 | on their absorption features. Absorbance, reflectance and continuum-removed 35 | reflectance values (see \code{\link{continuumRemoval}}) can be used (\code{type} 36 | argument). 37 | The algorithm can be described as follows: let \eqn{A} be a matrix of 38 | \eqn{(i \times j)} absorbance values: 39 | 40 | \enumerate{ 41 | \item the observation (row) with the maximum absolute absorbance 42 | (\eqn{max(|A|)}) is selected and assigned to the calibration set. 43 | \item a vector of weights \eqn{W} is computed as \eqn{A_j/max_A} where 44 | \eqn{A_j} is the column of \eqn{A} having the maximum absolute absorbance 45 | and \eqn{max_A} is the absorbance value corresponding to the maximum 46 | absolute absorbance of \eqn{A} 47 | \item each row \eqn{A_i} is multiplied by the corresponding weight \eqn{W_i} 48 | and the resulting vector is subtracted from the original row \eqn{A_i}. 49 | \item the row of the selected observation and the column with the maximum 50 | absolute absorbance is removed from the matrix 51 | \item go back to step 1 and repeat the procedure until the desired number 52 | of selected samples is reached 53 | } 54 | 55 | The observation with the maximum absorbance is considered to have 56 | an unusual composition. The algorithm selects therefore this observation and 57 | remove from other samples the selected absorption feature by subtraction. 58 | Samples with low concentration related to this absorption will then have 59 | large negative absorption after the subtraction step 60 | and hence will be likely to be selected rapidly by the selection procedure 61 | as well. 62 | } 63 | \note{ 64 | The selection procedure is sensitive to noisy features in the signal. 65 | The number of samples selected \code{k} selected by the algorithm cannot be 66 | greater than the number of wavelengths. 67 | } 68 | \examples{ 69 | data(NIRsoil) 70 | sel <- honigs(NIRsoil$spc, k = 10, type = "A") 71 | wav <- as.numeric(colnames(NIRsoil$spc)) 72 | # spectral library 73 | matplot(wav, 74 | t(NIRsoil$spc), 75 | type = "l", 76 | xlab = "wavelength /nm", 77 | ylab = "Abs", 78 | col = "grey50" 79 | ) 80 | # plot calibration spectra 81 | matlines(wav, 82 | t(NIRsoil$spc[sel$model, ]), 83 | type = "l", 84 | xlab = "wavelength /nm", 85 | ylab = "Abs", 86 | lwd = 2, 87 | lty = 1 88 | ) 89 | # add bands used during the selection process 90 | abline(v = wav[sel$bands]) 91 | } 92 | \references{ 93 | Honigs D.E., Hieftje, G.M., Mark, H.L. and Hirschfeld, T.B. 1985. 94 | Unique-sample selection via Near-Infrared spectral substraction. 95 | Analytical Chemistry, 57, 2299-2303 96 | } 97 | \seealso{ 98 | \code{\link{kenStone}}, \code{\link{naes}}, \code{\link{duplex}}, 99 | \code{\link{shenkWest}} 100 | } 101 | \author{ 102 | Antoine Stevens 103 | } 104 | -------------------------------------------------------------------------------- /R/spliceCorrection.R: -------------------------------------------------------------------------------- 1 | #' @title Splice correction of a spectral matrix acquired with an ASD spectrometer 2 | #' @description 3 | #' Corrects steps in an input spectral matrix by linear interpolation of the 4 | #' values of the edges of the middle sensor 5 | #' 6 | #' @usage 7 | #' 8 | #' spliceCorrection(X, wav, splice = c(1000, 1830), interpol.bands = 10) 9 | #' 10 | #' @param X a numeric matrix or vector to transform (optionally a data frame that can 11 | #' be coerced to a numerical matrix). 12 | #' @param wav a numeric vector with band positions. 13 | #' @param splice a numeric vector of length 1 or 2 with the positions of the 14 | #' splice(s). Default: 15 | #' \code{c(1000, 1830)} (as for the ASD FieldSpec Pro spectrometer of Malvern 16 | #' Panalytical). See details. 17 | #' @param interpol.bands the number of interpolation bands. 18 | #' @details 19 | #' This function uses by default the positions for the ASD FieldSpec Pro 20 | #' spectroradiometer (Malvern Panalytical) which usually exhibit 21 | #' steps at the splice of the three built-in detectors, 22 | #' positioned at 1000 nm (end of VNIR detector) and 1830 nm (end of SWIR1 detector). 23 | #' The data corresponding to the spectral region after the first step is used as 24 | #' reference for correcting the first region and the laste region (if 2 steps 25 | #' are supplied). 26 | #' Other typical examples of splice artifacts caused by concatenating data 27 | #' captured by different detectors inside the spectrometer: 28 | #' \itemize{ 29 | #' \item{XDS (FOSS): 1100 nm} 30 | #' \item{ProxiMate (BUCHI Labortechnik): 900 nm} 31 | #' } 32 | #' 33 | #' 34 | #' @return a matrix with the splice corrected data. 35 | #' @author Antoine Stevens and \href{https://orcid.org/0000-0002-5369-5120}{Leonardo Ramirez-Lopez} 36 | #' @export 37 | 38 | spliceCorrection <- function(X, wav, splice = c(1000, 1830), interpol.bands = 10) { 39 | extrapfun <- function(x, y, xout) { 40 | fit <- lm(y ~ x) 41 | fit$coefficients[1] + fit$coefficients[2] * xout 42 | } 43 | 44 | if (length(splice) < 1 | length(splice) > 2) { 45 | stop("splice must be a numeric vector of length 1 or 2") 46 | } 47 | 48 | if (length(splice) == 1) { 49 | index_b <- ncol(X) 50 | } else { 51 | index_b <- which(wav == splice[2]) 52 | } 53 | 54 | has_three_regions <- length(splice) == 2 55 | 56 | if (is.data.frame(X)) { 57 | X <- as.matrix(X) 58 | } 59 | 60 | was_vec <- is.vector(X) 61 | if (is.vector(X)) { 62 | nms <- names(X) 63 | X <- matrix(X, ncol = length(X)) 64 | } 65 | if (missing(wav)) { 66 | wav <- seq_len(ncol(X)) 67 | } 68 | 69 | if (length(wav) != ncol(X)) { 70 | stop("length(wav) must be equal to ncol(X)") 71 | } 72 | 73 | index <- which(wav %in% splice) 74 | 75 | if (!length(index)) { 76 | stop("splice positions not found in wav") 77 | } 78 | 79 | Xa <- X[, 1:index[1], drop = FALSE] 80 | Xb <- X[, (index[1] + 1):index_b, drop = FALSE] 81 | 82 | tmp_first <- Xb[, 1:interpol.bands, drop = FALSE] 83 | w_first <- wav[(index[1] + 1):(index[1] + interpol.bands)] 84 | pred_Xa <- apply(tmp_first, 1, function(y) extrapfun(x = w_first, y = y, xout = splice[1])) 85 | offset_a <- Xa[, ncol(Xa)] - pred_Xa 86 | 87 | if (has_three_regions) { 88 | Xc <- X[, (index[2] + 1):ncol(X), drop = FALSE] 89 | tmp_second <- Xb[, (ncol(Xb) - interpol.bands + 1):ncol(Xb), drop = FALSE] 90 | w_second <- wav[(index[2] - interpol.bands + 1):index[2]] 91 | pred_Xb <- apply(tmp_second, 1, function(y) extrapfun(x = w_second, y = y, xout = splice[2])) 92 | offset_b <- Xc[, 1] - pred_Xb 93 | output <- cbind(sweep(Xa, 1, offset_a, "-"), Xb, sweep(Xc, 1, offset_b, "-")) 94 | } else { 95 | output <- cbind(sweep(Xa, 1, offset_a, "-"), Xb) 96 | } 97 | 98 | if (was_vec) { 99 | output <- as.vector(output) 100 | names(output) <- nms 101 | } else { 102 | dimnames(output) <- list(rownames(X), colnames(X)) 103 | } 104 | return(output) 105 | } 106 | -------------------------------------------------------------------------------- /R/prospectr.R: -------------------------------------------------------------------------------- 1 | #' @useDynLib prospectr 2 | #' @import lifecycle 3 | #' @import Rcpp 4 | #' @import foreach 5 | #' @import iterators 6 | #' @import mathjaxr 7 | ## usethis namespace: start 8 | #' @importFrom lifecycle deprecate_soft 9 | ## usethis namespace: end 10 | #' @importFrom grDevices chull 11 | #' @importFrom graphics legend matplot 12 | #' @importFrom stats approx cov kmeans lm prcomp qf sd splinefun var 13 | #' @importFrom utils read.table txtProgressBar setTxtProgressBar 14 | #' 15 | #' @description 16 | #' 17 | #' \ifelse{html}{\out{Stable lifecycle}}{\strong{Stable}} 18 | #' 19 | #' Misc functions for spectral data 20 | #' \if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}} 21 | #' 22 | #' This package implements a number of functions useful for 23 | #' pre-processing spectral data well as for selecting representative samples/spectra. 24 | #' The functions included here are particularly useful in Near-Infrared and Infrared 25 | #' Spectroscopy applications. 26 | #' 27 | #' @details 28 | #' 29 | #' This is the version 30 | #' `r paste(pkg_info()[1:2], collapse = " \U002D\U002D ")` of the package. 31 | #' The main functionality is listed here. 32 | #' 33 | #' Currently, the following preprocessing functions are available: 34 | #' 35 | #' \itemize{ 36 | #' \item{\code{\link{resample}}} 37 | #' \item{\code{\link{resample2}}} 38 | #' \item{\code{\link{movav}}} 39 | #' \item{\code{\link{standardNormalVariate}}} 40 | #' \item{\code{\link{msc}}} 41 | #' \item{\code{\link{detrend}}} 42 | #' \item{\code{\link{baseline}}} 43 | #' \item{\code{\link{blockScale}}} 44 | #' \item{\code{\link{blockNorm}}} 45 | #' \item{\code{\link{binning}}} 46 | #' \item{\code{\link{savitzkyGolay}}} 47 | #' \item{\code{\link{gapDer}}} 48 | #' \item{\code{\link{continuumRemoval}}} 49 | #' } 50 | #' 51 | #' For the selection of representative samples/observations for calibrating 52 | #' spectral models the following functions ca be used: 53 | #' 54 | #' \itemize{ 55 | #' \item{\code{\link{naes}}} 56 | #' \item{\code{\link{honigs}}} 57 | #' \item{\code{\link{shenkWest}}} 58 | #' \item{\code{\link{kenStone}}} 59 | #' \item{\code{\link{duplex}}} 60 | #' \item{\code{\link{puchwein}}} 61 | #' } 62 | #' 63 | #' Other useful functions are also available: 64 | #' 65 | #' \itemize{ 66 | #' \item{\code{\link{read_nircal}}} 67 | #' \item{\code{\link{readASD}}} 68 | #' \item{\code{\link{spliceCorrection}}} 69 | #' \item{\code{\link{cochranTest}}} 70 | #' } 71 | #' @name prospectr-package 72 | #' @aliases prospectr-package prospectr 73 | #' @title Overview of the functions in the prospectr package 74 | #' @seealso 75 | #' Useful links: 76 | #' \itemize{ 77 | #' \item \url{https://github.com/l-ramirez-lopez/prospectr} 78 | #' \item Report bugs at \url{https://github.com/l-ramirez-lopez/prospectr/issues} 79 | #' } 80 | #' @author 81 | #' 82 | #' \strong{Maintainer}: Leonardo Ramirez-Lopez \email{ramirez.lopez.leo@gmail.com} 83 | #' 84 | #' Authors: 85 | #' \itemize{ 86 | #' \item Antoine Stevens (\href{https://orcid.org/0000-0002-1588-7519}{ORCID}) 87 | #' 88 | #' \item Leonardo Ramirez-Lopez (\href{https://orcid.org/0000-0002-5369-5120}{ORCID}) 89 | #' } 90 | ###################################################################### 91 | # prospectr 92 | # Copyrigth (C) 2025 Leonardo Ramirez-Lopez 93 | # 94 | # This program is free software; you can redistribute it and/or modify 95 | # it under the terms of the GNU General Public License as published by 96 | # the Free Software Foundation; either version 2 of the License, or 97 | # any later version. 98 | # 99 | # This program is distributed in the hope that it will be useful, 100 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 101 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 102 | # GNU General Public License for more details. 103 | ###################################################################### 104 | "_PACKAGE" 105 | NULL 106 | -------------------------------------------------------------------------------- /man/naes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/naes.R 3 | \name{naes} 4 | \alias{naes} 5 | \title{k-means sampling} 6 | \usage{ 7 | naes(X, k, pc, iter.max = 10, method = 0, .center = TRUE, .scale = FALSE) 8 | } 9 | \arguments{ 10 | \item{X}{a numeric matrix (optionally a data frame that can 11 | be coerced to a numerical matrix).} 12 | 13 | \item{k}{either the number of calibration samples to select or a set of 14 | cluster centres to initiate the k-means clustering.} 15 | 16 | \item{pc}{optional. If not specified, k-means is run directly on the variable 17 | (Euclidean) space. 18 | Alternatively, a PCA is performed before k-means and \code{pc} is the number of 19 | principal components kept. If \code{pc < 1},the number of principal components 20 | kept corresponds to the number of components explaining at least (\code{pc * 100}) 21 | percent of the total variance.} 22 | 23 | \item{iter.max}{maximum number of iterations allowed for the k-means 24 | clustering. Default is \code{iter.max = 10} (see \code{?kmeans}).} 25 | 26 | \item{method}{the method used for selecting calibration samples within each 27 | cluster: either samples closest to the cluster. 28 | centers (\code{method = 0}, default), samples farthest away from the centre of the 29 | data (\code{method = 1}) or 30 | random selection (\code{method = 2}).} 31 | 32 | \item{.center}{logical value indicating whether the input matrix must be 33 | centered before Principal Component Analysis. Default set to \code{TRUE}.} 34 | 35 | \item{.scale}{logical value indicating whether the input matrix must be 36 | scaled before Principal Component Analysis. Default set to \code{FALSE}.} 37 | } 38 | \value{ 39 | a list with components: 40 | \itemize{ 41 | \item{'\code{model}': numeric vector giving the row indices of the input data 42 | selected for calibration} 43 | \item{'\code{test}': numeric vector giving the row indices of the remaining 44 | observations} 45 | \item{'\code{pc}': if the \code{pc} argument is specified, a numeric matrix of the 46 | scaled pc scores} 47 | \item{'\code{cluster}': integer vector indicating the cluster to which each 48 | point was assigned} 49 | \item{'\code{centers}': a matrix of cluster centres} 50 | } 51 | } 52 | \description{ 53 | Perform a k-means sampling on a matrix for multivariate calibration 54 | } 55 | \details{ 56 | K-means sampling is a simple procedure based on cluster analysis to 57 | select calibration samples from large multivariate datasets. 58 | The method can be described in three points (Naes et al.,2001): 59 | 60 | \enumerate{ 61 | \item Perform a PCA and decide how many principal component to keep, 62 | \item Carry out a k-means clustering on the principal component scores and 63 | choose the number of resulting clusters to be equal to 64 | the number of desired calibration samples, 65 | \item Select one sample from each cluster. 66 | } 67 | } 68 | \examples{ 69 | data(NIRsoil) 70 | sel <- naes(NIRsoil$spc, k = 5, p = .99, method = 0) 71 | # clusters 72 | plot(sel$pc[, 1:2], col = sel$cluster + 2) 73 | # points selected for calibration with method = 0 74 | points(sel$pc[sel$model, 1:2], 75 | col = 2, 76 | pch = 19, 77 | cex = 1 78 | ) 79 | # pre-defined centers can also be provided 80 | sel2 <- naes(NIRsoil$spc, 81 | k = sel$centers, 82 | p = .99, method = 1 83 | ) 84 | # points selected for calibration with method = 1 85 | points(sel$pc[sel2$model, 1:2], 86 | col = 1, 87 | pch = 15, 88 | cex = 1 89 | ) 90 | } 91 | \references{ 92 | Naes, T., 1987. The design of calibration in near infra-red reflectance 93 | analysis by clustering. Journal of Chemometrics 1, 121-134. 94 | 95 | Naes, T., Isaksson, T., Fearn, T., and Davies, T., 2002. A user friendly 96 | guide to multivariate calibration and classification. NIR Publications, 97 | Chichester, United Kingdom. 98 | } 99 | \seealso{ 100 | \code{\link{kenStone}}, \code{\link{honigs}}, \code{\link{duplex}}, 101 | \code{\link{shenkWest}} 102 | } 103 | \author{ 104 | Antoine Stevens & \href{https://orcid.org/0000-0002-5369-5120}{Leonardo Ramirez-Lopez} 105 | } 106 | -------------------------------------------------------------------------------- /R/continuumRemoval.R: -------------------------------------------------------------------------------- 1 | #' @title Continuum Removal 2 | #' @description 3 | #' 4 | #' \ifelse{html}{\out{Maturing lifecycle}}{\strong{Maturing}} 5 | #' 6 | #' Compute the continuum removed values of a data matrix or vector 7 | #' @usage 8 | #' continuumRemoval(X, wav, type = c("R", "A"), 9 | #' interpol = c("linear", "spline"), 10 | #' method = c("division", "substraction")) 11 | #' @param X a numeric matrix or vector to process (optionally a data frame that can 12 | #' be coerced to a numerical matrix). 13 | #' @param wav optional. A numeric vector of band positions. 14 | #' @param type the type of data: 'R' for reflectance (default), 'A' for 15 | #' absorbance. 16 | #' @param interpol the interpolation method between points on the convex hull: 17 | #' 'linear' (default) or 'spline'. 18 | #' @param method normalization method: 'division' (default) or 'subtraction' 19 | #' (see details section). 20 | #' @author Antoine Stevens & \href{https://orcid.org/0000-0002-5369-5120}{Leonardo Ramirez-Lopez} 21 | #' @return a matrix or vector with the filtered spectra. 22 | #' @examples 23 | #' data(NIRsoil) 24 | #' wav <- as.numeric(colnames(NIRsoil$spc)) 25 | #' # plot of the 10 first abs spectra 26 | #' matplot(wav, 27 | #' t(NIRsoil$spc[1:10, ]), 28 | #' type = "l", 29 | #' ylim = c(0, .6), 30 | #' xlab = "Wavelength /nm", 31 | #' ylab = "Abs" 32 | #' ) 33 | #' # # type = 'A' is used for absorbance spectra 34 | #' cr <- continuumRemoval(NIRsoil$spc, wav, type = "A") 35 | #' matlines(wav, t(cr[1:10, ])) 36 | #' @seealso 37 | #' \code{\link{savitzkyGolay}}, \code{\link{movav}}, 38 | #' \code{\link{gapDer}}, \code{\link{binning}} 39 | #' @details 40 | #' The continuum removal technique was introduced by Clark and Roush (1984) 41 | #' as a method to highlight energy absorption features of minerals. 42 | #' It can be viewed as a way to perform albedo normalization. 43 | #' The algorithm find points lying on the convex hull (local maxima or envelope) 44 | #' of a spectrum, connects the points by linear or spline interpolation and 45 | #' normalizes the spectrum by dividing (or subtracting) the input data by the 46 | #' interpolated line. 47 | #' @references 48 | #' Clark, R.N., and Roush, T.L., 1984. Reflectance Spectroscopy: Quantitative 49 | #' Analysis Techniques for Remote Sensing Applications. J. Geophys. Res. 89, 50 | #' 6329-6340. 51 | #' @export 52 | 53 | continuumRemoval <- function(X, 54 | wav, 55 | type = c("R", "A"), 56 | interpol = c("linear", "spline"), 57 | method = c("division", "substraction")) { 58 | if (is.data.frame(X)) { 59 | X <- as.matrix(X) 60 | } 61 | 62 | type <- match.arg(type) 63 | interpol <- match.arg(interpol) 64 | method <- match.arg(method) 65 | 66 | if (type == "A") { 67 | X <- 1 / X 68 | } 69 | 70 | crfun <- function(x, wav, interpol) { 71 | id <- sort(chull(c(wav[1] - 1, wav, wav[length(wav)] + 1), c(0, x, 0))) 72 | id <- id[-c(1, length(id))] - 1 73 | cont <- switch(interpol, 74 | linear = { 75 | approx(x = wav[id], y = x[id], xout = wav, method = "linear")$y 76 | }, 77 | spline = { 78 | splinefun(x = wav[id], y = x[id])(wav) 79 | } 80 | ) 81 | return(cont) 82 | } 83 | 84 | if (is.matrix(X)) { 85 | if (missing(wav)) { 86 | wav <- seq_len(ncol(X)) 87 | } 88 | if (length(wav) != ncol(X)) { 89 | stop("length(wav) should be equal to ncol(X)") 90 | } 91 | 92 | cont <- t(apply(X, 1, function(x) crfun(x, wav, interpol))) 93 | } else { 94 | cont <- crfun(X, wav, interpol) 95 | } 96 | 97 | 98 | if (method == "division") { 99 | cr <- X / cont 100 | } # like ENVI 101 | else { 102 | cr <- 1 + X - cont 103 | } 104 | 105 | if (type == "A") { 106 | cr <- 1 / cr - 1 107 | } 108 | 109 | if (is.matrix(X)) { 110 | colnames(cr) <- wav 111 | rownames(cr) <- rownames(X) 112 | } else { 113 | names(cr) <- wav 114 | } 115 | 116 | return(cr) 117 | } 118 | -------------------------------------------------------------------------------- /R/savitzkyGolay.R: -------------------------------------------------------------------------------- 1 | #' @title Savitzky-Golay smoothing and differentiation 2 | #' @description 3 | #' \loadmathjax 4 | #' Savitzky-Golay smoothing and derivative of a data matrix or vector. 5 | #' @usage 6 | #' savitzkyGolay(X, m, p, w, delta.wav) 7 | #' @param X a numeric matrix or vector to process (optionally a data frame that 8 | #' can be coerced to a numerical matrix). 9 | #' @param m an integer indicating the differentiation order. 10 | #' @param p an integer indicating the polynomial order. 11 | #' @param w an integer indicating the window size (must be odd). 12 | #' @param delta.wav (optional) sampling interval. 13 | #' @author Antoine Stevens and \href{https://orcid.org/0000-0002-5369-5120}{Leonardo Ramirez-Lopez} 14 | #' @examples 15 | #' data(NIRsoil) 16 | #' opar <- par(no.readonly = TRUE) 17 | #' par(mfrow = c(2, 1), mar = c(4, 4, 2, 2)) 18 | #' 19 | #' # plot of the 10 first spectra 20 | #' matplot(as.numeric(colnames(NIRsoil$spc)), 21 | #' t(NIRsoil$spc[1:10, ]), 22 | #' type = "l", 23 | #' xlab = "", 24 | #' ylab = "Absorbance" 25 | #' ) 26 | #' 27 | #' mtext("Raw spectra") 28 | #' NIRsoil$spc_sg <- savitzkyGolay( 29 | #' X = NIRsoil$spc, 30 | #' m = 1, 31 | #' p = 3, 32 | #' w = 11, 33 | #' delta.wav = 2 34 | #' ) 35 | #' 36 | #' matplot(as.numeric(colnames(NIRsoil$spc_sg)), 37 | #' t(NIRsoil$spc_sg[1:10, ]), 38 | #' type = "l", 39 | #' xlab = "Wavelength /nm", 40 | #' ylab = "1st derivative" 41 | #' ) 42 | #' 43 | #' mtext("1st derivative spectra") 44 | #' par(opar) 45 | #' @details 46 | #' The Savitzky-Golay algorithm fits a local polynomial regression on the signal. 47 | #' It requires evenly spaced data points. Mathematically, it operates simply as 48 | #' a weighted sum over a given window: 49 | #' 50 | #' \mjdeqn{ x_j\ast = \frac{1}{N}\sum_{h=-k}^{k}{c_hx_{j+h}}}{ x_j ast = 1/N \sum_{h=-k}^{k} c_hx_{j+h}} 51 | #' 52 | #' where \mjeqn{x_j\ast}{x_j ast} is the new value, \mjeqn{N}{N} is a 53 | #' normalizing coefficient, \mjeqn{k}{k} is the gap size on each side of 54 | #' \mjeqn{j}{j} and \mjeqn{c_h}{c_h} are pre-computed coefficients, that depends 55 | #' on the chosen polynomial order and degree. 56 | #' 57 | #' The sampling interval specified with the `delta.wav` argument is used for 58 | #' scaling and get numerically correct derivatives. 59 | #' 60 | #' The convolution function is written in C++/Rcpp for faster computations. 61 | #' 62 | #' @references 63 | #' Luo, J., Ying, K., He, P., & Bai, J. (2005). Properties of Savitzky–Golay 64 | #' digital differentiators. Digital Signal Processing, 15(2), 122-136. 65 | #' 66 | #' Savitzky, A., and Golay, M.J.E., 1964. Smoothing and 67 | #' differentiation of data by simplified least squares procedures. 68 | #' Anal. Chem. 36, 1627-1639. 69 | #' 70 | #' Schafer, R. W. (2011). What is a Savitzky-Golay filter? (lecture notes). IEEE 71 | #' Signal processing magazine, 28(4), 111-117. 72 | #' 73 | #' Wentzell, P.D., and Brown, C.D., 2000. Signal processing in analytical 74 | #' chemistry. Encyclopedia of Analytical Chemistry, 9764-9800. 75 | #' @export 76 | #' 77 | 78 | savitzkyGolay <- function(X, m, p, w, delta.wav) { 79 | if (is.data.frame(X)) { 80 | X <- as.matrix(X) 81 | } 82 | 83 | if (w %% 2 != 1) { 84 | stop("needs an odd filter length w") 85 | } 86 | if (p >= w) { 87 | stop("filter length w mus be greater than polynomial order p") 88 | } 89 | if (p < m) { 90 | stop("polynomial order p must be geater or equal to differentiation order m") 91 | } 92 | 93 | gap <- (w - 1) / 2 94 | basis <- outer(-gap:gap, 0:p, "^") 95 | A <- solve(crossprod(basis, basis), tol = 0) %*% t(basis) 96 | 97 | if (is.matrix(X)) { 98 | if (w >= ncol(X)) { 99 | stop("filter length w must be lower than ncol(X)") 100 | } 101 | output <- factorial(m) * convCppM(X, A[m + 1, ]) 102 | g <- (w - 1) / 2 103 | colnames(output) <- colnames(X)[(g + 1):(ncol(X) - g)] 104 | rownames(output) <- rownames(X) 105 | } 106 | 107 | if (is.vector(X)) { 108 | if (w >= length(X)) { 109 | stop("filter length w must be lower than length(X)") 110 | } 111 | output <- factorial(m) * convCppV(X, A[m + 1, ]) 112 | g <- (w - 1) / 2 113 | names(output) <- names(X)[(g + 1):(length(X) - g)] 114 | } 115 | # scaling 116 | if (!missing(delta.wav)) { 117 | output <- output / delta.wav^m 118 | } 119 | 120 | return(output) 121 | } 122 | -------------------------------------------------------------------------------- /man/duplex.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/duplex.R 3 | \name{duplex} 4 | \alias{duplex} 5 | \title{DUPLEX algorithm for calibration sampling} 6 | \usage{ 7 | duplex(X, 8 | k, 9 | metric = c("mahal", "euclid"), 10 | pc, 11 | group, 12 | .center = TRUE, 13 | .scale = FALSE) 14 | } 15 | \arguments{ 16 | \item{X}{a numeric matrix.} 17 | 18 | \item{k}{the number of calibration/validation samples.} 19 | 20 | \item{metric}{the distance metric to be used: 'euclid' (Euclidean distance) 21 | or 'mahal' (Mahalanobis distance, default).} 22 | 23 | \item{pc}{optional. The number of Principal Components to be used to select 24 | the samples. If not specified, distance are computed in the Euclidean space. 25 | Alternatively, distances are computed in the principal component space and 26 | \code{pc} is the number of principal components retained. 27 | If \code{pc < 1}, the number of principal components kept corresponds to the 28 | number 29 | of components explaining at least (\code{pc * 100}) percent of the total variance.} 30 | 31 | \item{group}{An optional \code{factor} (or vector that can be coerced to a factor 32 | by \code{\link{as.factor}}) of length equal to nrow(X), giving the identifier 33 | of related observations (e.g. samples of the same batch of measurements, 34 | samples of the same origin, or of the same soil profile). When one 35 | observation is 36 | selected by the procedure all observations of the same group are removed 37 | together and assigned to the calibration/validation sets. This allows to 38 | select calibration and validation samples that are independent from each 39 | other.} 40 | 41 | \item{.center}{logical value indicating whether the input matrix must be 42 | centered before projecting \code{X} onto the Principal Component space. 43 | Analysis. Default set to \code{TRUE}.} 44 | 45 | \item{.scale}{logical value indicating whether the input matrix must be 46 | scaled before \code{X} onto the Principal Component space. 47 | Analysis. Default set to \code{FALSE}.} 48 | } 49 | \value{ 50 | a \code{list} with components: 51 | \itemize{ 52 | \item{'\code{model}': numeric vector giving the row indices of the input data 53 | selected for calibration} 54 | \item{'\code{test}': numeric vector giving the row indices of the input data 55 | selected for validation} 56 | \item{'\code{pc}': if the \code{pc} argument is specified, a numeric matrix of the 57 | scaled pc scores} 58 | } 59 | } 60 | \description{ 61 | Select calibration samples from a large multivariate data using the DUPLEX 62 | algorithm 63 | } 64 | \details{ 65 | The DUPLEX algorithm is similar to the Kennard-Stone algorithm (see 66 | \code{\link{kenStone}}) but allows to select both calibration and validation 67 | points that are independent. Similarly to the Kennard-Stone algorithm, 68 | it starts by selecting the pair of points that are the farthest apart. They 69 | are assigned to the calibration sets and removed from the list of points. 70 | Then, the next pair of points which are farthest apart are assigned to the 71 | validation sets and removed from the list. In a third step, the procedure 72 | assigns each remaining point alternatively to the calibration 73 | and validation sets based on the distance to the points already selected. 74 | Similarly to the Kennard-Stone algorithm, the default distance metric used 75 | by the procedure is the Euclidean distance, but the Mahalanobis distance can 76 | be used as well using the \code{pc} argument (see \code{\link{kenStone}}). 77 | } 78 | \examples{ 79 | data(NIRsoil) 80 | sel <- duplex(NIRsoil$spc, k = 30, metric = "mahal", pc = .99) 81 | plot(sel$pc[, 1:2], xlab = "PC1", ylab = "PC2") 82 | points(sel$pc[sel$model, 1:2], pch = 19, col = 2) # points selected for calibration 83 | points(sel$pc[sel$test, 1:2], pch = 18, col = 3) # points selected for validation 84 | # Test on artificial data 85 | X <- expand.grid(1:20, 1:20) + rnorm(1e5, 0, .1) 86 | plot(X[, 1], X[, 2], xlab = "VAR1", ylab = "VAR2") 87 | sel <- duplex(X, k = 25, metric = "mahal") 88 | points(X[sel$model, ], pch = 19, col = 2) # points selected for calibration 89 | points(X[sel$test, ], pch = 15, col = 3) # points selected for validation 90 | } 91 | \references{ 92 | Kennard, R.W., and Stone, L.A., 1969. Computer aided design of experiments. 93 | Technometrics 11, 137-148. 94 | 95 | Snee, R.D., 1977. Validation of regression models: methods and examples. 96 | Technometrics 19, 415-428. 97 | } 98 | \seealso{ 99 | \code{\link{kenStone}}, \code{\link{honigs}}, \code{\link{shenkWest}}, 100 | \code{\link{naes}} 101 | } 102 | \author{ 103 | Antoine Stevens & \href{https://orcid.org/0000-0002-5369-5120}{Leonardo Ramirez-Lopez} 104 | } 105 | -------------------------------------------------------------------------------- /R/shenkWest.R: -------------------------------------------------------------------------------- 1 | #' @title SELECT algorithm for calibration sampling 2 | #' 3 | #' @description 4 | #' Select calibration samples from a large multivariate data using the SELECT 5 | #' algorithm as described in Shenk and Westerhaus (1991). 6 | #' @usage 7 | #' shenkWest(X, 8 | #' d.min = 0.6, 9 | #' pc = 0.95, 10 | #' rm.outlier = FALSE, 11 | #' .center = TRUE, 12 | #' .scale = FALSE) 13 | #' @param X a numeric matrix (optionally a data frame that can 14 | #' be coerced to a numerical matrix). 15 | #' @param d.min a minimum distance (default = 0.6). 16 | #' @param pc the number of principal components retained in the computation 17 | #' distance in the standardized Principal Component space (Mahalanobis distance). 18 | #' If `pc < 1`, the number of principal components kept corresponds to the 19 | #' number of components explaining at least (`pc * 100`) percent of the total 20 | #' variance (default = 0.95). 21 | #' @param rm.outlier logical. If `TRUE`, remove observations with a standardized 22 | #' mahalanobis distance to the center of the data greater than 3 23 | #' (default = `FALSE`). 24 | #' @param .center logical. Indicates whether the input matrix should be centered 25 | #' before Principal Component Analysis. Default set to \code{TRUE}. 26 | #' @param .scale logical. Indicates whether the input matrix should be scaled 27 | #' before Principal Component Analysis. Default set to \code{FALSE}. 28 | #' @author Antoine Stevens 29 | #' @return a `list` with components: 30 | #' \itemize{ 31 | #' \item{'`model`': numeric vector giving the row indices of the input data 32 | #' selected for calibration} 33 | #' \item{'`test`': numeric vector giving the row indices of the remaining 34 | #' observations} 35 | #' \item{'`pc`': a numeric matrix of the scaled pc scores} 36 | #' } 37 | #' @details 38 | #' The SELECT algorithm is an iterative procedure based on the standardized 39 | #' Mahalanobis distance between observations. 40 | #' First, the observation having the highest number of neighbours within a given 41 | #' minimum distance is selected and its neighbours are discarded. The procedure 42 | #' is repeated until there is no observation left. 43 | #' 44 | #' If the `rm.outlier` argument is set to `TRUE`, outliers will be removed 45 | #' before running the SELECT algorithm, using the CENTER algorithm of 46 | #' Shenk and Westerhaus (1991), i.e. samples with a standardized Mahalanobis 47 | #' distance `>3` are removed. 48 | #' @examples 49 | #' data(NIRsoil) 50 | #' # reduce data size 51 | #' NIRsoil$spc <- binning(X = NIRsoil$spc, bin.size = 5) 52 | #' sel <- shenkWest(NIRsoil$spc, pc = .99, d.min = .3, rm.outlier = FALSE) 53 | #' plot(sel$pc[, 1:2], xlab = "PC1", ylab = "PC2") 54 | #' # points selected for calibration 55 | #' points(sel$pc[sel$model, 1:2], pch = 19, col = 2) 56 | #' # without outliers 57 | #' sel <- shenkWest(NIRsoil$spc, pc = .99, d.min = .3, rm.outlier = TRUE) 58 | #' plot(sel$pc[, 1:2], xlab = "PC1", ylab = "PC2") 59 | #' # points selected for calibration 60 | #' points(sel$pc[sel$model, 1:2], pch = 15, col = 3) 61 | #' @references Shenk, J.S., and Westerhaus, M.O., 1991. Population Definition, 62 | #' Sample Selection, and Calibration Procedures for Near Infrared Reflectance 63 | #' Spectroscopy. Crop Science 31, 469-474. 64 | #' @seealso \code{\link{kenStone}}, \code{\link{duplex}}, \code{\link{puchwein}} 65 | #' @export 66 | #' 67 | 68 | shenkWest <- function(X, 69 | d.min = 0.6, 70 | pc = 0.95, 71 | rm.outlier = FALSE, 72 | .center = TRUE, 73 | .scale = FALSE) { 74 | if (is.data.frame(X)) { 75 | X <- as.matrix(X) 76 | } 77 | 78 | # Compute scores of PCA 79 | pca <- prcomp(X, center = .center, scale = .scale) 80 | if (pc < 1) { 81 | pvar <- pca$sdev^2 / sum(pca$sdev^2) 82 | pcsum <- cumsum(pvar) < pc 83 | if (any(pcsum)) { 84 | pc <- max(which(pcsum)) + 1 85 | } else { 86 | pc <- 1 87 | } 88 | } 89 | scores.ini <- scores <- sweep(pca$x[, 1:pc, drop = F], 2, pca$sdev[1:pc], "/") # scaling of the scores 90 | 91 | n <- nini <- 1:nrow(X) 92 | model <- NULL 93 | 94 | if (rm.outlier) { 95 | m <- fastDistV(scores, colMeans(scores), "euclid") # squared mahalanobis distance 96 | m <- m / pc # standardized mahalanobis distance (also called GH, Global H distance) 97 | idx <- m <= 3 98 | scores <- scores[idx, , drop = F] # remove samples with H > 3 99 | n <- n[idx] 100 | } 101 | 102 | d <- fastDist(scores, scores, "euclid") # NH - Neighbour mahalanobis H distance 103 | d <- d / pc # standardized mahalanobis distance 104 | d <- d < d.min # distance treshold 105 | 106 | while (ncol(d) > 1) { 107 | idx <- which.max(colSums(d)) 108 | knn <- which(d[, idx]) 109 | if (length(knn) < 2) { 110 | break 111 | } 112 | model <- c(model, n[idx]) 113 | n <- n[-knn] 114 | d <- d[-knn, -knn, drop = F] 115 | } 116 | 117 | return(list(model = model, test = nini[-n], pc = scores.ini)) 118 | } 119 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # prospectr 2 | 3 | [![CRAN\_Status\_Badge](http://www.r-pkg.org/badges/version/prospectr)](https://CRAN.R-project.org/package=prospectr) 4 | [![Downloads](https://cranlogs.r-pkg.org/badges/prospectr)](https://cranlogs.r-pkg.org/badges/prospectr) 5 | 6 | 7 | 8 | 9 | 10 |

11 | Misc. Functions for Processing and Sample Selection of Spectroscopic 12 | Data 13 |

14 | 15 |
*Antoine Stevens & Leo Ramirez-Lopez* 16 | 17 | *Last update: 2025-03-05* 18 | 19 | Version: 0.2.8 – galo 20 | 21 | `prospectr` is becoming more and more used in spectroscopic 22 | applications, which is evidenced by the number of scientific 23 | publications citing the package. This package is very useful for signal 24 | processing and chemometrics in general as it provides various utilities 25 | for pre–processing and sample selection of spectral data. While similar 26 | functions are available in other packages, like 27 | [`signal`](https://CRAN.R-project.org/package=signal), the functions in 28 | this package work indifferently for `data.frame`, `matrix` and `vector` 29 | inputs. Besides, several functions are optimized for speed and use C++ 30 | code through the [`Rcpp`](https://CRAN.R-project.org/package=Rcpp) and 31 | [`RcppArmadillo`](https://CRAN.R-project.org/package=RcppArmadillo) 32 | packages. 33 | 34 | ## Installing it from GitHub 35 | 36 | Install this package from github by: 37 | 38 | remotes::install_github("l-ramirez-lopez/prospectr") 39 | 40 | NOTE: in some MAC Os it is still recommended to install `gfortran` and 41 | `clang` from [here](https://cran.r-project.org/bin/macosx/tools/). Even 42 | for R >= 4.0. For more info, check this 43 | [issue](https://github.com/tidyverts/fable/issues/193). 44 | 45 | ## News 46 | 47 | Check the NEWS document for new functionality and general changes in the 48 | package. 49 | 50 | ## Vignette 51 | 52 | A vignette for `prospectr` explaining its core functionality is 53 | available at 54 | . 55 | 56 | ## Core functionality 57 | 58 | A vignette gives an overview of the main functions of the package. Just 59 | type `vignette("prospectr-intro")` in the console to access it. 60 | Currently, the following preprocessing functions are available: 61 | 62 | - `resample()` : resample a signal to new coordinates by linear or 63 | spline interpolation 64 | 65 | - `resample2()` : resample a signal to new coordinates using FWHM 66 | values 67 | 68 | - `movav()` : moving average 69 | 70 | - `standardNormalVariate()` : standard normal variate 71 | 72 | - `msc()` : multiplicative scatter correction 73 | 74 | - `detrend()` : detrend normalization 75 | 76 | - `baseline()` : baseline removal/correction 77 | 78 | - `blockScale()` : block scaling 79 | 80 | - `blockNorm()` : sum of squares block weighting 81 | 82 | - `binning()` : average in column–wise subsets 83 | 84 | - `savitzkyGolay()` : Savitzky-Golay filter (smoothing and 85 | derivatives) 86 | 87 | - `gapDer()` : gap-segment derivative 88 | 89 | - `continuumRemoval()` : continuum-removed absorbance or reflectance 90 | values 91 | 92 | The selection of representative samples/observations for calibration of 93 | spectral models can be achieved with one of the following functions: 94 | 95 | - `naes()` : k-means sampling 96 | 97 | - `kenStone()` : CADEX (Kennard–Stone) algorithm 98 | 99 | - `duplex()` : DUPLEX algorithm 100 | 101 | - `shenkWest()` : SELECT algorithm 102 | 103 | - `puchwein()` : Puchwein sampling 104 | 105 | - `honigs()` : Unique-sample selection by spectral subtraction 106 | 107 | Other useful functions are also available: 108 | 109 | - `read_nircal()` : read binary files exported from BUCHI NIRCal 110 | software 111 | 112 | - `readASD()` : read binary or text files from an ASD instrument 113 | (Indico Pro format) 114 | 115 | - `spliceCorrection()` : correct spectra for steps at the splice of 116 | detectors in an ASD FieldSpec Pro 117 | 118 | - `cochranTest()` : detects replicate outliers with the Cochran *C* 119 | test 120 | 121 | ## Citing the package 122 | 123 | Antoine Stevens and Leornardo Ramirez-Lopez (2025). An introduction to 124 | the prospectr package. R package Vignette R package version 0.2.8. A 125 | BibTeX entry for LaTeX users is: 126 | 127 | @Manual{stevens2022prospectr, 128 | title = {An introduction to the prospectr package}, 129 | author = {Antoine Stevens and Leornardo Ramirez-Lopez}, 130 | publication = {R package Vignette}, 131 | year = {2025}, 132 | note = {R package version 0.2.8}, 133 | } 134 | 135 | ## Bug report and development version 136 | 137 | You can send an email to the package maintainer 138 | () or create an 139 | [issue](https://github.com/l-ramirez-lopez/prospectr/issues) on github. 140 | To install the development version of `prospectr`, simply install 141 | [`devtools`](https://CRAN.R-project.org/package=devtools) from CRAN then 142 | run `install_github("l-ramirez-lopez/prospectr")`. 143 | -------------------------------------------------------------------------------- /R/gapDer.R: -------------------------------------------------------------------------------- 1 | #' @title Gap-Segment derivative 2 | #' @description 3 | #' Gap-Segment derivatives of a data matrix or vector 4 | #' @usage 5 | #' gapDer(X, m = 1, w = 1, s = 1, delta.wav) 6 | #' @param X a numeric matrix or vector` to transform (optionally a data frame 7 | #' that can be coerced to a numerical matrix). 8 | #' @param m an integer indicating the order of the derivative. 9 | #' Note that this function allows for high order derivatives (e.g. m = 6). If 0 10 | #' is passed, the function will just smooth out the signal(s). 11 | #' @param w an integer indicating the gap size (must be odd and >=1), i.e. the spacing 12 | #' between points over which the derivative is computed. 13 | #' @param s an integer indicating the segment size (must be odd and >=1), i.e. 14 | #' the range over which the points are averaged (default = 1, i.e. no 15 | #' smoothing corresponding to Norris-Gap Derivative). 16 | #' @param delta.wav the sampling interval (or band spacing). 17 | #' @author Antoine Stevens and \href{https://orcid.org/0000-0002-5369-5120}{Leonardo Ramirez-Lopez} 18 | #' @details 19 | #' In this type of derivatives, the gap size denotes the length of the x 20 | #' interval that separates the two segments that are averaged. A detailed 21 | #' explanation of gap segment derivatives can be found in Hopkins (2001). 22 | #' 23 | #' The sampling interval specified with the `delta.wav` argument is used for 24 | #' scaling and get numerically correct derivatives. 25 | #' 26 | #' The convolution function is written in C++/Rcpp for faster computations. 27 | #' @examples 28 | #' data(NIRsoil) 29 | #' opar <- par(no.readonly = TRUE) 30 | #' par(mfrow = c(2, 2), mar = c(4, 4, 2, 2)) 31 | #' # plot of the 10 first spectra 32 | #' matplot(as.numeric(colnames(NIRsoil$spc)), 33 | #' t(NIRsoil$spc[1:10, ]), 34 | #' type = "l", 35 | #' xlab = "", 36 | #' ylab = "Absorbance" 37 | #' ) 38 | #' mtext("Raw spectra") 39 | #' 40 | #' der <- gapDer(NIRsoil$spc, m = 1, w = 1, s = 1, delta.wav = 2) 41 | #' matplot(as.numeric(colnames(der)), 42 | #' t(der[1:10, ]), 43 | #' type = "l", 44 | #' xlab = "Wavelength /nm", 45 | #' ylab = "gap derivative" 46 | #' ) 47 | #' 48 | #' mtext("1st derivative spectra") 49 | #' der <- gapDer(NIRsoil$spc, m = 1, w = 11, s = 1, delta.wav = 2) 50 | #' matplot(as.numeric(colnames(der)), t(der[1:10, ]), 51 | #' type = "l", 52 | #' xlab = "Wavelength /nm", 53 | #' ylab = "gap derivative" 54 | #' ) 55 | #' 56 | #' mtext("1st derivative spectra with a window size = 11 nm") 57 | #' der <- gapDer(NIRsoil$spc, m = 1, w = 11, s = 5, delta.wav = 2) 58 | #' matplot(as.numeric(colnames(der)), t(der[1:10, ]), 59 | #' type = "l", 60 | #' xlab = "Wavelength /nm", 61 | #' ylab = "gap derivative" 62 | #' ) 63 | #' mtext("1st derivative spectra with: window size: 11 nm, smoothing: 5 nm") 64 | #' par(opar) 65 | #' @references 66 | #' Hopkins, D. W. (2001). What is a Norris derivative?. NIR news, 12(3), 3-5. 67 | #' @seealso \code{\link{savitzkyGolay}}, \code{\link{movav}}, 68 | #' \code{\link{binning}}, \code{\link{continuumRemoval}} 69 | #' @return a matrix or vector with the filtered signal(s) 70 | #' @export 71 | #' 72 | gapDer <- function(X, m = 1, w = 1, s = 1, delta.wav) { 73 | if (w < 1 | !w %% 2) { 74 | stop("w must be odd and >= 1") 75 | } 76 | 77 | if (s < 1 | !s %% 2) { 78 | stop("s must be odd and >= 1") 79 | } 80 | 81 | filter_length <- m * w + (m + 1) * s 82 | 83 | if (filter_length > ncol(X)) { 84 | stop("the current parameters produce a filter with a length larger than the number of variables in X") 85 | } 86 | 87 | if (is.data.frame(X)) { 88 | X <- as.matrix(X) 89 | } 90 | 91 | zw <- rep(0, w) 92 | os <- rep(1, s) 93 | nmr <- factorial(m) 94 | 95 | # if (m == 1) { 96 | # fp <- c(-os, zw, os) 97 | # } else if (m == 2) { 98 | # fp <- c(os, zw, -2 * os, zw, os) 99 | # } else if (m == 3) { 100 | # fp <- c(-os, zw, 3 * os, zw, -3 * os, zw, os) 101 | # } else { 102 | # fp <- c(os, zw, -4 * os, zw, 6 * os, zw, -4 * os, zw, os) 103 | # } 104 | 105 | # Compute the filter in more efficient way allowing for higher order 106 | # derivatives 107 | fp <- NULL 108 | for (k in 0:(m - 1)) { 109 | ck <- c((-1)^(m - k) * choose(m, k) * os, zw) 110 | fp <- c(fp, ck) 111 | } 112 | fp <- c(fp, os) 113 | 114 | j <- (length(fp) - 1) / 2 115 | j <- -j:j 116 | nf <- 1 / nmr * sum((j^m) * fp) 117 | sg_filter <- fp / nf # filter 118 | 119 | if (is.matrix(X)) { 120 | if (w >= ncol(X)) { 121 | stop("filter length w must be lower than ncol(X)") 122 | } 123 | output <- convCppM(X, sg_filter) # Convolution 124 | g <- (length(sg_filter) - 1) / 2 125 | colnames(output) <- colnames(X)[(g + 1):(ncol(X) - g)] 126 | rownames(output) <- rownames(X) 127 | } 128 | 129 | if (is.vector(X)) { 130 | if (w >= length(X)) { 131 | stop("filter length w must be lower than length(X)") 132 | } 133 | output <- convCppV(X, sg_filter) # Convolution 134 | g <- (w - 1) / 2 135 | names(output) <- names(X)[((g + 1):(length(X) - g))] 136 | } 137 | 138 | if (!missing(delta.wav)) { 139 | output <- output / delta.wav^m 140 | } 141 | 142 | return(output) 143 | } 144 | -------------------------------------------------------------------------------- /R/cochranTest.R: -------------------------------------------------------------------------------- 1 | #' @title Cochran *C* Test 2 | #' @description 3 | #' \loadmathjax 4 | #' Detects and removes replicate outliers in data series based on the Cochran 5 | #' *C* test for homogeneity in variance. 6 | #' @usage 7 | #' cochranTest(X, id, fun = 'sum', alpha = 0.05) 8 | #' @param X a numeric matrix (optionally a data frame that can 9 | #' be coerced to a numerical matrix). 10 | #' @param id factor of the replicate identifiers. 11 | #' @param fun function to aggregate data: 'sum' (default), 'mean', 'PC1' or 'PC2'. 12 | #' @param alpha *p*-value of the Cochran *C* test. 13 | #' @author Antoine Stevens 14 | #' @return a list with components: 15 | #' \itemize{ 16 | #' \item{'`X`': input matrix from which outlying observations (rows) have 17 | #' been removed} 18 | #' \item{'`outliers`': numeric vector giving the row indices of the input 19 | #' data that have been flagged as outliers} 20 | #' } 21 | #' 22 | #' @details 23 | #' The Cochran *C* test tests whether a single estimate of variance is 24 | #' significantly larger than a a group of variances. 25 | #' It can be computed as: 26 | #' 27 | #' \mjdeqn{RMSD = \sqrt{\frac{1}{n} \sum_{i=1}^n {(y_i - \ddot{y}_i)^2}}}{RMSD = sqrt{{1}/{n} sum (y_i - ddot{y}_i)^2}} 28 | #' 29 | #' where \mjeqn{y_i}{y_i} is the value of the side variable of the \mjeqn{i}{i}th sample, 30 | #' \mjeqn{\ddot{y}_i}{\ddot{y}_i} is the value of the side variable of the nearest neighbor 31 | #' of the \mjeqn{i}{i}th sample and \mjeqn{n}{n} is the total number of observations. 32 | #' 33 | #' For multivariate data, the variance \mjeqn{S_i^2}{S_i^2} can be computed on aggregated 34 | #' data, using a summary function (`fun` argument) 35 | #' such as `sum`, `mean`, or first principal components ('PC1' and 'PC2'). 36 | #' 37 | #' An observation is considered to have an outlying variance if the Cochran *C* 38 | #' statistic is higher than an upper limit critical value \mjeqn{C_{UL}}{C_{UL}} 39 | #' which can be evaluated with ('t Lam, 2010): 40 | #' 41 | #' 42 | #' \mjdeqn{C_{UL}(\alpha, n, N) = 1 + [\frac{N-1}{F_{c}(\alpha/N,(n-1),(N-1)(n-1))}]^{-1} }{C_{UL}(\alpha, n, N) = 1 + [\frac{N-1}{F_{c}(\alpha/N,(n-1),(N-1)(n-1))}]^{-1}} 43 | #' 44 | #' where \mjeqn{\alpha}{\alpha} is the *p*-value of the test, \mjeqn{n}{n} is the (average) 45 | #' number of replicates and \mjeqn{F_c}{F_c} is the critical value of the Fisher's \mjeqn{F}{F} ratio. 46 | #' 47 | #' The replicates with outlying variance are removed and the test can be applied 48 | #' iteratively until no outlying variance is detected under the given *p*-value. 49 | #' Such iterative procedure is implemented in `cochranTest`, allowing the user 50 | #' to specify whether a set of replicates must be removed or not from the 51 | #' dataset by graphical inspection of the outlying replicates. The user has then 52 | #' the possibility to (i) remove all replicates at once, (ii) remove one or more 53 | #' replicates by giving their indices or (iii) remove nothing. 54 | #' @note The test assumes a balanced design (i.e. data series have the same 55 | #' number of replicates). 56 | #' @references 57 | #' Centner, V., Massart, D.L., and De Noord, O.E., 1996. Detection of 58 | #' inhomogeneities in sets of NIR spectra. Analytica Chimica Acta 330, 1-17. 59 | #' 60 | #' R.U.E. 't Lam (2010). Scrutiny of variance results for outliers: Cochran's 61 | #' test optimized. Analytica Chimica Acta 659, 68-84. 62 | #' 63 | #' 64 | #' @export 65 | 66 | cochranTest <- function(X, id, fun = "sum", alpha = 0.05) { 67 | if (!is.factor(id)) { 68 | stop("id should be a factor") 69 | } 70 | id <- id[drop = TRUE] 71 | pval <- 0 72 | X2 <- NULL 73 | n <- nrow(X) 74 | X <- data.frame(ID = 1:n, X, check.names = FALSE) 75 | 76 | while (pval <= alpha) { 77 | x <- switch(fun, 78 | sum = { 79 | apply(X[, -1], 1, sum) 80 | }, 81 | mean = { 82 | apply(X[, -1], 1, mean) 83 | }, 84 | PC1 = { 85 | prcomp(X[, -1], center = TRUE, .scale = FALSE)$x[, 1] 86 | }, 87 | PC2 = { 88 | prcomp(X[, -1], center = TRUE, .scale = FALSE)$x[, 2] 89 | } 90 | ) 91 | 92 | vars <- tapply(x, id, var) # variances 93 | pval <- Cul(max(vars) / sum(vars), mean(table(id)), length(vars)) 94 | if (pval > alpha) { 95 | break 96 | } 97 | print(paste("Cochran p value for max variance = ", pval, sep = "")) 98 | maxvar <- which(id == levels(id)[which.max(vars)]) 99 | matplot(x = as.numeric(colnames(X[, -1])), y = t(X[maxvar, -1]), type = "l", xlab = "", ylab = "", main = levels(id)[which.max(vars)]) 100 | legend("topleft", lty = 1:length(maxvar), col = 1:length(maxvar), legend = 1:length(maxvar)) 101 | out <- readline("Which replicate is an outlier \nthat you want to remove\n(-1 = all; 0 = none; >0 = (comma separated) index of the replicate(s) to remove )\n:") 102 | out <- as.numeric(strsplit(out, ",")[[1]]) 103 | if (out[1]) { 104 | if (out[1] == -1) { 105 | out <- 1:length(maxvar) 106 | } 107 | X <- X[-maxvar[out], ] 108 | id <- id[-maxvar[out]][drop = TRUE] 109 | } else { 110 | # keep those that have been flagged but are not outliers 111 | X2 <- rbind(X2, X[maxvar, ]) 112 | X <- X[-maxvar, ] 113 | id <- id[-maxvar][drop = TRUE] 114 | } 115 | } 116 | X <- rbind(X, X2) 117 | list(X = X[, -1], outliers = which(!1:n %in% X$ID)) 118 | } 119 | -------------------------------------------------------------------------------- /R/honigs.R: -------------------------------------------------------------------------------- 1 | #' @title Honigs algorithm for calibration sampling 2 | #' @description 3 | #' Select calibration samples from a data matrix using the Honings et al. (1985) 4 | #' method 5 | #' @usage 6 | #' honigs(X, k, type) 7 | #' @param X a numeric matrix with absorbance or continuum-removed reflectance 8 | #' values (optionally a data frame that can be coerced to a numerical matrix). 9 | #' @param k the number of samples to select for calibration. 10 | #' @param type type of data: 'A' for absorbance (default), 'R' for reflectance, 11 | #' 'CR' for continuum-removed reflectance 12 | #' @author Antoine Stevens 13 | #' @return a `list` with components: 14 | #' \itemize{ 15 | #' \item{'`model`': numeric vector giving the row indices of the input data 16 | #' selected for calibration} 17 | #' \item{'`test`': numeric vector giving the row indices of the remaining 18 | #' observations} 19 | #' \item{'`bands`': indices of the columns used during the selection procedure} 20 | #' } 21 | #' @examples 22 | #' data(NIRsoil) 23 | #' sel <- honigs(NIRsoil$spc, k = 10, type = "A") 24 | #' wav <- as.numeric(colnames(NIRsoil$spc)) 25 | #' # spectral library 26 | #' matplot(wav, 27 | #' t(NIRsoil$spc), 28 | #' type = "l", 29 | #' xlab = "wavelength /nm", 30 | #' ylab = "Abs", 31 | #' col = "grey50" 32 | #' ) 33 | #' # plot calibration spectra 34 | #' matlines(wav, 35 | #' t(NIRsoil$spc[sel$model, ]), 36 | #' type = "l", 37 | #' xlab = "wavelength /nm", 38 | #' ylab = "Abs", 39 | #' lwd = 2, 40 | #' lty = 1 41 | #' ) 42 | #' # add bands used during the selection process 43 | #' abline(v = wav[sel$bands]) 44 | #' @details 45 | #' The Honigs algorithm is a simple method to select calibration samples based 46 | #' on their absorption features. Absorbance, reflectance and continuum-removed 47 | #' reflectance values (see \code{\link{continuumRemoval}}) can be used (`type` 48 | #' argument). 49 | #' The algorithm can be described as follows: let \eqn{A} be a matrix of 50 | #' \eqn{(i \times j)} absorbance values: 51 | #' 52 | #' \enumerate{ 53 | #' \item the observation (row) with the maximum absolute absorbance 54 | #' (\eqn{max(|A|)}) is selected and assigned to the calibration set. 55 | #' \item a vector of weights \eqn{W} is computed as \eqn{A_j/max_A} where 56 | #' \eqn{A_j} is the column of \eqn{A} having the maximum absolute absorbance 57 | #' and \eqn{max_A} is the absorbance value corresponding to the maximum 58 | #' absolute absorbance of \eqn{A} 59 | #' \item each row \eqn{A_i} is multiplied by the corresponding weight \eqn{W_i} 60 | #' and the resulting vector is subtracted from the original row \eqn{A_i}. 61 | #' \item the row of the selected observation and the column with the maximum 62 | #' absolute absorbance is removed from the matrix 63 | #' \item go back to step 1 and repeat the procedure until the desired number 64 | #' of selected samples is reached 65 | #' } 66 | #' 67 | #' The observation with the maximum absorbance is considered to have 68 | #' an unusual composition. The algorithm selects therefore this observation and 69 | #' remove from other samples the selected absorption feature by subtraction. 70 | #' Samples with low concentration related to this absorption will then have 71 | #' large negative absorption after the subtraction step 72 | #' and hence will be likely to be selected rapidly by the selection procedure 73 | #' as well. 74 | #' 75 | #' @note The selection procedure is sensitive to noisy features in the signal. 76 | #' The number of samples selected `k` selected by the algorithm cannot be 77 | #' greater than the number of wavelengths. 78 | #' @references 79 | #' Honigs D.E., Hieftje, G.M., Mark, H.L. and Hirschfeld, T.B. 1985. 80 | #' Unique-sample selection via Near-Infrared spectral substraction. 81 | #' Analytical Chemistry, 57, 2299-2303 82 | #' 83 | #' @seealso \code{\link{kenStone}}, \code{\link{naes}}, \code{\link{duplex}}, 84 | #' \code{\link{shenkWest}} 85 | #' @export 86 | #' 87 | honigs <- function(X, k, type = c("A", "R", "CR")) { 88 | if (missing(k)) { 89 | stop("'k' must be specified") 90 | } 91 | if (k < 2) { 92 | stop("'k' should be higher than 2") 93 | } 94 | if (ncol(X) < 2) { 95 | stop("'X' must have at least 2 columns") 96 | } 97 | if (k >= nrow(X) | k >= ncol(X)) { 98 | stop("'k' should be lower than nrow(X) or ncol(X)") 99 | } 100 | if (is.data.frame(X)) { 101 | X <- as.matrix(X) 102 | } 103 | 104 | type <- match.arg(type) 105 | if (type == "CR") { 106 | X <- 1 - X 107 | } 108 | # conversion to absorbance 109 | if (type == "R") { 110 | X <- -log10(X) 111 | } 112 | 113 | 114 | n <- nini <- 1:nrow(X) 115 | p <- 1:ncol(X) 116 | model <- rep(NA, k) 117 | psel <- rep(NA, k) 118 | # pdf('test.pdf') 119 | for (i in seq_along(model)) { 120 | aX <- abs(X) 121 | maxx <- max(aX) 122 | idx <- c(which(aX == maxx, arr.ind = TRUE)) 123 | model[i] <- n[idx[1]] 124 | psel[i] <- p[idx[2]] 125 | n <- n[-idx[1]] 126 | weight <- X[, idx[2]] / X[idx[1], idx[2]] # weighting factor 127 | x <- t(X[idx[1], ] %o% weight) 128 | 129 | # matplot(t(X),type='l') lines(X[idx[1]],col=0,cex=1) abline(v=p[idx[2]]) 130 | 131 | X <- X - x # subtraction 132 | p <- p[-idx[2]] 133 | X <- X[-idx[1], -idx[2]] 134 | } 135 | model <- model[!is.na(model)] 136 | psel <- psel[!is.na(psel)] 137 | return(list(model = model, test = nini[-model], bands = psel)) 138 | } 139 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "README" 3 | output: md_document 4 | date: "2022-09-18" 5 | --- 6 | 7 | # prospectr 8 | 9 | [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/prospectr)](https://CRAN.R-project.org/package=prospectr) 10 | [![Downloads](https://cranlogs.r-pkg.org/badges/prospectr)](https://cranlogs.r-pkg.org/badges/prospectr) 11 | 12 | 13 | 14 | 15 | 16 |

Misc. Functions for Processing and Sample Selection of Spectroscopic Data

17 | _Antoine Stevens & Leo Ramirez-Lopez_ 18 | 19 | _Last update: `r Sys.Date()`_ 20 | 21 | 22 | Version: `r paste(prospectr:::pkg_info()[1:2], collapse = " \U002D\U002D ")` 23 | 24 | `prospectr` is becoming more and more used in spectroscopic applications, which 25 | is evidenced by the number of scientific publications citing the package. 26 | This package is very useful for signal processing and chemometrics in general as 27 | it provides various utilities for pre--processing and sample selection 28 | of spectral data. While similar functions are available in other packages, like 29 | [`signal`](https://CRAN.R-project.org/package=signal), the 30 | functions in this package work indifferently for `data.frame`, `matrix` and 31 | `vector` inputs. Besides, several functions are optimized for speed and use 32 | C++ code through the [`Rcpp`](https://CRAN.R-project.org/package=Rcpp) 33 | and [`RcppArmadillo`](https://CRAN.R-project.org/package=RcppArmadillo) 34 | packages. 35 | 36 | ## Installing it from GitHub 37 | 38 | Install this package from github by: 39 | 40 | ``` 41 | remotes::install_github("l-ramirez-lopez/prospectr") 42 | ``` 43 | NOTE: in some MAC Os it is still recommended to install `gfortran` and `clang` 44 | from [here](https://cran.r-project.org/bin/macosx/tools/). Even for R >= 4.0. 45 | For more info, check this [issue](https://github.com/tidyverts/fable/issues/193). 46 | 47 | 48 | ## News 49 | 50 | Check the NEWS document for new functionality and general changes in the package. 51 | 52 | 53 | ## Vignette 54 | 55 | A vignette for `prospectr` explaining its core functionality is available at [https://CRAN.R-project.org/package=prospectr/vignettes/prospectr.html](https://CRAN.R-project.org/package=prospectr/vignettes/prospectr.html). 56 | 57 | 58 | ## Core functionality 59 | 60 | A vignette gives an overview of the main functions of the package. Just 61 | type `vignette("prospectr-intro")` in the console to access it. Currently, the 62 | following preprocessing functions are available: 63 | 64 | - `resample()` : resample a signal to new coordinates by linear or spline interpolation 65 | 66 | - `resample2()` : resample a signal to new coordinates using FWHM values 67 | 68 | - `movav()` : moving average 69 | 70 | - `standardNormalVariate()` : standard normal variate 71 | 72 | - `msc()` : multiplicative scatter correction 73 | 74 | - `detrend()` : detrend normalization 75 | 76 | - `baseline()` : baseline removal/correction 77 | 78 | - `blockScale()` : block scaling 79 | 80 | - `blockNorm()` : sum of squares block weighting 81 | 82 | - `binning()` : average in column--wise subsets 83 | 84 | - `savitzkyGolay()` : Savitzky-Golay filter (smoothing and derivatives) 85 | 86 | - `gapDer()` : gap-segment derivative 87 | 88 | - `continuumRemoval()` : continuum-removed absorbance or reflectance values 89 | 90 | The selection of representative samples/observations for calibration of spectral 91 | models can be achieved with one of the following functions: 92 | 93 | - `naes()` : k-means sampling 94 | 95 | - `kenStone()` : CADEX (Kennard--Stone) algorithm 96 | 97 | - `duplex()` : DUPLEX algorithm 98 | 99 | - `shenkWest()` : SELECT algorithm 100 | 101 | - `puchwein()` : Puchwein sampling 102 | 103 | - `honigs()` : Unique-sample selection by spectral subtraction 104 | 105 | Other useful functions are also available: 106 | 107 | 108 | - `read_nircal()` : read binary files exported from BUCHI NIRCal software 109 | 110 | - `readASD()` : read binary or text files from an ASD instrument (Indico Pro format) 111 | 112 | - `spliceCorrection()` : correct spectra for steps at the splice of detectors in an ASD FieldSpec Pro 113 | 114 | - `cochranTest()` : detects replicate outliers with the Cochran _C_ test 115 | 116 | ## Citing the package 117 | Antoine Stevens and Leornardo Ramirez-Lopez (2025). An introduction to the prospectr package. R package 118 | Vignette R package version 0.2.8. 119 | A BibTeX entry for LaTeX users is: 120 | 121 | ``` 122 | @Manual{stevens2022prospectr, 123 | title = {An introduction to the prospectr package}, 124 | author = {Antoine Stevens and Leornardo Ramirez-Lopez}, 125 | publication = {R package Vignette}, 126 | year = {2025}, 127 | note = {R package version 0.2.8}, 128 | } 129 | ``` 130 | ## Bug report and development version 131 | 132 | You can send an email to the package maintainer () 133 | or create an [issue](https://github.com/l-ramirez-lopez/prospectr/issues) on github. 134 | To install the development version of `prospectr`, simply install [`devtools`](https://CRAN.R-project.org/package=devtools) from 135 | CRAN then run `install_github("l-ramirez-lopez/prospectr")`. 136 | -------------------------------------------------------------------------------- /man/puchwein.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/puchwein.R 3 | \name{puchwein} 4 | \alias{puchwein} 5 | \title{Puchwein algorithm for calibration sampling} 6 | \usage{ 7 | puchwein(X, 8 | pc = 0.95, 9 | k, 10 | min.sel, 11 | details = FALSE, 12 | .center = TRUE, 13 | .scale = FALSE) 14 | } 15 | \arguments{ 16 | \item{X}{a matrix from which the calibration samples are to be selected 17 | (optionally a data frame that can be coerced to a numerical matrix).} 18 | 19 | \item{pc}{the number of principal components retained in the computation of 20 | the distance in the standardized Principal Component space (Mahalanobis 21 | distance). 22 | If \code{pc < 1}, the number of principal components kept corresponds to the 23 | number of components 24 | explaining at least (\code{pc * 100}) percent of the total variance 25 | (default = 0.95 as in the Puchwein paper).} 26 | 27 | \item{k}{the initial limiting distance parameter, if not specified (default), 28 | set to 0.2. According to Puchwein, a good starting value for the limiting 29 | distance is \eqn{d_{ini} = k(p-2)} where \eqn{p} is the number of 30 | principal components} 31 | 32 | \item{min.sel}{minimum number of samples to select for calibration 33 | (default = 5).} 34 | 35 | \item{details}{logical value, if \code{TRUE}, adds a component in the output list 36 | with the indices of the objects kept in each loop (default to \code{FALSE}).} 37 | 38 | \item{.center}{logical value indicating whether the input matrix must be 39 | centered before Principal Component. 40 | Analysis. Default set to TRUE.} 41 | 42 | \item{.scale}{logical value indicating whether the input matrix must be 43 | scaled before Principal Component 44 | Analysis. Default set to FALSE.} 45 | } 46 | \value{ 47 | a \code{list} with components: 48 | \itemize{ 49 | \item{'\code{model}': indices of the observations (row indices of the input 50 | data) 51 | selected for calibration} 52 | \item{'\code{test}': indices of the remaining observations (row indices of the 53 | input data)} 54 | \item{'\code{pc}': a numeric matrix of the scaled pc scores} 55 | \item{'\code{loop.optimal}': index of the loop producing the maximum difference 56 | between the observed and 57 | theoretical sum of leverages of the selected samples} 58 | \item{'\code{leverage}': data frame giving the observed and theoretical 59 | cumulative sums of leverage of the points selected in each loop} 60 | \item{'\code{details}': list with the indices of the observations kept in each 61 | loop} 62 | } 63 | } 64 | \description{ 65 | Select calibration samples from multivariate data using the Puchwein 66 | algorithm 67 | } 68 | \details{ 69 | The Puchwein algorithm select samples from a data matrix by iteratively 70 | eliminating similar samples using the Mahalanobis distance. 71 | It starts by performing a PCA on the input matrix and extracts the score 72 | matrix truncated to \eqn{A}, the number of principal components. The score 73 | matrix is then normalized to unit variance and the Euclidean distance of each 74 | sample to the centre of the data is computed, which is identical to the 75 | Mahalanobis distance \eqn{H}. Additionally, the Mahalanobis distances between 76 | samples are comptuted. The algorithm then proceeds as follows: 77 | 78 | \enumerate{ 79 | \item Choose a initial limiting distance \eqn{d_{ini}} 80 | \item Select the sample with the highest \eqn{H} distance to the centre 81 | \item Remove all samples within the minimum distance \eqn{d_{ini}} from 82 | the sample selected in step 2 83 | \item Go back to step 2 and proceed until there are no samples/observations 84 | left in the dataset 85 | \item Go back to step 1 and increase the minimum distance by multiplying 86 | the limiting distance by the loop number 87 | } 88 | 89 | It is not possible to obtain a pre-defined number of samples selected by the 90 | method. To choose the adequate number of samples, a data frame is returned 91 | by \code{puchwein} function (\code{leverage}) giving the observed and theoretical 92 | cumulative sum of leverages of the points selected in each iteration. The 93 | theoretical cumulative sum of leverage is computed such as each point has the 94 | same leverage (the sum of leverages divided by the number of observations). 95 | The loop having the largest difference between the observed and theoretical 96 | sums is considered as producing the optimal selection of points (the subset 97 | that best reproduces the variability of the predictor space). 98 | } 99 | \note{ 100 | The Puchwein algorithm is an iterative method and can be slow for large 101 | data matrices. 102 | } 103 | \examples{ 104 | data(NIRsoil) 105 | sel <- puchwein(NIRsoil$spc, k = 0.2, pc = .99) 106 | plot(sel$pc[, 1:2]) 107 | # points selected for calibration 108 | points(NIRsoil$spc[sel$model, 1:2], col = 2, pch = 2) 109 | # Leverage plot 110 | opar <- par(no.readonly = TRUE) 111 | par(mar = c(4, 5, 2, 2)) 112 | plot(sel$leverage$loop, sel$leverage$diff, 113 | type = "l", 114 | xlab = "# loops", 115 | ylab = "Difference between theoretical and \n observed sum of leverages" 116 | ) 117 | par(opar) 118 | } 119 | \references{ 120 | Puchwein, G., 1988. Selection of calibration samples for near-infrared 121 | spectrometry by factor analysis of spectra. Analytical Chemystry 60, 569-573. 122 | 123 | Shetty, N., Rinnan, A., and Gislum, R., 2012. Selection of representative 124 | calibration sample sets for near-infrared reflectance spectroscopy to predict 125 | nitrogen concentration in grasses. Chemometrics and Intelligent Laboratory 126 | Systems 111, 59-65. 127 | } 128 | \seealso{ 129 | \code{\link{kenStone}}, \code{\link{duplex}}, 130 | \code{\link{shenkWest}}, \code{\link{honigs}}, \code{\link{naes}} 131 | } 132 | \author{ 133 | Antoine Stevens 134 | } 135 | -------------------------------------------------------------------------------- /R/naes.R: -------------------------------------------------------------------------------- 1 | #' @title k-means sampling 2 | #' @description 3 | #' Perform a k-means sampling on a matrix for multivariate calibration 4 | #' @usage 5 | #' naes(X, k, pc, iter.max = 10, method = 0, .center = TRUE, .scale = FALSE) 6 | #' @param X a numeric matrix (optionally a data frame that can 7 | #' be coerced to a numerical matrix). 8 | #' @param k either the number of calibration samples to select or a set of 9 | #' cluster centres to initiate the k-means clustering. 10 | #' @param pc optional. If not specified, k-means is run directly on the variable 11 | #' (Euclidean) space. 12 | #' Alternatively, a PCA is performed before k-means and `pc` is the number of 13 | #' principal components kept. If `pc < 1`,the number of principal components 14 | #' kept corresponds to the number of components explaining at least (`pc * 100`) 15 | #' percent of the total variance. 16 | #' @param iter.max maximum number of iterations allowed for the k-means 17 | #' clustering. Default is `iter.max = 10` (see `?kmeans`). 18 | #' @param method the method used for selecting calibration samples within each 19 | #' cluster: either samples closest to the cluster. 20 | #' centers (`method = 0`, default), samples farthest away from the centre of the 21 | #' data (`method = 1`) or 22 | #' random selection (`method = 2`). 23 | #' @param .center logical value indicating whether the input matrix must be 24 | #' centered before Principal Component Analysis. Default set to \code{TRUE}. 25 | #' @param .scale logical value indicating whether the input matrix must be 26 | #' scaled before Principal Component Analysis. Default set to \code{FALSE}. 27 | #' @return a list with components: 28 | #' \itemize{ 29 | #' \item{'`model`': numeric vector giving the row indices of the input data 30 | #' selected for calibration} 31 | #' \item{'`test`': numeric vector giving the row indices of the remaining 32 | #' observations} 33 | #' \item{'`pc`': if the `pc` argument is specified, a numeric matrix of the 34 | #' scaled pc scores} 35 | #' \item{'`cluster`': integer vector indicating the cluster to which each 36 | #' point was assigned} 37 | #' \item{'`centers`': a matrix of cluster centres} 38 | #' } 39 | #' @details K-means sampling is a simple procedure based on cluster analysis to 40 | #' select calibration samples from large multivariate datasets. 41 | #' The method can be described in three points (Naes et al.,2001): 42 | #' 43 | #' \enumerate{ 44 | #' \item Perform a PCA and decide how many principal component to keep, 45 | #' \item Carry out a k-means clustering on the principal component scores and 46 | #' choose the number of resulting clusters to be equal to 47 | #' the number of desired calibration samples, 48 | #' \item Select one sample from each cluster. 49 | #' } 50 | #' @references 51 | #' Naes, T., 1987. The design of calibration in near infra-red reflectance 52 | #' analysis by clustering. Journal of Chemometrics 1, 121-134. 53 | #' 54 | #' Naes, T., Isaksson, T., Fearn, T., and Davies, T., 2002. A user friendly 55 | #' guide to multivariate calibration and classification. NIR Publications, 56 | #' Chichester, United Kingdom. 57 | #' @examples 58 | #' data(NIRsoil) 59 | #' sel <- naes(NIRsoil$spc, k = 5, p = .99, method = 0) 60 | #' # clusters 61 | #' plot(sel$pc[, 1:2], col = sel$cluster + 2) 62 | #' # points selected for calibration with method = 0 63 | #' points(sel$pc[sel$model, 1:2], 64 | #' col = 2, 65 | #' pch = 19, 66 | #' cex = 1 67 | #' ) 68 | #' # pre-defined centers can also be provided 69 | #' sel2 <- naes(NIRsoil$spc, 70 | #' k = sel$centers, 71 | #' p = .99, method = 1 72 | #' ) 73 | #' # points selected for calibration with method = 1 74 | #' points(sel$pc[sel2$model, 1:2], 75 | #' col = 1, 76 | #' pch = 15, 77 | #' cex = 1 78 | #' ) 79 | #' @author Antoine Stevens & \href{https://orcid.org/0000-0002-5369-5120}{Leonardo Ramirez-Lopez} 80 | #' @seealso \code{\link{kenStone}}, \code{\link{honigs}}, \code{\link{duplex}}, 81 | #' \code{\link{shenkWest}} 82 | #' @export 83 | 84 | naes <- function(X, k, pc, iter.max = 10, method = 0, .center = TRUE, .scale = FALSE) { 85 | if (is.data.frame(X)) { 86 | X <- as.matrix(X) 87 | } 88 | if (missing(k)) { 89 | stop("'k' must be a number or matrix") 90 | } 91 | 92 | if (ncol(X) < 2) { 93 | stop("'X' must have at least 2 columns") 94 | } 95 | 96 | if (!method %in% 0:2) { 97 | stop("'method' must be 0, 1 or 2") 98 | } 99 | 100 | if (!missing(pc)) { 101 | pca <- prcomp(X, center = .center, scale = .scale) 102 | if (pc < 1) { 103 | pvar <- pca$sdev^2 / sum(pca$sdev^2) 104 | pcsum <- cumsum(pvar) < pc 105 | if (any(pcsum)) { 106 | pc <- max(which(pcsum)) + 1 107 | } else { 108 | pc <- 1 109 | } 110 | } 111 | X <- sweep(pca$x[, 1:pc, drop = F], 2, pca$sdev[1:pc], "/") # scaling of the scores 112 | } 113 | 114 | if (length(k) > 1) { 115 | if (ncol(k) != ncol(X)) { 116 | stop("number of columns in 'k' must be equal to the number of columns in 'X'") 117 | } 118 | n <- nrow(k) 119 | } else { 120 | if (k < 2) { 121 | stop("'k' has to be higher than 2") 122 | } 123 | if (k >= nrow(X)) { 124 | stop("'k' must be lower than nrow(X)") 125 | } 126 | n <- k 127 | } 128 | 129 | kM <- kmeans(x = X, centers = k, iter.max = iter.max, nstart = 1) 130 | id <- 1:nrow(X) 131 | 132 | if (method == 0) { 133 | # select sample within each cluster the closest to the center of the cluster 134 | model <- rep(NA, n) 135 | for (i in 1:n) { 136 | idx <- kM$cluster == i 137 | d <- fastDistV(X[idx, , drop = F], kM$center[i, ], "euclid") # Euclidean distance to the centre of the cluster 138 | model[i] <- id[idx][which.min(d)] 139 | } 140 | } else if (method == 1) { 141 | # select sample within each cluster the farthest apart from the center of the data 142 | d <- fastDistV(X, colMeans(X), "euclid") # Euclidean distance to the centre 143 | model <- by(data.frame(id = id, d = d), kM$cluster, function(x) x$id[which.max(x$d)]) 144 | attributes(model) <- NULL # delete attributes 145 | } else { 146 | # method==2 random sampling within each cluster 147 | model <- tapply(id, kM$cluster, function(x) sample(x, 1)) 148 | } 149 | if (missing(pc)) { 150 | return(list(model = model, test = id[-model], cluster = kM$cluster, centers = kM$centers)) 151 | } else { 152 | return(list(model = model, test = id[-model], pc = X, cluster = kM$cluster, centers = kM$centers)) 153 | } 154 | } 155 | -------------------------------------------------------------------------------- /man/kenStone.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/kenstone.R 3 | \name{kenStone} 4 | \alias{kenStone} 5 | \title{Kennard-Stone algorithm for calibration sampling} 6 | \usage{ 7 | kenStone(X, k, metric = "mahal", pc, group, 8 | .center = TRUE, .scale = FALSE, init = NULL) 9 | } 10 | \arguments{ 11 | \item{X}{a numeric matrix.} 12 | 13 | \item{k}{number of calibration samples to be selected.} 14 | 15 | \item{metric}{distance metric to be used: 'euclid' (Euclidean distance) or 16 | 'mahal' (Mahalanobis distance, default).} 17 | 18 | \item{pc}{optional. If not specified, distance are computed in the Euclidean 19 | space. Alternatively, distance are computed 20 | in the principal component score space and \code{pc} is the number of principal 21 | components retained. 22 | If \code{pc < 1}, the number of principal components kept corresponds to the 23 | number of components explaining at least (\code{pc * 100}) percent of the total 24 | variance.} 25 | 26 | \item{group}{An optional \code{factor} (or vector that can be coerced to a factor 27 | by \code{\link{as.factor}}) of length equal to \code{nrow(X)}, giving the identifier 28 | of related observations (e.g. samples of the same batch of measurements, 29 | samples of the same origin, or of the same soil profile). Note that by using 30 | this option in some cases, the number of samples retrieved is not exactly the 31 | one specified in \code{k} as it will depend on the groups. See details.} 32 | 33 | \item{.center}{logical value indicating whether the input matrix should be 34 | centered before Principal Component Analysis. Default set to \code{TRUE}.} 35 | 36 | \item{.scale}{logical value indicating whether the input matrix should be 37 | scaled before Principal Component 38 | Analysis. Default set to \code{FALSE}.} 39 | 40 | \item{init}{(optional) a vector of integers indicating the indices of the 41 | observations/rows that are to be used as observations that must be included 42 | at the first iteration of the search process. Default is \code{NULL}, i.e. no 43 | fixed initialization. The function will take by default the two most distant 44 | observations. If the \code{group} argument is used, then all the observations 45 | in the groups covered by the \code{init} observations will be also included 46 | in the \code{init} subset.} 47 | } 48 | \value{ 49 | a list with the following components: 50 | \itemize{ 51 | \item{\code{model}: numeric vector giving the row indices of the input data 52 | selected for calibration} 53 | \item{\code{test}: numeric vector giving the row indices of the remaining 54 | observations} 55 | \item{\code{pc}: if the \code{pc} argument is specified, a numeric matrix of the 56 | scaled pc scores} 57 | } 58 | } 59 | \description{ 60 | \loadmathjax 61 | Select calibration samples from a large multivariate data using the 62 | Kennard-Stone algorithm 63 | } 64 | \details{ 65 | The Kennard--Stone algorithm allows to select samples with a uniform 66 | distribution over the predictor space (Kennard and Stone, 1969). 67 | It starts by selecting the pair of points that are the farthest apart. 68 | They are assigned to the calibration set and removed from the list of points. 69 | Then, the procedure assigns remaining points to the calibration set 70 | by computing the distance between each unassigned points 71 | \mjeqn{i_0}{i_0} and selected points \mjeqn{i}{i} 72 | and finding the point for which: 73 | 74 | \mjdeqn{d_{selected} = \max\limits_{i_0}(\min\limits_{i}(d_{i,i_{0}}))}{d_{sel ected} = \max_{i_0}(\min_{i}(d_{i,i0}))} 75 | 76 | This essentially selects point \mjeqn{i_0}{i_0} which is the farthest apart from its 77 | closest neighbors \mjeqn{i}{i} in the calibration set. 78 | The algorithm uses the Euclidean distance to select the points. However, 79 | the Mahalanobis distance can also be used. This can be achieved by performing 80 | a PCA on the input data and computing the Euclidean distance on the truncated 81 | score matrix according to the following definition of the Mahalanobis \mjeqn{H}{H} 82 | distance: 83 | 84 | \mjdeqn{H_{ij}^2 = \sum_{a=1}^A (\hat t_{ia} - \hat t_{ja})^{2} / \hat \lambda_a}{H_{ij}^2 = sum_{a=1}^A (hat t_{ia} - hat t_{ja})^{2} / hat lambda_a} 85 | 86 | where \mjeqn{\hat t_{ia}}{hatt_{ia}} is the \mjeqn{a^{th}}{a^{th}} principal component 87 | score of point \mjeqn{i}{i}, \mjeqn{\hat t_{ja}}{hatt_{ja}} is the 88 | corresponding value for point \mjeqn{j}{j}, 89 | \mjeqn{\hat \lambda_a}{hat lambda_a} is the eigenvalue of principal 90 | component \mjeqn{a}{a} and \mjeqn{A}{A} is the number of principal components 91 | included in the computation. 92 | 93 | When the \code{group} argument is used, the sampling is conducted in such a 94 | way that at each iteration, when a single sample is selected, this sample 95 | along with all the samples that belong to its group, are assigned to the 96 | final calibration set. In this respect, at each iteration, the algorithm 97 | will select one sample (in case that sample is the only one in that group) 98 | or more to the calibration set. This also implies that the argument \code{k} 99 | passed to the function will not necessary reflect the exact number of samples 100 | selected. For example, if \code{k = 2} and if the first sample identified 101 | belongs to with group of 5 samples and the second one belongs to a group with 102 | 10 samples, then, the total amount of samples retrieved by the 103 | function will be 15. 104 | } 105 | \examples{ 106 | data(NIRsoil) 107 | sel <- kenStone(NIRsoil$spc, k = 30, pc = .99) 108 | plot(sel$pc[, 1:2], xlab = "PC1", ylab = "PC2") 109 | # points selected for calibration 110 | points(sel$pc[sel$model, 1:2], pch = 19, col = 2) 111 | # Test on artificial data 112 | X <- expand.grid(1:20, 1:20) + rnorm(1e5, 0, .1) 113 | plot(X, xlab = "VAR1", ylab = "VAR2") 114 | sel <- kenStone(X, k = 25, metric = "euclid") 115 | points(X[sel$model, ], pch = 19, col = 2) 116 | 117 | # Using the group argument 118 | library(prospectr) 119 | 120 | # create groups 121 | set.seed(1) 122 | my_groups <- sample(1:275, nrow(NIRsoil$spc), replace = TRUE) 123 | my_groups <- as.factor(my_groups) 124 | 125 | # check the group size 126 | table(my_groups) 127 | 128 | results_group <- kenStone(X = NIRsoil$spc, k = 2, pc = 3, group = my_groups) 129 | 130 | # as the first two samples selected belong to groups 131 | # which have in total more than 2 samples (k). 132 | table(factor(my_groups[results_group$model])) 133 | 134 | } 135 | \references{ 136 | Kennard, R.W., and Stone, L.A., 1969. Computer aided design of experiments. 137 | Technometrics 11, 137-148. 138 | } 139 | \seealso{ 140 | \code{\link{duplex}}, \code{\link{shenkWest}}, \code{\link{naes}}, 141 | \code{\link{honigs}} 142 | } 143 | \author{ 144 | Antoine Stevens & 145 | \href{https://orcid.org/0000-0002-5369-5120}{Leonardo Ramirez-Lopez} with 146 | contributions from Thorsten Behrens and Philipp Baumann 147 | } 148 | -------------------------------------------------------------------------------- /vignettes/prospectr.bib: -------------------------------------------------------------------------------- 1 | @InCollection{barnes1989, 2 | title={Standard normal variate transformation and de-trending of near-infrared diffuse reflectance spectra}, 3 | author={Barnes, RJ and Dhanoa, Mewa Singh and Lister, Susan J}, 4 | journal={Applied spectroscopy}, 5 | volume={43}, 6 | number={5}, 7 | pages={772--777}, 8 | year={1989}, 9 | publisher={SAGE Publications Sage UK: London, England} 10 | } 11 | 12 | @InCollection{clark1984, 13 | title={Reflectance spectroscopy: Quantitative analysis techniques for remote sensing applications}, 14 | author={Clark, Roger N and Roush, Ted L}, 15 | journal={Journal of Geophysical Research: Solid Earth}, 16 | volume={89}, 17 | number={B7}, 18 | pages={6329--6340}, 19 | year={1984}, 20 | publisher={Wiley Online Library} 21 | } 22 | 23 | 24 | @Book{eriksson2006, 25 | title={Multi-and megavariate data analysis}, 26 | author={Eriksson, Lennart and Johansson, Erik and Kettaneh-Wold, N and Trygg, Johan and Wikstr{\"o}m, C and Wold, Svante}, 27 | volume={1}, 28 | year={2006}, 29 | publisher={Umetrics Sweden} 30 | } 31 | 32 | @article{fearn2008, 33 | title={The interaction between standard normal variate and derivatives}, 34 | author={Fearn, Tom}, 35 | journal={NIR news}, 36 | volume={19}, 37 | number={7}, 38 | pages={16--17}, 39 | year={2008}, 40 | publisher={SAGE Publications Sage UK: London, England} 41 | } 42 | 43 | @article{fearn2010, 44 | title={Combining other predictors with NIR spectra}, 45 | author={Fearn, Tom}, 46 | journal={NIR news}, 47 | volume={21}, 48 | number={2}, 49 | pages={13--16}, 50 | year={2010}, 51 | publisher={SAGE Publications Sage UK: London, England} 52 | } 53 | 54 | @article{fernandez2008, 55 | title={Soil parameter quantification by NIRS as a Chemometric challenge at ‘Chimiom{\'e}trie 2006’}, 56 | author={Fernandez-Pierna, Juan Antonio and Dardenne, Pierre}, 57 | journal={Chemometrics and intelligent laboratory systems}, 58 | volume={91}, 59 | number={1}, 60 | pages={94--98}, 61 | year={2008}, 62 | publisher={Elsevier} 63 | } 64 | 65 | @article{honigs1985, 66 | title={Unique-sample selection via near-infrared spectral subtraction}, 67 | author={Honigs, DE and Hieftje, Gary M and Mark, HL and Hirschfeld, TB}, 68 | journal={Analytical Chemistry}, 69 | volume={57}, 70 | number={12}, 71 | pages={2299--2303}, 72 | year={1985}, 73 | publisher={ACS Publications} 74 | } 75 | 76 | @article{kennard1969, 77 | title={Computer aided design of experiments}, 78 | author={Kennard, Ronald W and Stone, Larry A}, 79 | journal={Technometrics}, 80 | volume={11}, 81 | number={1}, 82 | pages={137--148}, 83 | year={1969}, 84 | publisher={Taylor \& Francis} 85 | } 86 | 87 | @article{mullen2007, 88 | title={An Introduction to the'Special Volume Spectroscopy and Chemometrics in R'}, 89 | author={Mullen, Katharine M and van Stokkum, Ivo HM}, 90 | journal={Journal of Statistical Software}, 91 | volume={18}, 92 | number={01}, 93 | year={2007}, 94 | publisher={American Statistical Association} 95 | } 96 | 97 | @misc{naes2002, 98 | title={Outlier detection. A user-friendly guide to multivariate calibration and classification}, 99 | author={Naes, T and Isaksson, T and Fearn, T and Davies, T}, 100 | year={2002}, 101 | publisher={NIR Publications, Chichester} 102 | } 103 | 104 | @article{puchwein1988, 105 | title={Selection of calibration samples for near-infrared spectrometry by factor analysis of spectra}, 106 | author={Puchwein, Gerd}, 107 | journal={Analytical Chemistry}, 108 | volume={60}, 109 | number={6}, 110 | pages={569--573}, 111 | year={1988}, 112 | publisher={ACS Publications} 113 | } 114 | 115 | @article{savitzky1964, 116 | title={Smoothing and differentiation of data by simplified least squares procedures.}, 117 | author={Savitzky, Abraham and Golay, Marcel JE}, 118 | journal={Analytical chemistry}, 119 | volume={36}, 120 | number={8}, 121 | pages={1627--1639}, 122 | year={1964}, 123 | publisher={ACS Publications} 124 | } 125 | 126 | @article{shenk1991, 127 | title={Population definition, sample selection, and calibration procedures for near infrared reflectance spectroscopy}, 128 | author={Shenk, JS and Westerhaus, MO}, 129 | journal={Crop science}, 130 | volume={31}, 131 | number={2}, 132 | pages={469--474}, 133 | year={1991}, 134 | publisher={Crop Science Society of America} 135 | } 136 | 137 | @article{shetty2012, 138 | title={Selection of representative calibration sample sets for near-infrared reflectance spectroscopy to predict nitrogen concentration in grasses}, 139 | author={Shetty, Nisha and Rinnan, {\AA}smund and Gislum, Ren{\'e}}, 140 | journal={Chemometrics and Intelligent Laboratory Systems}, 141 | volume={111}, 142 | number={1}, 143 | pages={59--65}, 144 | year={2012}, 145 | publisher={Elsevier} 146 | } 147 | 148 | @article{snee1977, 149 | title={Validation of regression models: methods and examples}, 150 | author={Snee, Ronald D}, 151 | journal={Technometrics}, 152 | volume={19}, 153 | number={4}, 154 | pages={415--428}, 155 | year={1977}, 156 | publisher={Taylor \& Francis Group} 157 | } 158 | 159 | @article{ramirez2014, 160 | title={Sampling optimal calibration sets in soil infrared spectroscopy}, 161 | author={Ramirez-Lopez, Leonardo and Schmidt, Karsten and Behrens, Thorsten and Van Wesemael, Bas and Dematt{\^e}, Jose AM and Scholten, Thomas}, 162 | journal={Geoderma}, 163 | volume={226}, 164 | pages={140--150}, 165 | year={2014}, 166 | publisher={Elsevier} 167 | } 168 | 169 | @article{rinnan2009review, 170 | title={Review of the most common pre-processing techniques for near-infrared spectra}, 171 | author={Rinnan, {\AA}smund and Van Den Berg, Frans and Engelsen, S{\o}ren Balling}, 172 | journal={TrAC Trends in Analytical Chemistry}, 173 | volume={28}, 174 | number={10}, 175 | pages={1201--1222}, 176 | year={2009}, 177 | publisher={Elsevier} 178 | } 179 | 180 | @article{geladi1985linearization, 181 | title={Linearization and scatter-correction for near-infrared reflectance spectra of meat}, 182 | author={Geladi, P and MacDougall, D and Martens, H}, 183 | journal={Applied spectroscopy}, 184 | volume={39}, 185 | number={3}, 186 | pages={491--500}, 187 | year={1985}, 188 | publisher={Society for Applied Spectroscopy} 189 | } 190 | 191 | @article{hopkins2001norris, 192 | title={What is a Norris derivative?}, 193 | author={Hopkins, David W}, 194 | journal={NIR news}, 195 | volume={12}, 196 | number={3}, 197 | pages={3--5}, 198 | year={2001}, 199 | publisher={SAGE Publications Sage UK: London, England} 200 | } 201 | 202 | @article{luo2005properties, 203 | title={Properties of Savitzky--Golay digital differentiators}, 204 | author={Luo, Jianwen and Ying, Kui and He, Ping and Bai, Jing}, 205 | journal={Digital Signal Processing}, 206 | volume={15}, 207 | number={2}, 208 | pages={122--136}, 209 | year={2005}, 210 | publisher={Elsevier} 211 | } 212 | 213 | 214 | -------------------------------------------------------------------------------- /src/RcppExports.cpp: -------------------------------------------------------------------------------- 1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #include 5 | #include 6 | 7 | using namespace Rcpp; 8 | 9 | #ifdef RCPP_USE_GLOBAL_ROSTREAM 10 | Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); 11 | Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); 12 | #endif 13 | 14 | // bitAND 15 | int bitAND(int aa, int bb); 16 | RcppExport SEXP _prospectr_bitAND(SEXP aaSEXP, SEXP bbSEXP) { 17 | BEGIN_RCPP 18 | Rcpp::RObject rcpp_result_gen; 19 | Rcpp::RNGScope rcpp_rngScope_gen; 20 | Rcpp::traits::input_parameter< int >::type aa(aaSEXP); 21 | Rcpp::traits::input_parameter< int >::type bb(bbSEXP); 22 | rcpp_result_gen = Rcpp::wrap(bitAND(aa, bb)); 23 | return rcpp_result_gen; 24 | END_RCPP 25 | } 26 | // bitSR 27 | int bitSR(int a, int b); 28 | RcppExport SEXP _prospectr_bitSR(SEXP aSEXP, SEXP bSEXP) { 29 | BEGIN_RCPP 30 | Rcpp::RObject rcpp_result_gen; 31 | Rcpp::RNGScope rcpp_rngScope_gen; 32 | Rcpp::traits::input_parameter< int >::type a(aSEXP); 33 | Rcpp::traits::input_parameter< int >::type b(bSEXP); 34 | rcpp_result_gen = Rcpp::wrap(bitSR(a, b)); 35 | return rcpp_result_gen; 36 | END_RCPP 37 | } 38 | // convCppM 39 | NumericMatrix convCppM(NumericMatrix X, NumericVector f); 40 | RcppExport SEXP _prospectr_convCppM(SEXP XSEXP, SEXP fSEXP) { 41 | BEGIN_RCPP 42 | Rcpp::RObject rcpp_result_gen; 43 | Rcpp::RNGScope rcpp_rngScope_gen; 44 | Rcpp::traits::input_parameter< NumericMatrix >::type X(XSEXP); 45 | Rcpp::traits::input_parameter< NumericVector >::type f(fSEXP); 46 | rcpp_result_gen = Rcpp::wrap(convCppM(X, f)); 47 | return rcpp_result_gen; 48 | END_RCPP 49 | } 50 | // convCppV 51 | NumericVector convCppV(NumericVector X, NumericVector f); 52 | RcppExport SEXP _prospectr_convCppV(SEXP XSEXP, SEXP fSEXP) { 53 | BEGIN_RCPP 54 | Rcpp::RObject rcpp_result_gen; 55 | Rcpp::RNGScope rcpp_rngScope_gen; 56 | Rcpp::traits::input_parameter< NumericVector >::type X(XSEXP); 57 | Rcpp::traits::input_parameter< NumericVector >::type f(fSEXP); 58 | rcpp_result_gen = Rcpp::wrap(convCppV(X, f)); 59 | return rcpp_result_gen; 60 | END_RCPP 61 | } 62 | // fastDist 63 | arma::mat fastDist(NumericMatrix X, NumericMatrix Y, String method); 64 | RcppExport SEXP _prospectr_fastDist(SEXP XSEXP, SEXP YSEXP, SEXP methodSEXP) { 65 | BEGIN_RCPP 66 | Rcpp::RObject rcpp_result_gen; 67 | Rcpp::RNGScope rcpp_rngScope_gen; 68 | Rcpp::traits::input_parameter< NumericMatrix >::type X(XSEXP); 69 | Rcpp::traits::input_parameter< NumericMatrix >::type Y(YSEXP); 70 | Rcpp::traits::input_parameter< String >::type method(methodSEXP); 71 | rcpp_result_gen = Rcpp::wrap(fastDist(X, Y, method)); 72 | return rcpp_result_gen; 73 | END_RCPP 74 | } 75 | // fastDistV 76 | NumericVector fastDistV(NumericMatrix X, NumericVector Y, String method); 77 | RcppExport SEXP _prospectr_fastDistV(SEXP XSEXP, SEXP YSEXP, SEXP methodSEXP) { 78 | BEGIN_RCPP 79 | Rcpp::RObject rcpp_result_gen; 80 | Rcpp::RNGScope rcpp_rngScope_gen; 81 | Rcpp::traits::input_parameter< NumericMatrix >::type X(XSEXP); 82 | Rcpp::traits::input_parameter< NumericVector >::type Y(YSEXP); 83 | Rcpp::traits::input_parameter< String >::type method(methodSEXP); 84 | rcpp_result_gen = Rcpp::wrap(fastDistV(X, Y, method)); 85 | return rcpp_result_gen; 86 | END_RCPP 87 | } 88 | // get_msc_coeff 89 | NumericMatrix get_msc_coeff(arma::mat X, arma::vec ref_spectrum); 90 | RcppExport SEXP _prospectr_get_msc_coeff(SEXP XSEXP, SEXP ref_spectrumSEXP) { 91 | BEGIN_RCPP 92 | Rcpp::RObject rcpp_result_gen; 93 | Rcpp::RNGScope rcpp_rngScope_gen; 94 | Rcpp::traits::input_parameter< arma::mat >::type X(XSEXP); 95 | Rcpp::traits::input_parameter< arma::vec >::type ref_spectrum(ref_spectrumSEXP); 96 | rcpp_result_gen = Rcpp::wrap(get_msc_coeff(X, ref_spectrum)); 97 | return rcpp_result_gen; 98 | END_RCPP 99 | } 100 | // resample_fwhm 101 | NumericMatrix resample_fwhm(NumericMatrix X, NumericVector wav, NumericVector new_wav, NumericVector fwhm); 102 | RcppExport SEXP _prospectr_resample_fwhm(SEXP XSEXP, SEXP wavSEXP, SEXP new_wavSEXP, SEXP fwhmSEXP) { 103 | BEGIN_RCPP 104 | Rcpp::RObject rcpp_result_gen; 105 | Rcpp::RNGScope rcpp_rngScope_gen; 106 | Rcpp::traits::input_parameter< NumericMatrix >::type X(XSEXP); 107 | Rcpp::traits::input_parameter< NumericVector >::type wav(wavSEXP); 108 | Rcpp::traits::input_parameter< NumericVector >::type new_wav(new_wavSEXP); 109 | Rcpp::traits::input_parameter< NumericVector >::type fwhm(fwhmSEXP); 110 | rcpp_result_gen = Rcpp::wrap(resample_fwhm(X, wav, new_wav, fwhm)); 111 | return rcpp_result_gen; 112 | END_RCPP 113 | } 114 | // resample_fwhm_vec 115 | NumericVector resample_fwhm_vec(NumericVector X, NumericVector wav, NumericVector new_wav, NumericVector fwhm); 116 | RcppExport SEXP _prospectr_resample_fwhm_vec(SEXP XSEXP, SEXP wavSEXP, SEXP new_wavSEXP, SEXP fwhmSEXP) { 117 | BEGIN_RCPP 118 | Rcpp::RObject rcpp_result_gen; 119 | Rcpp::RNGScope rcpp_rngScope_gen; 120 | Rcpp::traits::input_parameter< NumericVector >::type X(XSEXP); 121 | Rcpp::traits::input_parameter< NumericVector >::type wav(wavSEXP); 122 | Rcpp::traits::input_parameter< NumericVector >::type new_wav(new_wavSEXP); 123 | Rcpp::traits::input_parameter< NumericVector >::type fwhm(fwhmSEXP); 124 | rcpp_result_gen = Rcpp::wrap(resample_fwhm_vec(X, wav, new_wav, fwhm)); 125 | return rcpp_result_gen; 126 | END_RCPP 127 | } 128 | // residLm 129 | NumericMatrix residLm(NumericMatrix Yr, NumericMatrix Xr); 130 | RcppExport SEXP _prospectr_residLm(SEXP YrSEXP, SEXP XrSEXP) { 131 | BEGIN_RCPP 132 | Rcpp::RObject rcpp_result_gen; 133 | Rcpp::RNGScope rcpp_rngScope_gen; 134 | Rcpp::traits::input_parameter< NumericMatrix >::type Yr(YrSEXP); 135 | Rcpp::traits::input_parameter< NumericMatrix >::type Xr(XrSEXP); 136 | rcpp_result_gen = Rcpp::wrap(residLm(Yr, Xr)); 137 | return rcpp_result_gen; 138 | END_RCPP 139 | } 140 | 141 | static const R_CallMethodDef CallEntries[] = { 142 | {"_prospectr_bitAND", (DL_FUNC) &_prospectr_bitAND, 2}, 143 | {"_prospectr_bitSR", (DL_FUNC) &_prospectr_bitSR, 2}, 144 | {"_prospectr_convCppM", (DL_FUNC) &_prospectr_convCppM, 2}, 145 | {"_prospectr_convCppV", (DL_FUNC) &_prospectr_convCppV, 2}, 146 | {"_prospectr_fastDist", (DL_FUNC) &_prospectr_fastDist, 3}, 147 | {"_prospectr_fastDistV", (DL_FUNC) &_prospectr_fastDistV, 3}, 148 | {"_prospectr_get_msc_coeff", (DL_FUNC) &_prospectr_get_msc_coeff, 2}, 149 | {"_prospectr_resample_fwhm", (DL_FUNC) &_prospectr_resample_fwhm, 4}, 150 | {"_prospectr_resample_fwhm_vec", (DL_FUNC) &_prospectr_resample_fwhm_vec, 4}, 151 | {"_prospectr_residLm", (DL_FUNC) &_prospectr_residLm, 2}, 152 | {NULL, NULL, 0} 153 | }; 154 | 155 | RcppExport void R_init_prospectr(DllInfo *dll) { 156 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 157 | R_useDynamicSymbols(dll, FALSE); 158 | } 159 | --------------------------------------------------------------------------------