├── .Rbuildignore
├── tests
    ├── testthat.R
    └── testthat
    │   ├── test.get_ecosis_data.R
    │   ├── test.simple_plsr.R
    │   ├── test.create_data_split.R
    │   └── test.optimal_components.R
├── data
    └── ely_plsr_data.rda
├── spectratrait_1.2.6.pdf
├── vignettes
    ├── kit_sla_ex3.pdf
    ├── ely_etal_ex1.pdf
    ├── ely_etal_ex2.pdf
    ├── neon_lma_ex4.pdf
    ├── reseco_leafN_ex6.pdf
    ├── reseco_leafN_ex7.pdf
    ├── reseco_lma_ex8.pdf
    ├── neon_canopy_leafN_ex5.pdf
    ├── neon_canopy_lma_ex10.pdf
    ├── sserbin2019_plsr_ex9.pdf
    ├── kit_sla_ex3_files
    │   └── figure-gfm
    │   │   ├── unnamed-chunk-7-1.png
    │   │   ├── unnamed-chunk-9-1.png
    │   │   ├── unnamed-chunk-10-1.png
    │   │   ├── unnamed-chunk-11-1.png
    │   │   ├── unnamed-chunk-12-1.png
    │   │   ├── unnamed-chunk-13-1.png
    │   │   ├── unnamed-chunk-15-1.png
    │   │   └── unnamed-chunk-16-1.png
    ├── ely_etal_ex1_files
    │   └── figure-gfm
    │   │   ├── unnamed-chunk-10-1.png
    │   │   ├── unnamed-chunk-11-1.png
    │   │   ├── unnamed-chunk-12-1.png
    │   │   ├── unnamed-chunk-14-1.png
    │   │   ├── unnamed-chunk-15-1.png
    │   │   ├── unnamed-chunk-6-1.png
    │   │   ├── unnamed-chunk-8-1.png
    │   │   └── unnamed-chunk-9-1.png
    ├── ely_etal_ex2_files
    │   └── figure-gfm
    │   │   ├── unnamed-chunk-10-1.png
    │   │   ├── unnamed-chunk-11-1.png
    │   │   ├── unnamed-chunk-12-1.png
    │   │   ├── unnamed-chunk-14-1.png
    │   │   ├── unnamed-chunk-15-1.png
    │   │   ├── unnamed-chunk-6-1.png
    │   │   ├── unnamed-chunk-8-1.png
    │   │   └── unnamed-chunk-9-1.png
    ├── neon_lma_ex4_files
    │   └── figure-gfm
    │   │   ├── unnamed-chunk-10-1.png
    │   │   ├── unnamed-chunk-11-1.png
    │   │   ├── unnamed-chunk-12-1.png
    │   │   ├── unnamed-chunk-14-1.png
    │   │   ├── unnamed-chunk-15-1.png
    │   │   ├── unnamed-chunk-6-1.png
    │   │   ├── unnamed-chunk-8-1.png
    │   │   └── unnamed-chunk-9-1.png
    ├── reseco_leafN_ex6_files
    │   └── figure-gfm
    │   │   ├── unnamed-chunk-7-1.png
    │   │   ├── unnamed-chunk-9-1.png
    │   │   ├── unnamed-chunk-10-1.png
    │   │   ├── unnamed-chunk-11-1.png
    │   │   ├── unnamed-chunk-12-1.png
    │   │   ├── unnamed-chunk-13-1.png
    │   │   ├── unnamed-chunk-15-1.png
    │   │   └── unnamed-chunk-16-1.png
    ├── reseco_leafN_ex7_files
    │   └── figure-gfm
    │   │   ├── unnamed-chunk-7-1.png
    │   │   ├── unnamed-chunk-9-1.png
    │   │   ├── unnamed-chunk-10-1.png
    │   │   ├── unnamed-chunk-11-1.png
    │   │   ├── unnamed-chunk-12-1.png
    │   │   ├── unnamed-chunk-13-1.png
    │   │   ├── unnamed-chunk-15-1.png
    │   │   └── unnamed-chunk-16-1.png
    ├── reseco_lma_ex8_files
    │   └── figure-gfm
    │   │   ├── unnamed-chunk-10-1.png
    │   │   ├── unnamed-chunk-11-1.png
    │   │   ├── unnamed-chunk-12-1.png
    │   │   ├── unnamed-chunk-13-1.png
    │   │   ├── unnamed-chunk-15-1.png
    │   │   ├── unnamed-chunk-17-1.png
    │   │   ├── unnamed-chunk-18-1.png
    │   │   └── unnamed-chunk-7-1.png
    ├── neon_canopy_leafN_ex5_files
    │   └── figure-gfm
    │   │   ├── unnamed-chunk-7-1.png
    │   │   ├── unnamed-chunk-9-1.png
    │   │   ├── unnamed-chunk-10-1.png
    │   │   ├── unnamed-chunk-11-1.png
    │   │   ├── unnamed-chunk-12-1.png
    │   │   ├── unnamed-chunk-13-1.png
    │   │   ├── unnamed-chunk-15-1.png
    │   │   └── unnamed-chunk-16-1.png
    ├── neon_canopy_lma_ex10_files
    │   └── figure-gfm
    │   │   ├── unnamed-chunk-10-1.png
    │   │   ├── unnamed-chunk-11-1.png
    │   │   ├── unnamed-chunk-12-1.png
    │   │   ├── unnamed-chunk-13-1.png
    │   │   ├── unnamed-chunk-15-1.png
    │   │   ├── unnamed-chunk-16-1.png
    │   │   ├── unnamed-chunk-7-1.png
    │   │   └── unnamed-chunk-9-1.png
    ├── sserbin2019_plsr_ex9_files
    │   └── figure-gfm
    │   │   └── unnamed-chunk-11-1.png
    ├── sserbin2019_plsr_ex9.Rmd
    ├── sserbin2019_plsr_ex9.md
    ├── ely_etal_ex2.Rmd
    └── ely_etal_ex1.Rmd
├── R
    ├── plsr_data.R
    ├── get_ecosis_data.R
    ├── f.coef.valid.R
    ├── vip.R
    ├── f.plot.coef.R
    ├── f.plot.spec.R
    ├── utils.R
    ├── create_data_split.R
    ├── pls_permutation.R
    └── find_optimal_components.R
├── man
    ├── grapes-notin-grapes.Rd
    ├── testForPackage.Rd
    ├── VIP.Rd
    ├── VIPjh.Rd
    ├── ely_plsr_data.Rd
    ├── source_GitHubData.Rd
    ├── f.coef.valid.Rd
    ├── get_ecosis_data.Rd
    ├── f.plot.coef.Rd
    ├── percent_rmse.Rd
    ├── f.plot.spec.Rd
    ├── create_data_split.Rd
    ├── pls_permutation.Rd
    ├── find_optimal_components.Rd
    ├── find_optimal_comp_by_groups.Rd
    └── pls_permutation_by_groups.Rd
├── inst
    └── scripts
    │   ├── install_dependencies.R
    │   ├── pull_data_from_ecosis.R
    │   ├── apply_sserbin2019_lma_plsr_to_ely_example.R
    │   ├── apply_sserbin2019_lma_plsr_to_neon_example.R
    │   ├── apply_sserbin2014_leafN_plsr_to_ext_data_example.R
    │   └── simple_spectra-trait_plsr_example.R
├── .gitignore
├── NAMESPACE
├── data-raw
    └── plsr_data.R
├── .github
    └── workflows
    │   ├── run_ecosis_pull_example.yaml
    │   ├── check-r-release.yaml
    │   ├── run_plsr_example_auto.yaml
    │   ├── ci-run_plsr_example.yaml
    │   ├── ci-weekly.yaml
    │   └── check-os.yaml
├── DESCRIPTION
└── README.md


/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^data-raw$
2 | ^doc$
3 | ^Meta$
4 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | test_check("spectratrait")


--------------------------------------------------------------------------------
/data/ely_plsr_data.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/data/ely_plsr_data.rda


--------------------------------------------------------------------------------
/spectratrait_1.2.6.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/spectratrait_1.2.6.pdf


--------------------------------------------------------------------------------
/vignettes/kit_sla_ex3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/kit_sla_ex3.pdf


--------------------------------------------------------------------------------
/vignettes/ely_etal_ex1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/ely_etal_ex1.pdf


--------------------------------------------------------------------------------
/vignettes/ely_etal_ex2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/ely_etal_ex2.pdf


--------------------------------------------------------------------------------
/vignettes/neon_lma_ex4.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_lma_ex4.pdf


--------------------------------------------------------------------------------
/vignettes/reseco_leafN_ex6.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_leafN_ex6.pdf


--------------------------------------------------------------------------------
/vignettes/reseco_leafN_ex7.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_leafN_ex7.pdf


--------------------------------------------------------------------------------
/vignettes/reseco_lma_ex8.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_lma_ex8.pdf


--------------------------------------------------------------------------------
/R/plsr_data.R:
--------------------------------------------------------------------------------
1 | #' Ely et al (2019) example leaf-level PLSR dataset. DOI: https://doi.org/10.1093/jxb/erz061
2 | "ely_plsr_data"


--------------------------------------------------------------------------------
/vignettes/neon_canopy_leafN_ex5.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_canopy_leafN_ex5.pdf


--------------------------------------------------------------------------------
/vignettes/neon_canopy_lma_ex10.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_canopy_lma_ex10.pdf


--------------------------------------------------------------------------------
/vignettes/sserbin2019_plsr_ex9.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/sserbin2019_plsr_ex9.pdf


--------------------------------------------------------------------------------
/vignettes/kit_sla_ex3_files/figure-gfm/unnamed-chunk-7-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/kit_sla_ex3_files/figure-gfm/unnamed-chunk-7-1.png


--------------------------------------------------------------------------------
/vignettes/kit_sla_ex3_files/figure-gfm/unnamed-chunk-9-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/kit_sla_ex3_files/figure-gfm/unnamed-chunk-9-1.png


--------------------------------------------------------------------------------
/vignettes/ely_etal_ex1_files/figure-gfm/unnamed-chunk-10-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/ely_etal_ex1_files/figure-gfm/unnamed-chunk-10-1.png


--------------------------------------------------------------------------------
/vignettes/ely_etal_ex1_files/figure-gfm/unnamed-chunk-11-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/ely_etal_ex1_files/figure-gfm/unnamed-chunk-11-1.png


--------------------------------------------------------------------------------
/vignettes/ely_etal_ex1_files/figure-gfm/unnamed-chunk-12-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/ely_etal_ex1_files/figure-gfm/unnamed-chunk-12-1.png


--------------------------------------------------------------------------------
/vignettes/ely_etal_ex1_files/figure-gfm/unnamed-chunk-14-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/ely_etal_ex1_files/figure-gfm/unnamed-chunk-14-1.png


--------------------------------------------------------------------------------
/vignettes/ely_etal_ex1_files/figure-gfm/unnamed-chunk-15-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/ely_etal_ex1_files/figure-gfm/unnamed-chunk-15-1.png


--------------------------------------------------------------------------------
/vignettes/ely_etal_ex1_files/figure-gfm/unnamed-chunk-6-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/ely_etal_ex1_files/figure-gfm/unnamed-chunk-6-1.png


--------------------------------------------------------------------------------
/vignettes/ely_etal_ex1_files/figure-gfm/unnamed-chunk-8-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/ely_etal_ex1_files/figure-gfm/unnamed-chunk-8-1.png


--------------------------------------------------------------------------------
/vignettes/ely_etal_ex1_files/figure-gfm/unnamed-chunk-9-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/ely_etal_ex1_files/figure-gfm/unnamed-chunk-9-1.png


--------------------------------------------------------------------------------
/vignettes/ely_etal_ex2_files/figure-gfm/unnamed-chunk-10-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/ely_etal_ex2_files/figure-gfm/unnamed-chunk-10-1.png


--------------------------------------------------------------------------------
/vignettes/ely_etal_ex2_files/figure-gfm/unnamed-chunk-11-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/ely_etal_ex2_files/figure-gfm/unnamed-chunk-11-1.png


--------------------------------------------------------------------------------
/vignettes/ely_etal_ex2_files/figure-gfm/unnamed-chunk-12-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/ely_etal_ex2_files/figure-gfm/unnamed-chunk-12-1.png


--------------------------------------------------------------------------------
/vignettes/ely_etal_ex2_files/figure-gfm/unnamed-chunk-14-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/ely_etal_ex2_files/figure-gfm/unnamed-chunk-14-1.png


--------------------------------------------------------------------------------
/vignettes/ely_etal_ex2_files/figure-gfm/unnamed-chunk-15-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/ely_etal_ex2_files/figure-gfm/unnamed-chunk-15-1.png


--------------------------------------------------------------------------------
/vignettes/ely_etal_ex2_files/figure-gfm/unnamed-chunk-6-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/ely_etal_ex2_files/figure-gfm/unnamed-chunk-6-1.png


--------------------------------------------------------------------------------
/vignettes/ely_etal_ex2_files/figure-gfm/unnamed-chunk-8-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/ely_etal_ex2_files/figure-gfm/unnamed-chunk-8-1.png


--------------------------------------------------------------------------------
/vignettes/ely_etal_ex2_files/figure-gfm/unnamed-chunk-9-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/ely_etal_ex2_files/figure-gfm/unnamed-chunk-9-1.png


--------------------------------------------------------------------------------
/vignettes/kit_sla_ex3_files/figure-gfm/unnamed-chunk-10-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/kit_sla_ex3_files/figure-gfm/unnamed-chunk-10-1.png


--------------------------------------------------------------------------------
/vignettes/kit_sla_ex3_files/figure-gfm/unnamed-chunk-11-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/kit_sla_ex3_files/figure-gfm/unnamed-chunk-11-1.png


--------------------------------------------------------------------------------
/vignettes/kit_sla_ex3_files/figure-gfm/unnamed-chunk-12-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/kit_sla_ex3_files/figure-gfm/unnamed-chunk-12-1.png


--------------------------------------------------------------------------------
/vignettes/kit_sla_ex3_files/figure-gfm/unnamed-chunk-13-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/kit_sla_ex3_files/figure-gfm/unnamed-chunk-13-1.png


--------------------------------------------------------------------------------
/vignettes/kit_sla_ex3_files/figure-gfm/unnamed-chunk-15-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/kit_sla_ex3_files/figure-gfm/unnamed-chunk-15-1.png


--------------------------------------------------------------------------------
/vignettes/kit_sla_ex3_files/figure-gfm/unnamed-chunk-16-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/kit_sla_ex3_files/figure-gfm/unnamed-chunk-16-1.png


--------------------------------------------------------------------------------
/vignettes/neon_lma_ex4_files/figure-gfm/unnamed-chunk-10-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_lma_ex4_files/figure-gfm/unnamed-chunk-10-1.png


--------------------------------------------------------------------------------
/vignettes/neon_lma_ex4_files/figure-gfm/unnamed-chunk-11-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_lma_ex4_files/figure-gfm/unnamed-chunk-11-1.png


--------------------------------------------------------------------------------
/vignettes/neon_lma_ex4_files/figure-gfm/unnamed-chunk-12-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_lma_ex4_files/figure-gfm/unnamed-chunk-12-1.png


--------------------------------------------------------------------------------
/vignettes/neon_lma_ex4_files/figure-gfm/unnamed-chunk-14-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_lma_ex4_files/figure-gfm/unnamed-chunk-14-1.png


--------------------------------------------------------------------------------
/vignettes/neon_lma_ex4_files/figure-gfm/unnamed-chunk-15-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_lma_ex4_files/figure-gfm/unnamed-chunk-15-1.png


--------------------------------------------------------------------------------
/vignettes/neon_lma_ex4_files/figure-gfm/unnamed-chunk-6-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_lma_ex4_files/figure-gfm/unnamed-chunk-6-1.png


--------------------------------------------------------------------------------
/vignettes/neon_lma_ex4_files/figure-gfm/unnamed-chunk-8-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_lma_ex4_files/figure-gfm/unnamed-chunk-8-1.png


--------------------------------------------------------------------------------
/vignettes/neon_lma_ex4_files/figure-gfm/unnamed-chunk-9-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_lma_ex4_files/figure-gfm/unnamed-chunk-9-1.png


--------------------------------------------------------------------------------
/vignettes/reseco_leafN_ex6_files/figure-gfm/unnamed-chunk-7-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_leafN_ex6_files/figure-gfm/unnamed-chunk-7-1.png


--------------------------------------------------------------------------------
/vignettes/reseco_leafN_ex6_files/figure-gfm/unnamed-chunk-9-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_leafN_ex6_files/figure-gfm/unnamed-chunk-9-1.png


--------------------------------------------------------------------------------
/vignettes/reseco_leafN_ex7_files/figure-gfm/unnamed-chunk-7-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_leafN_ex7_files/figure-gfm/unnamed-chunk-7-1.png


--------------------------------------------------------------------------------
/vignettes/reseco_leafN_ex7_files/figure-gfm/unnamed-chunk-9-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_leafN_ex7_files/figure-gfm/unnamed-chunk-9-1.png


--------------------------------------------------------------------------------
/vignettes/reseco_lma_ex8_files/figure-gfm/unnamed-chunk-10-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_lma_ex8_files/figure-gfm/unnamed-chunk-10-1.png


--------------------------------------------------------------------------------
/vignettes/reseco_lma_ex8_files/figure-gfm/unnamed-chunk-11-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_lma_ex8_files/figure-gfm/unnamed-chunk-11-1.png


--------------------------------------------------------------------------------
/vignettes/reseco_lma_ex8_files/figure-gfm/unnamed-chunk-12-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_lma_ex8_files/figure-gfm/unnamed-chunk-12-1.png


--------------------------------------------------------------------------------
/vignettes/reseco_lma_ex8_files/figure-gfm/unnamed-chunk-13-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_lma_ex8_files/figure-gfm/unnamed-chunk-13-1.png


--------------------------------------------------------------------------------
/vignettes/reseco_lma_ex8_files/figure-gfm/unnamed-chunk-15-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_lma_ex8_files/figure-gfm/unnamed-chunk-15-1.png


--------------------------------------------------------------------------------
/vignettes/reseco_lma_ex8_files/figure-gfm/unnamed-chunk-17-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_lma_ex8_files/figure-gfm/unnamed-chunk-17-1.png


--------------------------------------------------------------------------------
/vignettes/reseco_lma_ex8_files/figure-gfm/unnamed-chunk-18-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_lma_ex8_files/figure-gfm/unnamed-chunk-18-1.png


--------------------------------------------------------------------------------
/vignettes/reseco_lma_ex8_files/figure-gfm/unnamed-chunk-7-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_lma_ex8_files/figure-gfm/unnamed-chunk-7-1.png


--------------------------------------------------------------------------------
/vignettes/reseco_leafN_ex6_files/figure-gfm/unnamed-chunk-10-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_leafN_ex6_files/figure-gfm/unnamed-chunk-10-1.png


--------------------------------------------------------------------------------
/vignettes/reseco_leafN_ex6_files/figure-gfm/unnamed-chunk-11-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_leafN_ex6_files/figure-gfm/unnamed-chunk-11-1.png


--------------------------------------------------------------------------------
/vignettes/reseco_leafN_ex6_files/figure-gfm/unnamed-chunk-12-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_leafN_ex6_files/figure-gfm/unnamed-chunk-12-1.png


--------------------------------------------------------------------------------
/vignettes/reseco_leafN_ex6_files/figure-gfm/unnamed-chunk-13-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_leafN_ex6_files/figure-gfm/unnamed-chunk-13-1.png


--------------------------------------------------------------------------------
/vignettes/reseco_leafN_ex6_files/figure-gfm/unnamed-chunk-15-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_leafN_ex6_files/figure-gfm/unnamed-chunk-15-1.png


--------------------------------------------------------------------------------
/vignettes/reseco_leafN_ex6_files/figure-gfm/unnamed-chunk-16-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_leafN_ex6_files/figure-gfm/unnamed-chunk-16-1.png


--------------------------------------------------------------------------------
/vignettes/reseco_leafN_ex7_files/figure-gfm/unnamed-chunk-10-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_leafN_ex7_files/figure-gfm/unnamed-chunk-10-1.png


--------------------------------------------------------------------------------
/vignettes/reseco_leafN_ex7_files/figure-gfm/unnamed-chunk-11-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_leafN_ex7_files/figure-gfm/unnamed-chunk-11-1.png


--------------------------------------------------------------------------------
/vignettes/reseco_leafN_ex7_files/figure-gfm/unnamed-chunk-12-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_leafN_ex7_files/figure-gfm/unnamed-chunk-12-1.png


--------------------------------------------------------------------------------
/vignettes/reseco_leafN_ex7_files/figure-gfm/unnamed-chunk-13-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_leafN_ex7_files/figure-gfm/unnamed-chunk-13-1.png


--------------------------------------------------------------------------------
/vignettes/reseco_leafN_ex7_files/figure-gfm/unnamed-chunk-15-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_leafN_ex7_files/figure-gfm/unnamed-chunk-15-1.png


--------------------------------------------------------------------------------
/vignettes/reseco_leafN_ex7_files/figure-gfm/unnamed-chunk-16-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_leafN_ex7_files/figure-gfm/unnamed-chunk-16-1.png


--------------------------------------------------------------------------------
/vignettes/neon_canopy_leafN_ex5_files/figure-gfm/unnamed-chunk-7-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_canopy_leafN_ex5_files/figure-gfm/unnamed-chunk-7-1.png


--------------------------------------------------------------------------------
/vignettes/neon_canopy_leafN_ex5_files/figure-gfm/unnamed-chunk-9-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_canopy_leafN_ex5_files/figure-gfm/unnamed-chunk-9-1.png


--------------------------------------------------------------------------------
/vignettes/neon_canopy_lma_ex10_files/figure-gfm/unnamed-chunk-10-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_canopy_lma_ex10_files/figure-gfm/unnamed-chunk-10-1.png


--------------------------------------------------------------------------------
/vignettes/neon_canopy_lma_ex10_files/figure-gfm/unnamed-chunk-11-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_canopy_lma_ex10_files/figure-gfm/unnamed-chunk-11-1.png


--------------------------------------------------------------------------------
/vignettes/neon_canopy_lma_ex10_files/figure-gfm/unnamed-chunk-12-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_canopy_lma_ex10_files/figure-gfm/unnamed-chunk-12-1.png


--------------------------------------------------------------------------------
/vignettes/neon_canopy_lma_ex10_files/figure-gfm/unnamed-chunk-13-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_canopy_lma_ex10_files/figure-gfm/unnamed-chunk-13-1.png


--------------------------------------------------------------------------------
/vignettes/neon_canopy_lma_ex10_files/figure-gfm/unnamed-chunk-15-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_canopy_lma_ex10_files/figure-gfm/unnamed-chunk-15-1.png


--------------------------------------------------------------------------------
/vignettes/neon_canopy_lma_ex10_files/figure-gfm/unnamed-chunk-16-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_canopy_lma_ex10_files/figure-gfm/unnamed-chunk-16-1.png


--------------------------------------------------------------------------------
/vignettes/neon_canopy_lma_ex10_files/figure-gfm/unnamed-chunk-7-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_canopy_lma_ex10_files/figure-gfm/unnamed-chunk-7-1.png


--------------------------------------------------------------------------------
/vignettes/neon_canopy_lma_ex10_files/figure-gfm/unnamed-chunk-9-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_canopy_lma_ex10_files/figure-gfm/unnamed-chunk-9-1.png


--------------------------------------------------------------------------------
/vignettes/sserbin2019_plsr_ex9_files/figure-gfm/unnamed-chunk-11-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/sserbin2019_plsr_ex9_files/figure-gfm/unnamed-chunk-11-1.png


--------------------------------------------------------------------------------
/vignettes/neon_canopy_leafN_ex5_files/figure-gfm/unnamed-chunk-10-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_canopy_leafN_ex5_files/figure-gfm/unnamed-chunk-10-1.png


--------------------------------------------------------------------------------
/vignettes/neon_canopy_leafN_ex5_files/figure-gfm/unnamed-chunk-11-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_canopy_leafN_ex5_files/figure-gfm/unnamed-chunk-11-1.png


--------------------------------------------------------------------------------
/vignettes/neon_canopy_leafN_ex5_files/figure-gfm/unnamed-chunk-12-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_canopy_leafN_ex5_files/figure-gfm/unnamed-chunk-12-1.png


--------------------------------------------------------------------------------
/vignettes/neon_canopy_leafN_ex5_files/figure-gfm/unnamed-chunk-13-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_canopy_leafN_ex5_files/figure-gfm/unnamed-chunk-13-1.png


--------------------------------------------------------------------------------
/vignettes/neon_canopy_leafN_ex5_files/figure-gfm/unnamed-chunk-15-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_canopy_leafN_ex5_files/figure-gfm/unnamed-chunk-15-1.png


--------------------------------------------------------------------------------
/vignettes/neon_canopy_leafN_ex5_files/figure-gfm/unnamed-chunk-16-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_canopy_leafN_ex5_files/figure-gfm/unnamed-chunk-16-1.png


--------------------------------------------------------------------------------
/tests/testthat/test.get_ecosis_data.R:
--------------------------------------------------------------------------------
1 | context("Grabbing data via the EcoSIS API works")
2 | 
3 | test_that("Downloading data from EcoSIS doesnt throw an error", {
4 |   ecosis_id <- "960dbb0c-144e-4563-8117-9e23d14f4aa9"
5 |   dat_raw <- get_ecosis_data(ecosis_id = ecosis_id)
6 |   expect_true(!is.null(dat_raw))
7 | })


--------------------------------------------------------------------------------
/man/grapes-notin-grapes.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{\%notin\%}
 4 | \alias{\%notin\%}
 5 | \title{Not \%in\% function}
 6 | \usage{
 7 | x \%notin\% table
 8 | }
 9 | \arguments{
10 | \item{x}{initial list}
11 | 
12 | \item{table}{list to check against}
13 | }
14 | \description{
15 | Not \%in\% function
16 | }
17 | 


--------------------------------------------------------------------------------
/man/testForPackage.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{testForPackage}
 4 | \alias{testForPackage}
 5 | \title{Function to check for installed package}
 6 | \usage{
 7 | testForPackage(pkg)
 8 | }
 9 | \arguments{
10 | \item{pkg}{name of package to check if installed
11 | not presently used}
12 | }
13 | \description{
14 | Function to check for installed package
15 | }
16 | 


--------------------------------------------------------------------------------
/man/VIP.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/vip.R
 3 | \name{VIP}
 4 | \alias{VIP}
 5 | \title{VIP returns all VIP values for all variables and all number of components, as a ncomp x nvars matrix.}
 6 | \usage{
 7 | VIP(object)
 8 | }
 9 | \arguments{
10 | \item{object}{fitted pls::plsr object}
11 | }
12 | \description{
13 | VIP returns all VIP values for all variables and all number of components, as a ncomp x nvars matrix.
14 | }
15 | 


--------------------------------------------------------------------------------
/man/VIPjh.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/vip.R
 3 | \name{VIPjh}
 4 | \alias{VIPjh}
 5 | \title{VIPjh returns the VIP of variable j with h components}
 6 | \usage{
 7 | VIPjh(object, j, h)
 8 | }
 9 | \arguments{
10 | \item{object}{fitted pls::plsr object}
11 | 
12 | \item{j}{which variable in the fitted pls::plsr object}
13 | 
14 | \item{h}{the number of components in the fitted pls::plsr object to calculate the VIP}
15 | }
16 | \description{
17 | VIPjh returns the VIP of variable j with h components
18 | }
19 | 


--------------------------------------------------------------------------------
/man/ely_plsr_data.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plsr_data.R
 3 | \docType{data}
 4 | \name{ely_plsr_data}
 5 | \alias{ely_plsr_data}
 6 | \title{Ely et al (2019) example leaf-level PLSR dataset. DOI: https://doi.org/10.1093/jxb/erz061}
 7 | \format{
 8 | An object of class \code{data.frame} with 178 rows and 1908 columns.
 9 | }
10 | \usage{
11 | ely_plsr_data
12 | }
13 | \description{
14 | Ely et al (2019) example leaf-level PLSR dataset. DOI: https://doi.org/10.1093/jxb/erz061
15 | }
16 | \keyword{datasets}
17 | 


--------------------------------------------------------------------------------
/man/source_GitHubData.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{source_GitHubData}
 4 | \alias{source_GitHubData}
 5 | \title{Function to source text data from GitHub}
 6 | \usage{
 7 | source_GitHubData(url, sep = ",", header = TRUE)
 8 | }
 9 | \arguments{
10 | \item{url}{http/https URL to the github dataset}
11 | 
12 | \item{sep}{dataset file delimiter}
13 | 
14 | \item{header}{TRUE/FALSE does the file have a column header?}
15 | }
16 | \description{
17 | Function to source text data from GitHub
18 | }
19 | \author{
20 | gist.github.com/christophergandrud/4466237
21 | }
22 | 


--------------------------------------------------------------------------------
/man/f.coef.valid.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/f.coef.valid.R
 3 | \name{f.coef.valid}
 4 | \alias{f.coef.valid}
 5 | \title{f.coef.valid}
 6 | \usage{
 7 | f.coef.valid(plsr.out, data_plsr, ncomp, inVar)
 8 | }
 9 | \arguments{
10 | \item{plsr.out}{plsr model obtained with jaccknife = TRUE}
11 | 
12 | \item{data_plsr}{data used for the plsr model with Spectra the matrix of spectra}
13 | 
14 | \item{ncomp}{number of selection components}
15 | 
16 | \item{inVar}{Name of the PLSR model response variable}
17 | }
18 | \value{
19 | B returns the intercept and the coefficients of the jackknife or bootstrap validation
20 | }
21 | \description{
22 | f.coef.valid
23 | }
24 | \author{
25 | Julien Lamour
26 | }
27 | 


--------------------------------------------------------------------------------
/man/get_ecosis_data.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/get_ecosis_data.R
 3 | \name{get_ecosis_data}
 4 | \alias{get_ecosis_data}
 5 | \title{Function to pull data from EcoSIS using the EcoSIS API}
 6 | \usage{
 7 | get_ecosis_data(ecosis_id = NULL)
 8 | }
 9 | \arguments{
10 | \item{ecosis_id}{the alphanumeric EcoSIS API dataset ID}
11 | }
12 | \value{
13 | EcoSIS spectral dataset object
14 | }
15 | \description{
16 | Function to pull data from EcoSIS using the EcoSIS API
17 | }
18 | \examples{
19 | \dontrun{ 
20 | ecosis_id <- "960dbb0c-144e-4563-8117-9e23d14f4aa9"
21 | dat_raw <- get_ecosis_data(ecosis_id = ecosis_id)
22 | head(dat_raw)
23 | names(dat_raw)[1:40]
24 | }
25 | 
26 | }
27 | \author{
28 | Shawn P. Serbin, Alexey Shiklomanov
29 | }
30 | 


--------------------------------------------------------------------------------
/inst/scripts/install_dependencies.R:
--------------------------------------------------------------------------------
 1 | ####################################################################################################
 2 | # Install dependencies
 3 | #
 4 | ####################################################################################################
 5 | 
 6 | 
 7 | #--------------------------------------------------------------------------------------------------#
 8 | req.packages <- c("devtools","remotes","readr","RCurl","httr","pls","dplyr","reshape2","here",
 9 |                   "plotrix","scales","ggplot2","gridExtra")
10 | new.packages <- req.packages[!(req.packages %in% installed.packages()[,"Package"])]
11 | if(length(new.packages)) install.packages(new.packages, dependencies=TRUE)
12 | #--------------------------------------------------------------------------------------------------#


--------------------------------------------------------------------------------
/man/f.plot.coef.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/f.plot.coef.R
 3 | \name{f.plot.coef}
 4 | \alias{f.plot.coef}
 5 | \title{f.plot.coef}
 6 | \usage{
 7 | f.plot.coef(
 8 |   Z,
 9 |   wv,
10 |   xlim = NULL,
11 |   position = "topright",
12 |   type = "Coefficient",
13 |   plot_label = NULL
14 | )
15 | }
16 | \arguments{
17 | \item{Z}{Coefficient matrix with each row corresponding to the coefficients and wavelength in columns}
18 | 
19 | \item{wv}{vector of wavelengths}
20 | 
21 | \item{xlim}{vector to change the default xlim of the plots (ex xlim = c(500, 2400))}
22 | 
23 | \item{position}{Position of the legend (see base function legend for help)}
24 | 
25 | \item{type}{Name of the y axis and of the legend}
26 | 
27 | \item{plot_label}{optional plot label to include with the figure}
28 | }
29 | \description{
30 | f.plot.coef
31 | }
32 | \author{
33 | Julien Lamour
34 | }
35 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # R test files
 2 | Rplots.pdf
 3 | 
 4 | # History files
 5 | .Rhistory
 6 | .Rapp.history
 7 | 
 8 | # Session Data files
 9 | .RData
10 | 
11 | # Example code in package build process
12 | *-Ex.R
13 | 
14 | # Output files from R CMD build
15 | /*.tar.gz
16 | 
17 | # Output files from R CMD check
18 | /*.Rcheck/
19 | 
20 | # RStudio files
21 | .Rproj.user/
22 | 
23 | # produced vignettes
24 | vignettes/*.html
25 | vignettes/*.nb.html
26 | ## commented out to preserve pdf for display in GitHub
27 | #vignettes/*.pdf
28 | #vignettes/*_files/
29 | 
30 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
31 | .httr-oauth
32 | 
33 | # knitr and R markdown default cache directories
34 | /*_cache/
35 | /cache/
36 | 
37 | # Temporary files created by R markdown
38 | *.utf8.md
39 | *.knit.md
40 | 
41 | # Shiny token, see https://shiny.rstudio.com/articles/shinyapps.html
42 | rsconnect/
43 | 
44 | # MacOSX
45 | *.DS*
46 | *._*
47 | /doc/
48 | /Meta/
49 | 


--------------------------------------------------------------------------------
/man/percent_rmse.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{percent_rmse}
 4 | \alias{percent_rmse}
 5 | \title{Calculate RMSE and percent RMSE with PLSR model results}
 6 | \usage{
 7 | percent_rmse(
 8 |   plsr_dataset = NULL,
 9 |   inVar = NULL,
10 |   residuals = NULL,
11 |   range = "full"
12 | )
13 | }
14 | \arguments{
15 | \item{plsr_dataset}{input plsr dataset}
16 | 
17 | \item{inVar}{the trait variable used in the calculation of RMSE}
18 | 
19 | \item{residuals}{predicted minus observed residual vector from either
20 | a cross-validation CV or independent validation}
21 | 
22 | \item{range}{calculate over the full data range or the 95\% of data range.
23 | options full or 95perc}
24 | }
25 | \value{
26 | output a list containing the rmse and perc_rmse.
27 | output <- list(rmse = rmse, perc_rmse = perc_rmse)
28 | }
29 | \description{
30 | Calculate RMSE and percent RMSE with PLSR model results
31 | }
32 | \author{
33 | Shawn P. Serbin
34 | }
35 | 


--------------------------------------------------------------------------------
/R/get_ecosis_data.R:
--------------------------------------------------------------------------------
 1 | ##' Function to pull data from EcoSIS using the EcoSIS API
 2 | ##' 
 3 | ##' @param ecosis_id the alphanumeric EcoSIS API dataset ID
 4 | ##' 
 5 | ##' @examples 
 6 | ##' \dontrun{ 
 7 | ##' ecosis_id <- "960dbb0c-144e-4563-8117-9e23d14f4aa9"
 8 | ##' dat_raw <- get_ecosis_data(ecosis_id = ecosis_id)
 9 | ##' head(dat_raw)
10 | ##' names(dat_raw)[1:40]
11 | ##' }
12 | ##' 
13 | ##' @importFrom readr read_csv
14 | ##' 
15 | ##' @return EcoSIS spectral dataset object
16 | ##' 
17 | ##' @author Shawn P. Serbin, Alexey Shiklomanov
18 | ##' @export
19 | get_ecosis_data <- function(ecosis_id = NULL) {
20 |   if(!is.null(ecosis_id)) {
21 |     print("**** Downloading Ecosis data ****")
22 |     ecosis_id <- ecosis_id
23 |     ecosis_file <- sprintf(
24 |       "https://ecosis.org/api/package/%s/export?metadata=true",
25 |       ecosis_id)
26 |     message("Downloading data...")
27 |     dat_raw <- readr::read_csv(ecosis_file)
28 |     message("Download complete!")
29 |     return(dat_raw)
30 |   } else {
31 |     stop("**** No EcoSIS ID provided.  Please provide a valid ID before proceeding ****")
32 |   }
33 | }


--------------------------------------------------------------------------------
/man/f.plot.spec.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/f.plot.spec.R
 3 | \name{f.plot.spec}
 4 | \alias{f.plot.spec}
 5 | \title{f.plot.spec}
 6 | \usage{
 7 | f.plot.spec(
 8 |   Z,
 9 |   wv,
10 |   xlim = NULL,
11 |   position = "topright",
12 |   type = "Reflectance",
13 |   plot_label = NULL,
14 |   CI = 95
15 | )
16 | }
17 | \arguments{
18 | \item{Z}{Spectra matrix with each row corresponding to a spectra and wavelength in columns}
19 | 
20 | \item{wv}{vector of wavelengths corresponding to the column of the spectra matrix Z}
21 | 
22 | \item{xlim}{vector to change the default xlim of the plots (ex xlim = c(500, 2400))}
23 | 
24 | \item{position}{Position of the legend (see base function legend for help)}
25 | 
26 | \item{type}{Name of the y axis and of the legend. E.g. Reflectance, Transmittance}
27 | 
28 | \item{plot_label}{optional plot label to include with the figure}
29 | 
30 | \item{CI}{Desired confidence interval for the spectra plot. Options are: 95
31 | or 90. Default is: 95}
32 | }
33 | \description{
34 | f.plot.spec
35 | }
36 | \author{
37 | Julien Lamour, Shawn P. Serbin, Andrés Baresch
38 | }
39 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export("%notin%")
 4 | export(VIP)
 5 | export(VIPjh)
 6 | export(create_data_split)
 7 | export(f.coef.valid)
 8 | export(f.plot.coef)
 9 | export(f.plot.spec)
10 | export(find_optimal_comp_by_groups)
11 | export(find_optimal_components)
12 | export(get_ecosis_data)
13 | export(percent_rmse)
14 | export(pls_permutation)
15 | export(pls_permutation_by_groups)
16 | export(source_GitHubData)
17 | import(ggplot2)
18 | import(httr)
19 | importFrom(dplyr,all_of)
20 | importFrom(dplyr,group_by_at)
21 | importFrom(dplyr,mutate)
22 | importFrom(dplyr,n)
23 | importFrom(dplyr,row_number)
24 | importFrom(dplyr,slice)
25 | importFrom(dplyr,vars)
26 | importFrom(graphics,box)
27 | importFrom(graphics,legend)
28 | importFrom(graphics,lines)
29 | importFrom(graphics,polygon)
30 | importFrom(magrittr,"%>%")
31 | importFrom(pls,plsr)
32 | importFrom(pls,selectNcomp)
33 | importFrom(readr,read_csv)
34 | importFrom(reshape2,melt)
35 | importFrom(stats,as.formula)
36 | importFrom(stats,coef)
37 | importFrom(stats,predict)
38 | importFrom(stats,quantile)
39 | importFrom(stats,t.test)
40 | importFrom(utils,flush.console)
41 | importFrom(utils,read.table)
42 | importFrom(utils,setTxtProgressBar)
43 | importFrom(utils,txtProgressBar)
44 | 


--------------------------------------------------------------------------------
/man/create_data_split.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/create_data_split.R
 3 | \name{create_data_split}
 4 | \alias{create_data_split}
 5 | \title{Create a calibration (training) / validation data split for PLSR model fitting and testing}
 6 | \usage{
 7 | create_data_split(
 8 |   dataset = NULL,
 9 |   approach = NULL,
10 |   split_seed = 123456789,
11 |   prop = 0.8,
12 |   group_variables = NULL
13 | )
14 | }
15 | \arguments{
16 | \item{dataset}{input full PLSR dataset to split into cal/val datasets}
17 | 
18 | \item{approach}{approach to splitting the dataset. Options: base or dplyr}
19 | 
20 | \item{split_seed}{random seed to use for splitting data}
21 | 
22 | \item{prop}{the proportion of data to preserve for calibration (e.g. 0.8) and validation (0.2).
23 | This sets the calibration proportion}
24 | 
25 | \item{group_variables}{Use factor variables to conduct a stratified sampling for cal/val}
26 | }
27 | \value{
28 | output_list A list containing the calibration dataset (cal_data)
29 | and validation dataset (val_data)
30 | }
31 | \description{
32 | Create a calibration (training) / validation data split for PLSR model fitting and testing
33 | }
34 | \author{
35 | Julien Lamour, Jeremiah Anderson, Shawn P. Serbin
36 | }
37 | 


--------------------------------------------------------------------------------
/data-raw/plsr_data.R:
--------------------------------------------------------------------------------
 1 | getwd()
 2 | devtools::load_all()
 3 | library(dplyr)
 4 | `%notin%` <- Negate(`%in%`)
 5 | 
 6 | # get data
 7 | ecosis_id <- "25770ad9-d47c-428b-bf99-d1543a4b0ec9"
 8 | dat_raw <- spectratrait::get_ecosis_data(ecosis_id = ecosis_id)
 9 | head(dat_raw)
10 | names(dat_raw)[1:60]
11 | 
12 | # setup data
13 | Start.wave <- 500
14 | End.wave <- 2400
15 | wv <- seq(Start.wave,End.wave,1)
16 | Spectra <- as.matrix(dat_raw[,names(dat_raw) %in% wv])
17 | colnames(Spectra) <- c(paste0("Wave_",wv))
18 | head(Spectra)[1:6,1:10]
19 | sample_info <- dat_raw[,names(dat_raw) %notin% seq(350,2500,1)]
20 | head(sample_info)
21 | 
22 | sample_info2 <- sample_info %>%
23 |   select(Species_Code=`USDA Symbol`, Common_Name=`Common Name`, C_N_mass, C_g_m2, 
24 |          H20_g_m2, LMA_g_m2, N_g_m2, QC)
25 | 
26 | plsr_data <- data.frame(sample_info2,Spectra)
27 | plsr_data <- plsr_data %>%
28 |   filter(is.na(QC)) %>%
29 |   select(-QC)
30 | plsr_data <- plsr_data[complete.cases(plsr_data),]
31 | 
32 | write.csv(x = plsr_data, file = "ely_plsr_data.csv", row.names = F)
33 | ely_plsr_data <- read.csv("ely_plsr_data.csv", header = T)
34 | 
35 | usethis::use_data(ely_plsr_data, overwrite = TRUE, internal = FALSE)
36 |                               
37 |                               
38 |                               


--------------------------------------------------------------------------------
/R/f.coef.valid.R:
--------------------------------------------------------------------------------
 1 | ## Return the intercept and the coefficients of the jackknife validation
 2 | ##' @title f.coef.valid
 3 | ##' 
 4 | ##' @param plsr.out plsr model obtained with jaccknife = TRUE 
 5 | ##' @param data_plsr data used for the plsr model with Spectra the matrix of spectra
 6 | ##' @param ncomp number of selection components
 7 | ##' @param inVar Name of the PLSR model response variable
 8 | ##' 
 9 | ##' @return B returns the intercept and the coefficients of the jackknife or bootstrap validation 
10 | ##' 
11 | ##' @author Julien Lamour
12 | ##' @export
13 | f.coef.valid <- function(plsr.out, data_plsr, ncomp, inVar) {
14 |   ## Only work in the case where center=TRUE in the plsr model
15 |   B <- plsr.out$validation$coefficients[, , ncomp,, drop = FALSE]
16 |   dB <- dim(B)
17 |   dB[1] <- dB[1] + 1
18 |   dnB <- dimnames(B)
19 |   dnB[[1]] <- c("(Intercept)", dnB[[1]])
20 |   BInt <- array(dim = dB, dimnames = dnB)
21 |   BInt[-1, , ,] <- B
22 |   nseg=dB[[4]]
23 |   for (i in 1:nseg){
24 |     Y<-data_plsr[,inVar]
25 |     Y<-Y[-plsr.out$validation$segments[[i]]]
26 |     Ymeans<-mean(Y)
27 |     X<-data_plsr$Spectra
28 |     X<-X[-plsr.out$validation$segments[[i]],]
29 |     Xmeans<-colMeans(X)
30 |     BInt[1, , ,i] <- Ymeans - Xmeans %*% B[, , , i]
31 |   } 
32 |   B <- BInt
33 |   return(B)
34 | }


--------------------------------------------------------------------------------
/tests/testthat/test.simple_plsr.R:
--------------------------------------------------------------------------------
 1 | context("*** Test simple PLSR model fit using Ely et al. (2019) data ***")
 2 | 
 3 | ### Setup data for tests
 4 | #Load Ely et al 2019 dataset
 5 | data("ely_plsr_data")
 6 | inVar <- "N_g_m2"
 7 | Start.wave <- 500
 8 | End.wave <- 2400
 9 | wv <- seq(Start.wave,End.wave,1)
10 | plsr_data <- ely_plsr_data
11 | spec <- as.matrix(plsr_data[, which(names(plsr_data) %in% paste0("Wave_",wv))])
12 | plsr_data <- data.frame(plsr_data[, which(names(plsr_data) %notin% paste0("Wave_",wv))],
13 |                         Spectra=I(spec))
14 | 
15 | test_that("Test fitting simple PLSR model", {
16 |   method <- "firstPlateau"
17 |   random_seed <- 1245565
18 |   seg <- 50
19 |   maxComps <- 20
20 |   iterations <- 80
21 |   prop <- 0.70
22 |   
23 |   nComps <- spectratrait::find_optimal_components(dataset=plsr_data, targetVariable=inVar, 
24 |                                                   method=method, maxComps=maxComps, 
25 |                                                   iterations=iterations, seg=seg, prop=prop, 
26 |                                                   random_seed=random_seed)
27 |   
28 |   segs <- 100
29 |   plsr.out <- plsr(as.formula(paste(inVar,"~","Spectra")),scale=FALSE,ncomp=nComps,validation="CV",
30 |                    segments=segs, segment.type="interleaved",trace=FALSE,data=plsr_data)
31 |   fit <- plsr.out$fitted.values[,1,nComps]
32 |   expect_type(fit, "double")
33 | })


--------------------------------------------------------------------------------
/.github/workflows/run_ecosis_pull_example.yaml:
--------------------------------------------------------------------------------
 1 | name: run_ecosis_pull_example
 2 | 
 3 | on:
 4 |   schedule:
 5 |     - cron:  '15 1 */5 * *'
 6 | 
 7 | env:
 8 |   R_LIBS_USER: /usr/local/lib/R/site-library
 9 |   LC_ALL: en_US.UTF-8
10 |   NCPUS: 2
11 | 
12 | jobs:
13 |   run-plsr: 
14 |     # The type of runner that the job will run on
15 |     runs-on: ubuntu-latest
16 | 
17 |     env:
18 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
19 |       R_KEEP_PKG_SOURCE: yes
20 |       R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
21 | 
22 |     steps:
23 |       - uses: actions/checkout@v4
24 | 
25 |       - uses: r-lib/actions/setup-r@v2
26 |         with:
27 |           use-public-rspm: true
28 | 
29 |       - uses: r-lib/actions/setup-r-dependencies@v2
30 | 
31 |       - name: Query dependencies
32 |         run: |
33 |           install.packages('remotes')
34 |           saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
35 |           writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
36 |         shell: Rscript {0}
37 |       
38 |       - name: Install dependencies
39 |         run: |
40 |           Rscript -e 'remotes::install_github(repo="TESTgroup-BNL/spectratrait", dependencies=TRUE)'
41 | 
42 |       # Run R script
43 |       - name: Run EcoSIS API Pull Example
44 |         run: |
45 |           source("inst/scripts/pull_data_from_ecosis.R")
46 |         shell: Rscript {0}


--------------------------------------------------------------------------------
/man/pls_permutation.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/pls_permutation.R
 3 | \name{pls_permutation}
 4 | \alias{pls_permutation}
 5 | \title{Run a PLSR model permutation analysis. Can be used to determine the optimal number of components
 6 | or conduct a boostrap uncertainty analysis}
 7 | \usage{
 8 | pls_permutation(
 9 |   dataset = NULL,
10 |   targetVariable = NULL,
11 |   maxComps = 20,
12 |   iterations = 20,
13 |   prop = 0.7,
14 |   verbose = FALSE
15 | )
16 | }
17 | \arguments{
18 | \item{dataset}{input full PLSR dataset. Usually just the calibration dataset}
19 | 
20 | \item{targetVariable}{What object or variable to use as the Y (predictand) in the PLSR model?
21 | Usually the "inVar" variable set at the beginning of a PLS script}
22 | 
23 | \item{maxComps}{maximum number of components to use for each PLSR fit}
24 | 
25 | \item{iterations}{how many different permutations to run}
26 | 
27 | \item{prop}{proportion of data to preserve for each permutation}
28 | 
29 | \item{verbose}{Should the function report the current iteration status/progress to the terminal
30 | or run silently? TRUE/FALSE. Default FALSE}
31 | }
32 | \value{
33 | output a list containing the PRESS and coef_array.
34 | output <- list(PRESS=press.out, coef_array=coefs)
35 | }
36 | \description{
37 | See Serbin et al. (2019). DOI: https://doi.org/10.1111/nph.16123
38 | }
39 | \author{
40 | Julien Lamour, Shawn P. Serbin
41 | }
42 | 


--------------------------------------------------------------------------------
/man/find_optimal_components.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/find_optimal_components.R
 3 | \name{find_optimal_components}
 4 | \alias{find_optimal_components}
 5 | \title{Applies different methods for the determination of the optimal number of PLSR model components}
 6 | \usage{
 7 | find_optimal_components(
 8 |   dataset = NULL,
 9 |   targetVariable = NULL,
10 |   method = "pls",
11 |   maxComps = 20,
12 |   iterations = 20,
13 |   seg = 100,
14 |   prop = 0.7,
15 |   random_seed = 123456789
16 | )
17 | }
18 | \arguments{
19 | \item{dataset}{input full PLSR dataset. Usually just the calibration dataset}
20 | 
21 | \item{targetVariable}{What object or variable to use as the Y (predictand) in the PLSR model?
22 | Usually the "inVar" variable set at the beginning of a PLS script}
23 | 
24 | \item{method}{Which approach to use to find optimal components. Options: pls, firstPlateau, firstMin}
25 | 
26 | \item{maxComps}{maximum number of components to consider}
27 | 
28 | \item{iterations}{how many different permutations to run}
29 | 
30 | \item{seg}{For the built-in pls method, how many different data segments to select from the input dataset}
31 | 
32 | \item{prop}{proportion of data to preserve for each permutation}
33 | 
34 | \item{random_seed}{random seed to use for splitting data}
35 | }
36 | \value{
37 | nComps the optimal number of PLSR components
38 | }
39 | \description{
40 | Applies different methods for the determination of the optimal number of PLSR model components
41 | }
42 | \author{
43 | Julien Lamour, Jeremiah Anderson, Shawn P. Serbin
44 | }
45 | 


--------------------------------------------------------------------------------
/.github/workflows/check-r-release.yaml:
--------------------------------------------------------------------------------
 1 | name: R-CMD-check-R-release
 2 | 
 3 | # CI to test package on different versions of R, rel-1, rel, rel+1
 4 | on:
 5 |   push:
 6 |     branches: [main, master]
 7 |   pull_request:
 8 |     branches: [main, master]
 9 | 
10 | env:
11 |   R_LIBS_USER: /usr/local/lib/R/site-library
12 |   LC_ALL: en_US.UTF-8
13 |   NCPUS: 2
14 | 
15 | jobs:
16 |   R-CMD-check:
17 |     runs-on: ubuntu-latest
18 |     env:
19 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
20 |       R_KEEP_PKG_SOURCE: yes
21 |       R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
22 | 
23 |     strategy:
24 |       fail-fast: false
25 |       matrix:
26 |         R:
27 |           - "4.0"
28 |           - "4.1"
29 |           - "4.2"
30 |           - "4.3"
31 |           - "4.4"
32 |           - "4.5"
33 | 
34 |     steps:
35 |       #check out source code
36 |       - uses: actions/checkout@v4
37 | 
38 |       - uses: r-lib/actions/setup-r@v2
39 |         with:
40 |           use-public-rspm: true
41 | 
42 |       - uses: r-lib/actions/setup-r-dependencies@v2
43 |         with:
44 |           extra-packages: any::rcmdcheck
45 |           needs: check
46 | 
47 |       # rcmdcheck but do not build vignettes
48 |       - name: Run Fast Build Check Across R Releases
49 |         run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--no-build-vignettes"), build_args = c("--no-manual", "--no-build-vignettes"), error_on = "error")
50 |         shell: Rscript {0}
51 |         
52 |       - name: Upload check results
53 |         if: failure()
54 |         uses: actions/upload-artifact@v4
55 |         with:
56 |           name: ${{ runner.os }}-r${{ matrix.config.r }}-results
57 |           path: check


--------------------------------------------------------------------------------
/.github/workflows/run_plsr_example_auto.yaml:
--------------------------------------------------------------------------------
 1 | name: run_PLSR_example-auto
 2 | 
 3 | on:
 4 |   schedule:
 5 |     - cron:  '0 0 */2 * *'
 6 | 
 7 | env:
 8 |   R_LIBS_USER: /usr/local/lib/R/site-library
 9 |   LC_ALL: en_US.UTF-8
10 |   NCPUS: 2
11 | 
12 | jobs:
13 |   run-plsr: 
14 |     # The type of runner that the job will run on
15 |     runs-on: ubuntu-latest
16 | 
17 |     env:
18 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
19 |       R_KEEP_PKG_SOURCE: yes
20 |       R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
21 |       
22 |     strategy:
23 |       fail-fast: false
24 |       matrix:
25 |         R:
26 |           - "4.0"
27 |           - "4.1"
28 |           - "4.2"
29 |           - "4.3"
30 |           - "4.4"
31 |           - "4.5"
32 |  
33 |     steps:
34 |       #check out source code
35 |       - uses: actions/checkout@v4
36 | 
37 |       - uses: r-lib/actions/setup-r@v2
38 |         with:
39 |           use-public-rspm: true
40 | 
41 |       - uses: r-lib/actions/setup-r-dependencies@v2
42 | 
43 |       - name: Query dependencies
44 |         run: |
45 |           install.packages('remotes')
46 |           saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
47 |           writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
48 |         shell: Rscript {0}
49 |       
50 |       - name: Install dependencies
51 |         run: |
52 |           Rscript -e 'remotes::install_github(repo="TESTgroup-BNL/spectratrait", dependencies=TRUE)'
53 | 
54 |       # Run R script
55 |       - name: Run Ely et al. (2019) PLSR Example
56 |         run: |
57 |           source("inst/scripts/spectra-trait_ely_leafN_plsr_bootstrap_example.R")
58 |         shell: Rscript {0}


--------------------------------------------------------------------------------
/R/vip.R:
--------------------------------------------------------------------------------
 1 | ##' @title VIP returns all VIP values for all variables and all number of components, as a ncomp x nvars matrix.
 2 | ##' @param object fitted pls::plsr object
 3 | ##' @export
 4 | VIP <- function(object) {
 5 |   ## VIP returns all VIP values for all variables and all number of components,
 6 |   ## as a ncomp x nvars matrix.
 7 |   if (object$method != "oscorespls")
 8 |     stop("Only implemented for orthogonal scores algorithm.  Refit with 'method = \"oscorespls\"'")
 9 |   if (nrow(object$Yloadings) > 1)
10 |     stop("Only implemented for single-response models")
11 |   
12 |   SS <- c(object$Yloadings)^2 * colSums(object$scores^2)
13 |   Wnorm2 <- colSums(object$loading.weights^2)
14 |   SSW <- sweep(object$loading.weights^2, 2, SS / Wnorm2, "*") # Replace with matrix mult.
15 |   sqrt(nrow(SSW) * apply(SSW, 1, cumsum) / cumsum(SS))
16 | }
17 | 
18 | ##' @title VIPjh returns the VIP of variable j with h components
19 | ##' @param object fitted pls::plsr object
20 | ##' @param j which variable in the fitted pls::plsr object
21 | ##' @param h the number of components in the fitted pls::plsr object to calculate the VIP
22 | ##' @export
23 | VIPjh <- function(object, j, h) {
24 |   ## VIPjh returns the VIP of variable j with h components
25 |   if (object$method != "oscorespls")
26 |     stop("Only implemented for orthogonal scores algorithm.  Refit with 'method = \"oscorespls\"'")
27 |   if (nrow(object$Yloadings) > 1)
28 |     stop("Only implemented for single-response models")
29 |   
30 |   b <- c(object$Yloadings)[1:h]
31 |   T <- object$scores[,1:h, drop = FALSE]
32 |   SS <- b^2 * colSums(T^2)
33 |   W <- object$loading.weights[,1:h, drop = FALSE]
34 |   Wnorm2 <- colSums(W^2)
35 |   sqrt(nrow(W) * sum(SS * W[j,]^2 / Wnorm2) / sum(SS))
36 | }


--------------------------------------------------------------------------------
/.github/workflows/ci-run_plsr_example.yaml:
--------------------------------------------------------------------------------
 1 | name: ci-run_PLSR_example
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [main, master]
 6 |   pull_request:
 7 |     branches: [main, master]
 8 | 
 9 | env:
10 |   R_LIBS_USER: /usr/local/lib/R/site-library
11 |   LC_ALL: en_US.UTF-8
12 |   NCPUS: 2
13 | 
14 | jobs:
15 |   run-plsr: 
16 |     # The type of runner that the job will run on
17 |     runs-on: ubuntu-latest
18 | 
19 |     env:
20 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
21 |       R_KEEP_PKG_SOURCE: yes
22 |       R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
23 |       
24 |     strategy:
25 |       fail-fast: false
26 |       matrix:
27 |         R:
28 |           - "4.0"
29 |           - "4.1"
30 |           - "4.2"
31 |           - "4.3"
32 |           - "4.4"
33 |           - "4.5"
34 |  
35 |     steps:
36 |       #check out source code
37 |       - uses: actions/checkout@v4
38 | 
39 |       - uses: r-lib/actions/setup-r@v2
40 |         with:
41 |           use-public-rspm: true
42 | 
43 |       - uses: r-lib/actions/setup-r-dependencies@v2
44 | 
45 |       - name: Query dependencies
46 |         run: |
47 |           install.packages('remotes')
48 |           saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
49 |           writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
50 |         shell: Rscript {0}
51 |       
52 |       - name: Install dependencies
53 |         run: |
54 |           Rscript -e 'remotes::install_github(repo="plantphys/spectratrait", dependencies=TRUE)'
55 | 
56 |       # Run R script
57 |       - name: Run Ely et al. (2019) PLSR Example
58 |         run: |
59 |           source("inst/scripts/spectra-trait_ely_leafN_plsr_bootstrap_example.R")
60 |         shell: Rscript {0}


--------------------------------------------------------------------------------
/man/find_optimal_comp_by_groups.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/find_optimal_components.R
 3 | \name{find_optimal_comp_by_groups}
 4 | \alias{find_optimal_comp_by_groups}
 5 | \title{Uses the firstMin and firstPlateau methods for the determination of the optimal number of PLSR model components,
 6 | by group (i.e. optimal selection by stratification)}
 7 | \usage{
 8 | find_optimal_comp_by_groups(
 9 |   dataset = NULL,
10 |   targetVariable = NULL,
11 |   method = "firstPlateau",
12 |   maxComps = 20,
13 |   iterations = 20,
14 |   prop = 0.7,
15 |   random_seed = 123456789,
16 |   group_variables = NULL
17 | )
18 | }
19 | \arguments{
20 | \item{dataset}{input full PLSR dataset. Usually just the calibration dataset}
21 | 
22 | \item{targetVariable}{What object or variable to use as the Y (predictand) in the PLSR model?
23 | Usually the "inVar" variable set at the beginning of a PLS script}
24 | 
25 | \item{method}{Which approach to use to find optimal components. Options: firstPlateau, firstMin}
26 | 
27 | \item{maxComps}{maximum number of components to consider}
28 | 
29 | \item{iterations}{how many different permutations to run}
30 | 
31 | \item{prop}{proportion of data to preserve for each permutation}
32 | 
33 | \item{random_seed}{random seed to use for splitting data}
34 | 
35 | \item{group_variables}{group_variables character vector of the form c("var1", "var2"..."varn")
36 | providing the factors used for stratified sampling.}
37 | }
38 | \value{
39 | nComps the optimal number of PLSR components
40 | }
41 | \description{
42 | Uses the firstMin and firstPlateau methods for the determination of the optimal number of PLSR model components,
43 | by group (i.e. optimal selection by stratification)
44 | }
45 | \author{
46 | asierrl, Shawn P. Serbin
47 | }
48 | 


--------------------------------------------------------------------------------
/man/pls_permutation_by_groups.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/pls_permutation.R
 3 | \name{pls_permutation_by_groups}
 4 | \alias{pls_permutation_by_groups}
 5 | \title{Run a PLSR model permutation analysis stratified by selected "groups". Can be used to
 6 | determine the optimal number of components or conduct a boostrap uncertainty analysis}
 7 | \usage{
 8 | pls_permutation_by_groups(
 9 |   dataset = NULL,
10 |   targetVariable = NULL,
11 |   maxComps = 20,
12 |   iterations = 20,
13 |   prop = 0.7,
14 |   group_variables = NULL,
15 |   verbose = FALSE
16 | )
17 | }
18 | \arguments{
19 | \item{dataset}{input full PLSR dataset. Usually just the calibration dataset}
20 | 
21 | \item{targetVariable}{What object or variable to use as the Y (predictand) in the PLSR model?
22 | Usually the "inVar" variable set at the beginning of a PLS script}
23 | 
24 | \item{maxComps}{maximum number of components to use for each PLSR fit}
25 | 
26 | \item{iterations}{how many different permutations to run}
27 | 
28 | \item{prop}{proportion of data to preserve for each permutation}
29 | 
30 | \item{group_variables}{Character vector of the form c("var1", "var2"..."varn")
31 | providing the factors used for stratified sampling in the PLSR permutation analysis}
32 | 
33 | \item{verbose}{Should the function report the current iteration status/progress to the terminal
34 | or run silently? TRUE/FALSE. Default FALSE}
35 | }
36 | \value{
37 | output a list containing the PRESS and coef_array.
38 | output <- list(PRESS=press.out, coef_array=coefs)
39 | }
40 | \description{
41 | Run a PLSR model permutation analysis stratified by selected "groups". Can be used to
42 | determine the optimal number of components or conduct a boostrap uncertainty analysis
43 | }
44 | \author{
45 | asierrl, Shawn P. Serbin, Julien Lamour
46 | }
47 | 


--------------------------------------------------------------------------------
/tests/testthat/test.create_data_split.R:
--------------------------------------------------------------------------------
 1 | context("*** Test that the create data split function has the expected behavior *** ")
 2 | 
 3 | test_that("Generating a data split using the dplyr approach doesn't throw an error or generate duplicates between cal. and val. data", {
 4 |   plot<- rep(c("plot1", "plot2", "plot3"),each=42)
 5 |   season<- rep(1:6, 21)
 6 |   disease<- c(rep(0,84), rep(1,42))
 7 |   d<- seq(1:126)
 8 |   df <- data.frame(plot,season,disease,d)
 9 |   df <- df %>% mutate(id=row_number())
10 |   
11 |   split_data <- spectratrait::create_data_split(dataset=df, approach="dplyr", 
12 |                                                 split_seed=7529075, prop=0.8, 
13 |                                                 group_variables=c("plot", 
14 |                                                                 "season", 
15 |                                                                 "disease"))
16 |   expect_false(sum(split_data$cal_data$id %in% split_data$val_data$id)>0)
17 | })
18 | 
19 | test_that("Generating a data split using the base approach doesn't throw an error or generate duplicates between cal. and val. data", {
20 |   plot<- rep(c("plot1", "plot2", "plot3"), each=42)
21 |   season<- rep(1:6, 21)
22 |   disease<- c(rep(0,84), rep(1,42))
23 |   d<- seq(1:126)
24 |   df <- data.frame(plot, season, disease, d)
25 |   df <- df %>% mutate(id=row_number())
26 |   
27 |   split_data <- spectratrait::create_data_split(dataset=df, approach="base", 
28 |                                                 split_seed=7529075, prop=0.8, 
29 |                                                 group_variables=c("plot", 
30 |                                                                   "season", 
31 |                                                                   "disease"))
32 |   expect_false(sum(split_data$cal_data$id %in% split_data$val_data$id)>0)
33 | })


--------------------------------------------------------------------------------
/R/f.plot.coef.R:
--------------------------------------------------------------------------------
 1 | ##' @title f.plot.coef
 2 | ##' 
 3 | ##' @param Z Coefficient matrix with each row corresponding to the coefficients and wavelength in columns
 4 | ##' @param wv vector of wavelengths
 5 | ##' @param xlim vector to change the default xlim of the plots (ex xlim = c(500, 2400))
 6 | ##' @param position Position of the legend (see base function legend for help)
 7 | ##' @param type Name of the y axis and of the legend
 8 | ##' @param plot_label optional plot label to include with the figure
 9 | ##' 
10 | ##' @importFrom stats quantile
11 | ##' @importFrom graphics polygon lines legend box
12 | ##' 
13 | ##' @author Julien Lamour
14 | ##' @export
15 | f.plot.coef <- function(
16 |   Z,                  ## Coefficient matrix with each row corresponding to the coefficients and wavelength in columns
17 |   wv,                 ## vector of wavelengths 
18 |   xlim=NULL,          ## vector to change the default xlim of the plots (ex xlim = c(500, 2400))
19 |   position='topright',## Position of the legend (see base function legend for help)
20 |   type='Coefficient', ## Name of the y axis and of the legend
21 |   plot_label=NULL     ## optional label for plot
22 | ){
23 |   
24 |   if(is.null(xlim)){xlim=c(min(wv),max(wv))}
25 |   mean_spec <- colMeans(Z)
26 |   spectra_quantiles <- apply(Z,2,quantile,na.rm=T,probs=c(0,0.01,0.025,0.05,0.5,0.95,0.975,0.99,1))
27 |   
28 |   plot(x=NULL,y=NULL,xlim=xlim,ylim=c(min(Z),max(Z)),xlab="Wavelength (nm)",
29 |        ylab=type,main=plot_label)
30 |   polygon(c(wv ,rev(wv)),c(spectra_quantiles[9,], rev(spectra_quantiles[1,])),
31 |           col="grey60",border=NA)
32 |   polygon(c(wv ,rev(wv)),c(spectra_quantiles[6,], rev(spectra_quantiles[4,])),
33 |           col="#99CC99",border=NA)
34 |   lines(wv,mean_spec,lwd=2, lty=1, col="black")
35 |   lines(wv,spectra_quantiles[1,], lty=3, col="grey60")
36 |   lines(wv,spectra_quantiles[9,], lty=3, col="grey60")
37 |   legend(position,legend=c(paste("Mean",type),"Min/Max (range)", "95% CI"),lty=c(1,1,1),
38 |          lwd=c(2,10,10),col=c("black","grey50","#99CC99"),bty="n")
39 |   box(lwd=2.2)
40 | }


--------------------------------------------------------------------------------
/.github/workflows/ci-weekly.yaml:
--------------------------------------------------------------------------------
 1 | name: R-CMD-check-Weekly
 2 | 
 3 | permissions: read-all
 4 | 
 5 | on:
 6 |   # every Monday at 4:30 AM
 7 |     schedule:
 8 |     - cron: '30 4 * * 1'
 9 | 
10 | env:
11 |   NCPUS: 2
12 | 
13 | jobs:
14 |   R-CMD-check:
15 |     runs-on: ${{ matrix.config.os }}
16 | 
17 |     name: ${{ matrix.config.os }} (${{ matrix.config.r }})
18 |   
19 |     strategy:
20 |       fail-fast: false
21 |       matrix:
22 |         config:
23 |           - {os: macOS-latest,  r: 'oldrel-1'}
24 |           - {os: macOS-latest,  r: 'release'}
25 |           - {os: windows-latest, r: 'release'}
26 |           # use 4.1 to check with rtools40's older compiler
27 |           - {os: windows-latest, r: '4.1'}
28 |           - {os: ubuntu-latest,  r: 'oldrel-3'}
29 |           - {os: ubuntu-latest,  r: 'oldrel-2'}
30 |           - {os: ubuntu-latest, r: 'oldrel-1'}
31 |           - {os: ubuntu-latest, r: 'release'}
32 |           - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}
33 |   
34 |     env:
35 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
36 |       R_KEEP_PKG_SOURCE: yes
37 |       R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
38 | 
39 |     steps:
40 |       - uses: actions/checkout@v4
41 | 
42 |       - uses: r-lib/actions/setup-pandoc@v2
43 | 
44 |       - uses: r-lib/actions/setup-r@v2
45 |         with:
46 |           r-version: ${{ matrix.config.r }}
47 |           http-user-agent: ${{ matrix.config.http-user-agent }}
48 |           use-public-rspm: true
49 |           
50 |       - uses: r-lib/actions/setup-r-dependencies@v2
51 |         with:
52 |           extra-packages: any::rcmdcheck
53 |           needs: check
54 | 
55 |       # rcmdcheck but do not build vignettes
56 |       - name: Build Check Across OS & R Releases
57 |         run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--no-build-vignettes"), build_args = c("--no-manual", "--no-build-vignettes"), error_on = "error")
58 |         shell: Rscript {0}
59 | 
60 |       - name: Upload check results
61 |         if: failure()
62 |         uses: actions/upload-artifact@v4
63 |         with:
64 |           name: ${{ runner.os }}-r${{ matrix.config.r }}-results
65 |           path: check


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: spectratrait
 2 | Title: A simple add-on package to aid in the fitting of leaf or canopy scale spectra-trait PLSR models
 3 | Version: 1.2.6
 4 | Authors@R:
 5 |   c(person(given = "Julien",
 6 |            family = "Lamour",
 7 |            role = c("aut", "ctb"),
 8 |            email = "jlamour.sci@gmail.com",
 9 |            comment = c(ORCID = "0000-0002-4410-507X")),
10 |     person(given = "Jeremiah",
11 |            family = "Anderson",
12 |            role = "ctb",
13 |            email = "jeremiah.a.anderson@nasa.gov",
14 |            comment = c(ORCID = "0000-0001-8925-5226")),
15 |     person(given = "Kenneth",
16 |            family = "Davidson",
17 |            role = "ctb",
18 |            email = "kdavidson@americanforests.org",
19 |            comment = c(ORCID = "0000-0001-5745-9689")),
20 |     person(given = "Kim",
21 |            family = "Ely",
22 |            role = "ctb",
23 |            email = "kely@bnl.gov",
24 |            comment = c(ORCID = "0000-0002-3915-001X")),
25 |     person(given = "Shawn",
26 |            family = "Serbin",
27 |            role = c("aut", "cre", "ctb"),
28 |            email = "shawn.p.serbin@nasa.gov",
29 |            comment = c(ORCID = "0000-0003-4136-8971")))
30 | Maintainer: Shawn P. Serbin <shawn.p.serbin@nasa.gov>
31 | Description: This package provides functions to conduct standardized spectra-trait PLSR model fitting, including uncertainty analysis that follows DOI: https://doi.org/10.1111/nph.16123
32 | License: MIT + file LICENSE
33 | Encoding: UTF-8
34 | LazyData: true
35 | Roxygen: list(markdown = TRUE)
36 | RoxygenNote: 7.3.1
37 | Imports: 
38 |     httr (>= 1.4.2),
39 |     readr (>= 1.3.1),
40 |     pls (>= 2.7-2),
41 |     dplyr (>= 1.0.1),
42 |     magrittr (>= 2.0.1),
43 |     reshape2 (>= 1.4.4),
44 |     here (>= 0.1),
45 |     plotrix (>= 3.7-8),
46 |     ggplot2 (>= 3.3.2),
47 |     gridExtra (>= 2.3),
48 |     scales (>= 1.1.1),
49 |     testthat (>= 3.1.2)
50 | Suggests:
51 |     devtools (>= 2.3.1),
52 |     remotes (>= 2.2.0),
53 |     RCurl (>= 1.98-1.2),
54 |     knitr (>= 1.37),
55 |     markdown,
56 |     rmarkdown
57 | Depends:
58 |     R (>= 4.0)
59 | VignetteBuilder: knitr
60 | NeedsCompilation: no
61 | 


--------------------------------------------------------------------------------
/tests/testthat/test.optimal_components.R:
--------------------------------------------------------------------------------
 1 | context("*** Test methods for finding optimal number of PLSR components ***")
 2 | 
 3 | ### Setup data for tests
 4 | #Load Ely et al 2019 dataset
 5 | data("ely_plsr_data")
 6 | inVar <- "N_g_m2"
 7 | Start.wave <- 500
 8 | End.wave <- 2400
 9 | wv <- seq(Start.wave,End.wave,1)
10 | plsr_data <- ely_plsr_data
11 | spec <- as.matrix(plsr_data[, which(names(plsr_data) %in% paste0("Wave_",wv))])
12 | plsr_data <- data.frame(plsr_data[, which(names(plsr_data) %notin% paste0("Wave_",wv))],
13 |                         Spectra=I(spec))
14 | ###
15 | 
16 | test_that("Finding optimal components using the built-in PLS package approach", {
17 |   method <- "pls"
18 |   random_seed <- 1245565
19 |   seg <- 50
20 |   maxComps <- 20
21 |   iterations <- 80
22 |   prop <- 0.70
23 |   
24 |   nComps <- spectratrait::find_optimal_components(dataset=plsr_data, targetVariable=inVar,
25 |                                                   method=method, maxComps=maxComps, seg=seg, 
26 |                                                   random_seed=random_seed)
27 |   expect_gte(nComps, 12)
28 | })
29 | 
30 | test_that("Finding optimal components using the firstMin approach", {
31 |   method <- "firstMin"
32 |   random_seed <- 1245565
33 |   seg <- 50
34 |   maxComps <- 20
35 |   iterations <- 80
36 |   prop <- 0.70
37 |   
38 |   nComps <- spectratrait::find_optimal_components(dataset=plsr_data, targetVariable=inVar, 
39 |                                                   method=method, maxComps=maxComps, 
40 |                                                   iterations=iterations, seg=seg, prop=prop, 
41 |                                                   random_seed=random_seed)
42 |   expect_gte(nComps, 12)
43 |   
44 | })
45 | 
46 | test_that("Finding optimal components using the firstPlateau approach", {
47 |   method <- "firstPlateau"
48 |   random_seed <- 1245565
49 |   seg <- 50
50 |   maxComps <- 20
51 |   iterations <- 80
52 |   prop <- 0.70
53 |   
54 |   nComps <- spectratrait::find_optimal_components(dataset=plsr_data, targetVariable=inVar, 
55 |                                                   method=method, maxComps=maxComps, 
56 |                                                   iterations=iterations, seg=seg, prop=prop, 
57 |                                                   random_seed=random_seed)
58 |   expect_gte(nComps, 12)
59 |   
60 | })


--------------------------------------------------------------------------------
/.github/workflows/check-os.yaml:
--------------------------------------------------------------------------------
 1 | name: R-CMD-check-OS-R
 2 | 
 3 | # Check OS and R releases
 4 | on:
 5 |   push:
 6 |     branches: [main, master]
 7 |   pull_request:
 8 |     branches: [main, master]
 9 | 
10 | permissions: read-all
11 | 
12 | env:
13 |   NCPUS: 2
14 | 
15 | jobs:
16 |   R-CMD-check:
17 |     runs-on: ${{ matrix.config.os }}
18 | 
19 |     name: ${{ matrix.config.os }} (${{ matrix.config.r }})
20 | 
21 |     strategy:
22 |       fail-fast: false
23 |       matrix:
24 |         config:
25 |           - {os: macOS-latest,  r: 'oldrel-1'}
26 |           - {os: macos-latest,   r: 'release'}
27 |           - {os: windows-latest, r: 'release'}
28 |           # use 4.1 to check with rtools40's older compiler
29 |           - {os: windows-latest, r: '4.1'}
30 |           - {os: ubuntu-latest,   r: 'devel', http-user-agent: 'release'}
31 |           - {os: ubuntu-latest,   r: 'release'}
32 |           - {os: ubuntu-latest,   r: 'oldrel-1'}
33 |           
34 |     env:
35 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
36 |       R_KEEP_PKG_SOURCE: yes
37 |       R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
38 |       RSPM: ${{ matrix.config.rspm }}
39 | 
40 |     steps:
41 |       - uses: actions/checkout@v4
42 | 
43 |       - uses: r-lib/actions/setup-pandoc@v2
44 | 
45 |       - uses: r-lib/actions/setup-r@v2
46 |         with:
47 |           r-version: ${{ matrix.config.r }}
48 |           http-user-agent: ${{ matrix.config.http-user-agent }}
49 |           use-public-rspm: true
50 | 
51 |       - name: Query dependencies
52 |         run: |
53 |           install.packages('remotes')
54 |           saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
55 |           writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
56 |         shell: Rscript {0}
57 | 
58 |       - uses: r-lib/actions/setup-r-dependencies@v2
59 |         with:
60 |           extra-packages: any::rcmdcheck
61 |           needs: check
62 | 
63 |       # rcmdcheck but do not build vignettes
64 |       - name: Build Check Across OS & R Releases
65 |         run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--no-build-vignettes"), build_args = c("--no-manual", "--no-build-vignettes"), error_on = "error")
66 |         shell: Rscript {0}
67 | 
68 |       - name: Upload check results
69 |         if: failure()
70 |         uses: actions/upload-artifact@v4
71 |         with:
72 |           name: ${{ runner.os }}-r${{ matrix.config.r }}-results
73 |           path: check


--------------------------------------------------------------------------------
/R/f.plot.spec.R:
--------------------------------------------------------------------------------
 1 | ##' @title f.plot.spec
 2 | ##' 
 3 | ##' @param Z Spectra matrix with each row corresponding to a spectra and wavelength in columns
 4 | ##' @param wv vector of wavelengths corresponding to the column of the spectra matrix Z
 5 | ##' @param xlim vector to change the default xlim of the plots (ex xlim = c(500, 2400))
 6 | ##' @param position Position of the legend (see base function legend for help)
 7 | ##' @param type Name of the y axis and of the legend. E.g. Reflectance, Transmittance
 8 | ##' @param plot_label optional plot label to include with the figure
 9 | ##' @param CI Desired confidence interval for the spectra plot. Default is: 95; can be changed dynamically
10 | ##' 
11 | ##' @importFrom stats quantile
12 | ##' @importFrom graphics polygon lines legend box
13 | ##' 
14 | ##' @author Julien Lamour, Shawn P. Serbin, Andrés Baresch
15 | ##' @export
16 | f.plot.spec <- function(
17 |   Z,                  ## Spectra matrix with each row corresponding to a spectra and wavelength in columns
18 |   wv,                 ## vector of wavelengths corresponding to the column of the spectra matrix Z
19 |   xlim=NULL,          ## vector to change the default xlim of the plots (ex xlim = c(500, 2400))
20 |   position='topright',## Position of the legend (see base function legend for help)
21 |   type='Reflectance', ## Name of the y axis and of the legend
22 |   plot_label=NULL ,   ## optional label for plot
23 |   CI = 95             ## confidence interval as percentage, default 95% ; can be prescribed  
24 | ){
25 |   if(mean(as.matrix(Z),na.rm=TRUE)>1){Z=Z/100} ## Check if the spectra are in pc [0,100] or in [0,1]
26 |   if(is.null(xlim)){xlim=c(min(wv),max(wv))}
27 |   mean_spec <- colMeans(Z,na.rm=TRUE)
28 |   
29 |   CI_l=0+((100-95)/200) # lower bound for the confidence interval  
30 |   CI_u=1-((100-95)/200) # upper bound for the confidence interval 
31 |   
32 |   spectra_quantiles <- apply(Z,2,quantile,na.rm=T,probs=c(0,CI_l,CI_u,1)) 
33 |   
34 |   plot(x=NULL,y=NULL,ylim=c(0,100),xlim=xlim,xlab="Wavelength (nm)",
35 |        ylab=paste0(type," (%)"),main=plot_label)
36 |     
37 |   polygon(c(wv ,rev(wv)),c(spectra_quantiles[3,]*100, rev(spectra_quantiles[2,]*100)),
38 |           col="#99CC99",border=NA)
39 |       
40 |        
41 |   ci_text=paste0(CI,"% CI")          
42 |   lines(wv,mean_spec*100,lwd=2, lty=1, col="black")
43 |   lines(wv,spectra_quantiles[1,]*100, lty=3, col="grey40")
44 |   lines(wv,spectra_quantiles[4,]*100, lty=3, col="grey40")
45 |   legend(position,legend=c(paste("Mean",type),"Min/Max", ci_text),lty=c(1,3,1),
46 |          lwd=c(2,1,10),col=c("black","grey40","#99CC99"),bty="n")
47 |   box(lwd=2.2)
48 | }
49 | 


--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
 1 | ##' Function to source text data from GitHub
 2 | ##' 
 3 | ##' @param url http/https URL to the github dataset
 4 | ##' @param sep dataset file delimiter
 5 | ##' @param header TRUE/FALSE does the file have a column header?
 6 | ##' 
 7 | ##' @import httr
 8 | ##' 
 9 | ##' @author gist.github.com/christophergandrud/4466237
10 | ##' @export
11 | source_GitHubData <- function(url, sep = ",", header = TRUE) {
12 |   # define function to grab PLSR model from GitHub
13 |   #devtools::source_gist("gist.github.com/christophergandrud/4466237")
14 |   request <- httr::GET(url)
15 |   httr::stop_for_status(request)
16 |   handle <- textConnection(httr::content(request, as = 'text'))
17 |   on.exit(close(handle))
18 |   read.table(handle, sep = sep, header = header)
19 | }
20 | 
21 | ##' Calculate RMSE and percent RMSE with PLSR model results
22 | ##' 
23 | ##' @param plsr_dataset input plsr dataset
24 | ##' @param inVar the trait variable used in the calculation of RMSE
25 | ##' @param residuals predicted minus observed residual vector from either 
26 | ##' a cross-validation CV or independent validation
27 | ##' @param range calculate over the full data range or the 95% of data range. 
28 | ##' options full or 95perc
29 | ##' @return output a list containing the rmse and perc_rmse. 
30 | ##' output <- list(rmse = rmse, perc_rmse = perc_rmse)
31 | ##' 
32 | ##' @author Shawn P. Serbin
33 | ##' 
34 | ##' @export
35 | percent_rmse <- function(plsr_dataset = NULL, inVar = NULL, 
36 |                          residuals = NULL, range = "full") {
37 |   rmse <- sqrt(mean(residuals^2, na.rm = TRUE))
38 |   val_data_range <- range(plsr_dataset[,inVar], na.rm = TRUE)
39 |   val_95perc_data_range <- quantile(plsr_dataset[,inVar], 
40 |                                     probs = c(0.025,0.975), 
41 |                                     na.rm = TRUE)
42 |   if (range=="full") {
43 |     perc_rmse <- (rmse/(val_data_range[2]-val_data_range[1]))*100
44 |   } else if (range=="95perc") {
45 |     perc_rmse <- (rmse/(val_95perc_data_range[[2]]-val_95perc_data_range[[1]]))*100 
46 |   } else {
47 |     print("Not a valid option, defaulting to full")
48 |     perc_rmse <- (rmse/(val_data_range[2]-val_data_range[1]))*100
49 |   }
50 |   
51 |   # output rmse list
52 |   output <- list(rmse = rmse, perc_rmse = perc_rmse)
53 |   return(output)
54 | }
55 | 
56 | 
57 | ##' Not %in% function
58 | ##' @param x initial list
59 | ##' @param table list to check against
60 | ##' 
61 | ##' @export
62 | #`%notin%` <- Negate(`%in%`)
63 | `%notin%` <- function(x, table) {
64 |   !(x %in% table)
65 | }
66 | 
67 | ##' Function to check for installed package
68 | ##' @param pkg name of package to check if installed
69 | ##' not presently used
70 | testForPackage <- function(pkg) {
71 |   if (!requireNamespace(pkg)) {
72 |     stop("Package", pkg, "required but not installed")
73 |   }
74 | }


--------------------------------------------------------------------------------
/R/create_data_split.R:
--------------------------------------------------------------------------------
 1 | ##' Create a calibration (training) / validation data split for PLSR model fitting and testing
 2 | ##' 
 3 | ##' @param dataset input full PLSR dataset to split into cal/val datasets
 4 | ##' @param approach approach to splitting the dataset. Options: base or dplyr
 5 | ##' @param split_seed random seed to use for splitting data
 6 | ##' @param prop the proportion of data to preserve for calibration (e.g. 0.8) and validation (0.2). 
 7 | ##' This sets the calibration proportion
 8 | ##' @param group_variables Use factor variables to conduct a stratified sampling for cal/val
 9 | ##' 
10 | ##' @return output_list A list containing the calibration dataset (cal_data)
11 | ##' and validation dataset (val_data)
12 | ##' 
13 | ##' @importFrom magrittr %>%
14 | ##' @importFrom dplyr mutate group_by_at slice n vars all_of row_number
15 | ##' 
16 | ##' @author Julien Lamour, Jeremiah Anderson, Shawn P. Serbin
17 | ##' @export
18 | create_data_split <- function(dataset=NULL, approach=NULL, split_seed=123456789, prop=0.8,
19 |                               group_variables=NULL) {
20 |   # TODO: import only required functions from dplyr
21 |   set.seed(split_seed)
22 |   
23 |   # outer if/else to stop if approach set to NULL
24 |   if(!is.null(approach)) {
25 |     
26 |     ## base R data split function
27 |     if (approach=="base") {
28 |       dataset$CalVal <- NA
29 |       split_var <- group_variables
30 |       if(length(group_variables) > 1) {
31 |         dataset$ID <- apply(dataset[, group_variables], MARGIN = 1, FUN = function(x) paste(x, collapse = " "))
32 |       } else {
33 |         dataset$ID <- dataset[, group_variables]
34 |       }
35 |       split_var_list <- unique(dataset$ID)
36 |       for(i in 1:length(split_var_list)) {
37 |         temp <- row.names(dataset[dataset$ID == split_var_list[i], ])
38 |         ## there should probably be more than 4 obs I'm guessing, so this may need adjusting
39 |         if(length(temp) > 3) {
40 |           Cal <- sample(temp,round(prop*length(temp)))
41 |           Val <- temp[temp %notin% Cal]
42 |           dataset$CalVal[ row.names(dataset) %in% Cal ] <- "Cal"
43 |           dataset$CalVal[ row.names(dataset) %in% Val ] <- "Val"
44 |           p_cal <- length(Cal)/length(temp) * 100
45 |           message(paste0(split_var_list[i], "   ", "Cal", ": ", round(p_cal,3), "%"))
46 |         } else {
47 |           message(paste(split_var_list[i], "Not enough observations"))
48 |         }
49 |       }
50 |       dataset$ID <- NULL
51 |       # drop NA's in CalVal
52 |       dataset <- dataset[!is.na(dataset$CalVal), ]
53 |       cal.plsr.data <- dataset[dataset$CalVal== "Cal",]
54 |       val.plsr.data <- dataset[dataset$CalVal== "Val",]
55 |       
56 |       # Remove temporary CalVal column
57 |       cal.plsr.data <- cal.plsr.data[,-which(names(cal.plsr.data)=="CalVal")]
58 |       val.plsr.data <- val.plsr.data[,-which(names(val.plsr.data)=="CalVal")]
59 |       
60 |       # dplyr based data split function
61 |     } else if (approach=="dplyr") {
62 |       dataset <- dataset %>% mutate(ids=row_number())
63 |       cal.plsr.data <- dataset %>%
64 |         group_by_at(vars(all_of(group_variables))) %>% 
65 |         slice(sample(1:n(), prop*n())) %>% 
66 |         data.frame()
67 |       val.plsr.data <- dataset[dataset$ids %notin% cal.plsr.data$ids,]
68 |       cal.plsr.data <- cal.plsr.data[,-which(colnames(cal.plsr.data)=="ids")]
69 |       val.plsr.data <- val.plsr.data[,-which(colnames(val.plsr.data)=="ids")]
70 |   } else {
71 |     stop("**** Please set approach to either base R or dplyr data split ****")
72 |   }
73 |     output_list <- list(cal_data=cal.plsr.data, val_data=val.plsr.data)
74 |     return(output_list)
75 |   }
76 |   
77 |   # if approach is set to NULL (i.e. not set) return error message
78 |   stop("**** Please set approach to either base R or dplyr data split ****")
79 |   
80 | }


--------------------------------------------------------------------------------
/inst/scripts/pull_data_from_ecosis.R:
--------------------------------------------------------------------------------
  1 | ####################################################################################################
  2 | #
  3 | #  
  4 | #   A simple example showing how to pull spectra and trait data from EcoSIS and plot it up
  5 | #
  6 | #   Spectra and trait data source:
  7 | #   https://ecosis.org/package/ngee-arctic-2016-leaf-spectral-reflectance-kougarok-road-seward-peninsula-alaska-2016
  8 | #
  9 | #    Notes:
 10 | #    * Provided as a basic example of how to apply the model to new spectra observations
 11 | #    * The author notes the code is not the most elegant or clean, but is functional 
 12 | #    * Questions, comments, or concerns can be sent to sserbin@bnl.gov
 13 | #    * Code is provided under GNU General Public License v3.0 
 14 | #
 15 | #
 16 | ####################################################################################################
 17 | 
 18 | 
 19 | #--------------------------------------------------------------------------------------------------#
 20 | ### Load libraries
 21 | list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra",
 22 |                       "spectratrait")
 23 | invisible(lapply(list.of.packages, library, character.only = TRUE))
 24 | #--------------------------------------------------------------------------------------------------#
 25 | 
 26 | 
 27 | #--------------------------------------------------------------------------------------------------#
 28 | ### Setup options
 29 | 
 30 | # What is the source dataset from EcoSIS?
 31 | ecosis_id <- "960dbb0c-144e-4563-8117-9e23d14f4aa9"
 32 | 
 33 | # Specify output directory, output_dir 
 34 | # Options: 
 35 | # tempdir - use a OS-specified temporary directory 
 36 | # user defined PATH - e.g. "~/scratch/PLSR"
 37 | output_dir <- "tempdir"
 38 | #--------------------------------------------------------------------------------------------------#
 39 | 
 40 | 
 41 | #--------------------------------------------------------------------------------------------------#
 42 | ### Set working directory
 43 | if (output_dir=="tempdir") {
 44 |   outdir <- tempdir()
 45 | } else {
 46 |   if (! file.exists(output_dir)) dir.create(output_dir,recursive=TRUE)
 47 |   outdir <- file.path(path.expand(output_dir))
 48 | }
 49 | setwd(outdir) # set working directory
 50 | getwd()  # check wd
 51 | #--------------------------------------------------------------------------------------------------#
 52 | 
 53 | 
 54 | #--------------------------------------------------------------------------------------------------#
 55 | ### Example dataset
 56 | # 
 57 | # URL:
 58 | # https://ecosis.org/package/ngee-arctic-2016-leaf-spectral-reflectance-kougarok-road-watershed-seward-peninsula-alaska
 59 | #
 60 | #--------------------------------------------------------------------------------------------------#
 61 | 
 62 | 
 63 | #--------------------------------------------------------------------------------------------------#
 64 | ### Get source dataset from EcoSIS
 65 | dat_raw <- spectratrait::get_ecosis_data(ecosis_id = ecosis_id)
 66 | head(dat_raw)
 67 | names(dat_raw)[1:40]
 68 | #--------------------------------------------------------------------------------------------------#
 69 | 
 70 | 
 71 | #--------------------------------------------------------------------------------------------------#
 72 | ### Prepare data
 73 | Start.wave <- 500
 74 | End.wave <- 2400
 75 | wv <- seq(Start.wave,End.wave,1)
 76 | spectra <- data.frame(dat_raw[,names(dat_raw) %in% wv])
 77 | names(spectra) <- c(paste0("Wave_",wv))
 78 | head(spectra)[,1:5]
 79 | 
 80 | sample_info <- dat_raw[,names(dat_raw) %notin% seq(350,2500,1)]
 81 | head(sample_info)
 82 | names(sample_info)
 83 | #--------------------------------------------------------------------------------------------------#
 84 | 
 85 | 
 86 | #--------------------------------------------------------------------------------------------------#
 87 | ### Plot spectra
 88 | spectratrait::f.plot.spec(Z=spectra,wv=wv,plot_label="NGEE-Arctic Leaf Spectra")
 89 | dev.copy(png,file.path(outdir,'Leaf_Spectra.png'), 
 90 |          height=2500,width=4900, res=340)
 91 | dev.off();
 92 | par(mfrow=c(1,1))
 93 | #--------------------------------------------------------------------------------------------------#
 94 | 
 95 | 
 96 | #--------------------------------------------------------------------------------------------------#
 97 | ### Plot trait data
 98 | print("**** Plotting Ecosis trait data. Writing to scratch space ****")
 99 | 
100 | # Organize leaf trait data
101 | trait_data <- sample_info %>%
102 |   select(Site,Sample_ID,USDA_Species_Code=`USDA Symbol`,
103 |          Measurement_Date=`Sample Collection Date`,Cmass_g_g,Nmass_g_g,CN_Ratio,
104 |          LMA_g_m2)
105 | head(trait_data)        
106 | 
107 | # Prepare data for ggplot
108 | trait_data <- reshape2::melt(data = trait_data, id.vars = "USDA_Species_Code", measure.vars = c("LMA_g_m2",
109 |                                                                                                 "Cmass_g_g",
110 |                                                                                                 "Nmass_g_g",
111 |                                                                                                 "CN_Ratio"))
112 | head(trait_data)
113 | 
114 | # Graph the trait data and save a file to the scratch space
115 | p2 <- ggplot(trait_data, aes(x=USDA_Species_Code, y=value)) + 
116 |   geom_boxplot() +
117 |   facet_wrap(~variable, scale="free")
118 | print(p2)
119 | ggsave(filename = file.path(outdir,"NGEE-Arctic_2016_Kougarok_Trait_data.png"), plot = p2,
120 |        width = 40, height = 20, units = "cm")
121 | #--------------------------------------------------------------------------------------------------#
122 | 
123 | ### EOF


--------------------------------------------------------------------------------
/R/pls_permutation.R:
--------------------------------------------------------------------------------
  1 | ##' Run a PLSR model permutation analysis. Can be used to determine the optimal number of components 
  2 | ##' or conduct a boostrap uncertainty analysis
  3 | ##' 
  4 | ##' See Serbin et al. (2019). DOI: https://doi.org/10.1111/nph.16123
  5 | ##'
  6 | ##' @param dataset input full PLSR dataset. Usually just the calibration dataset
  7 | ##' @param targetVariable What object or variable to use as the Y (predictand) in the PLSR model? 
  8 | ##' Usually the "inVar" variable set at the beginning of a PLS script
  9 | ##' @param maxComps maximum number of components to use for each PLSR fit
 10 | ##' @param iterations how many different permutations to run
 11 | ##' @param prop proportion of data to preserve for each permutation
 12 | ##' @param verbose Should the function report the current iteration status/progress to the terminal
 13 | ##' or run silently? TRUE/FALSE. Default FALSE
 14 | ##' @return output a list containing the PRESS and coef_array.
 15 | ##' output <- list(PRESS=press.out, coef_array=coefs)
 16 | ##' 
 17 | ##' @importFrom pls plsr 
 18 | ##' @importFrom utils flush.console read.table setTxtProgressBar txtProgressBar
 19 | ##' 
 20 | ##' @author Julien Lamour, Shawn P. Serbin
 21 | ##' @export
 22 | pls_permutation <- function(dataset=NULL, targetVariable=NULL, maxComps=20, iterations=20, 
 23 |                             prop=0.70, verbose=FALSE) {
 24 |   inVar <- targetVariable
 25 |   coefs <- array(0,dim=c((ncol(dataset$Spectra)+1),iterations,maxComps))
 26 |   press.out <- array(data=NA, dim=c(iterations,maxComps))
 27 |   print("*** Running permutation test.  Please hang tight, this can take awhile ***")
 28 |   print("Options:")
 29 |   print(paste("Max Components:",maxComps, "Iterations:", iterations, 
 30 |               "Data Proportion (percent):", prop*100, sep=" "))
 31 |   
 32 |   if (verbose) {
 33 |     j <- 1
 34 |     pb <- txtProgressBar(min = 0, max = iterations, 
 35 |                          char="*",width=70,style = 3)
 36 |   }
 37 |   
 38 |   for (i in seq_along(1:iterations)) {
 39 |     rows <- sample(1:nrow(dataset),floor(prop*nrow(dataset)))
 40 |     sub.data <- dataset[rows,]
 41 |     val.sub.data <- dataset[-rows,]
 42 |     plsr.out <- plsr(as.formula(paste(inVar,"~","Spectra")), scale=FALSE, center=TRUE, 
 43 |                      ncomp=maxComps, validation="none", data=sub.data)
 44 |     pred_val <- predict(plsr.out,newdata=val.sub.data)
 45 |     sq_resid <- (pred_val[,,]-val.sub.data[,inVar])^2
 46 |     press <- apply(X = sq_resid, MARGIN = 2, FUN = sum)
 47 |     press.out[i,] <- press
 48 |     coefs[,i,] <- coef(plsr.out, intercept = TRUE, ncomp = 1:maxComps)
 49 |     rm(rows,sub.data,val.sub.data,plsr.out,pred_val,sq_resid,press)
 50 |     
 51 |     ### Display progress to console
 52 |     if (verbose) {
 53 |       setTxtProgressBar(pb, j)
 54 |       j <- j+1
 55 |       flush.console()
 56 |     }
 57 |   }
 58 |   if (verbose) {
 59 |     close(pb)
 60 |   }
 61 |   
 62 |   # create a new list with PRESS and permuted coefficients x wavelength x component number
 63 |   print("*** Providing PRESS and coefficient array output ***")
 64 |   output <- list(PRESS=press.out, coef_array=coefs)
 65 |   return(output)
 66 | }
 67 | 
 68 | 
 69 | ##' Run a PLSR model permutation analysis stratified by selected "groups". Can be used to 
 70 | ##' determine the optimal number of components or conduct a boostrap uncertainty analysis
 71 | ##' 
 72 | ##' @param dataset input full PLSR dataset. Usually just the calibration dataset
 73 | ##' @param targetVariable What object or variable to use as the Y (predictand) in the PLSR model? 
 74 | ##' Usually the "inVar" variable set at the beginning of a PLS script
 75 | ##' @param maxComps maximum number of components to use for each PLSR fit
 76 | ##' @param iterations how many different permutations to run
 77 | ##' @param prop proportion of data to preserve for each permutation
 78 | ##' @param verbose Should the function report the current iteration status/progress to the terminal
 79 | ##' or run silently? TRUE/FALSE. Default FALSE
 80 | ##' @param group_variables Character vector of the form c("var1", "var2"..."varn") 
 81 | ##' providing the factors used for stratified sampling in the PLSR permutation analysis
 82 | ##' 
 83 | ##' @return output a list containing the PRESS and coef_array.
 84 | ##' output <- list(PRESS=press.out, coef_array=coefs)
 85 | ##' 
 86 | ##' @importFrom magrittr %>%
 87 | ##' @importFrom dplyr mutate group_by_at slice n row_number
 88 | ##' @importFrom pls plsr 
 89 | ##' @importFrom utils flush.console read.table setTxtProgressBar txtProgressBar
 90 | ##' 
 91 | ##' @author asierrl, Shawn P. Serbin, Julien Lamour
 92 | ##' @export
 93 | ##' 
 94 | pls_permutation_by_groups <- function (dataset = NULL, targetVariable=NULL, maxComps = 20, 
 95 |                                        iterations = 20, prop = 0.7, group_variables=NULL,
 96 |                                        verbose = FALSE) {
 97 |   inVar <- targetVariable
 98 |   coefs <- array(0, dim = c((ncol(dataset$Spectra) + 1), iterations, maxComps))
 99 |   press.out <- array(data = NA, dim = c(iterations, maxComps))
100 |   print("*** Running permutation test.  Please hang tight, this can take awhile ***")
101 |   print("Options:")
102 |   print(paste("Max Components:", maxComps, "Iterations:", iterations, 
103 |               "Data Proportion (percent):", prop * 100, sep = " "))
104 |   if (verbose) {
105 |     j <- 1
106 |     pb <- utils::txtProgressBar(min = 0, max = iterations, 
107 |                                 char = "*", width = 70, style = 3)
108 |   }
109 |   for (i in seq_along(1:iterations)) {
110 |     if (!is.null(group_variables)) {
111 |       trainset <- dataset %>%
112 |         mutate(int_id=row_number()) %>%
113 |         group_by_at(group_variables) %>%
114 |         slice(sample(1:n(), prop * n()))
115 |       rows <- trainset$int_id
116 |       } else {
117 |        rows <- sample(1:nrow(dataset), floor(prop * nrow(dataset)))
118 |       }
119 |     sub.data <- dataset[rows, ]
120 |     val.sub.data <- dataset[-rows, ]
121 |     plsr.out <- plsr(as.formula(paste(inVar, "~", "Spectra")), 
122 |                      scale = FALSE, center = TRUE, ncomp = maxComps, 
123 |                      validation = "none", 
124 |                      data = sub.data)
125 |     pred_val <- predict(plsr.out, newdata = val.sub.data)
126 |     sq_resid <- (pred_val[, , ] - val.sub.data[, inVar])^2
127 |     press <- apply(X = sq_resid, MARGIN = 2, FUN = sum)
128 |     press.out[i, ] <- press
129 |     coefs[, i, ] <- coef(plsr.out, intercept = TRUE, ncomp = 1:maxComps)
130 |     rm(rows, sub.data, val.sub.data, plsr.out, pred_val, sq_resid, press)
131 |     if (verbose) {
132 |       setTxtProgressBar(pb, j)
133 |       j <- j + 1
134 |       flush.console()
135 |       }
136 |   }
137 |   if (verbose) {
138 |     close(pb)
139 |   }
140 |   # create a new list with PRESS and permuted coefficients x wavelength x component number
141 |   print("*** Providing PRESS and coefficient array output ***")
142 |   output <- list(PRESS = press.out, coef_array = coefs)
143 |   return(output)
144 | }


--------------------------------------------------------------------------------
/inst/scripts/apply_sserbin2019_lma_plsr_to_ely_example.R:
--------------------------------------------------------------------------------
  1 | ####################################################################################################
  2 | #
  3 | #  
  4 | #    Notes:
  5 | #    * The author notes the code is not the most elegant or clean, but is functional 
  6 | #    * Questions, comments, or concerns can be sent to sserbin@bnl.gov
  7 | #    * Code is provided under GNU General Public License v3.0 
  8 | #
  9 | ####################################################################################################
 10 | 
 11 | 
 12 | #--------------------------------------------------------------------------------------------------#
 13 | ### Load libraries
 14 | list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra",
 15 |                       "spectratrait")
 16 | invisible(lapply(list.of.packages, library, character.only = TRUE))
 17 | #--------------------------------------------------------------------------------------------------#
 18 | 
 19 | 
 20 | #--------------------------------------------------------------------------------------------------#
 21 | ### Setup options
 22 | 
 23 | # Default par options
 24 | opar <- par(no.readonly = T)
 25 | 
 26 | # Specify output directory, output_dir 
 27 | # Options: 
 28 | # tempdir - use a OS-specified temporary directory 
 29 | # user defined PATH - e.g. "~/scratch/PLSR"
 30 | output_dir <- "tempdir"
 31 | #--------------------------------------------------------------------------------------------------#
 32 | 
 33 | 
 34 | #--------------------------------------------------------------------------------------------------#
 35 | ### Load Ely et al 2019 dataset
 36 | data("ely_plsr_data")
 37 | head(ely_plsr_data)[,1:8]
 38 | 
 39 | # What is the target variable?
 40 | inVar <- "LMA_g_m2"
 41 | #--------------------------------------------------------------------------------------------------#
 42 | 
 43 | 
 44 | #--------------------------------------------------------------------------------------------------#
 45 | ### Set working directory
 46 | if (output_dir=="tempdir") {
 47 |   outdir <- tempdir()
 48 | } else {
 49 |   if (! file.exists(output_dir)) dir.create(output_dir,recursive=TRUE)
 50 |   outdir <- file.path(path.expand(output_dir))
 51 | }
 52 | setwd(outdir) # set working directory
 53 | getwd()  # check wd
 54 | #--------------------------------------------------------------------------------------------------#
 55 | 
 56 | 
 57 | #--------------------------------------------------------------------------------------------------#
 58 | ### PLSR Coefficients - Grab from GitHub
 59 | git_repo <- "https://raw.githubusercontent.com/serbinsh/SSerbin_etal_2019_NewPhytologist/master/"
 60 | print("**** Downloading PLSR coefficients ****")
 61 | githubURL <- paste0(git_repo,"SSerbin_multibiome_lma_plsr_model/sqrt_LMA_gDW_m2_PLSR_Coefficients_10comp.csv")
 62 | LeafLMA.plsr.coeffs <- spectratrait::source_GitHubData(githubURL)
 63 | rm(githubURL)
 64 | githubURL <- paste0(git_repo,"SSerbin_multibiome_lma_plsr_model/sqrt_LMA_gDW_m2_Jackkife_PLSR_Coefficients.csv")
 65 | LeafLMA.plsr.jk.coeffs <- spectratrait::source_GitHubData(githubURL)
 66 | rm(githubURL)
 67 | #--------------------------------------------------------------------------------------------------#
 68 | 
 69 | 
 70 | #--------------------------------------------------------------------------------------------------#
 71 | ### Ely et al spectral and trait data
 72 | Start.wave <- 500
 73 | End.wave <- 2400
 74 | wv <- seq(Start.wave,End.wave,1)
 75 | plsr_data <- ely_plsr_data
 76 | #--------------------------------------------------------------------------------------------------#
 77 | 
 78 | 
 79 | #--------------------------------------------------------------------------------------------------#
 80 | #### Example data cleaning.  End user needs to do what's appropriate for their 
 81 | #### data.  This may be an iterative process.
 82 | # Keep only complete rows of inVar and spec data before fitting
 83 | plsr_data <- plsr_data[complete.cases(plsr_data[,names(plsr_data) %in% 
 84 |                                                   c(inVar,paste0("Wave_",wv))]),]
 85 | #--------------------------------------------------------------------------------------------------#
 86 | 
 87 | 
 88 | #--------------------------------------------------------------------------------------------------#
 89 | print("**** Applying PLSR model to estimate LMA from spectral observations ****")
 90 | # setup model
 91 | dims <- dim(LeafLMA.plsr.coeffs)
 92 | LeafLMA.plsr.intercept <- LeafLMA.plsr.coeffs[1,]
 93 | LeafLMA.plsr.coeffs <- data.frame(LeafLMA.plsr.coeffs[2:dims[1],])
 94 | names(LeafLMA.plsr.coeffs) <- c("wavelength","coefs")
 95 | LeafLMA.plsr.coeffs.vec <- as.vector(LeafLMA.plsr.coeffs[,2])
 96 | sub_spec <- droplevels(plsr_data[,which(names(plsr_data) %in% 
 97 |                                                    paste0("Wave_",seq(Start.wave,End.wave,1)))])
 98 | sub_spec <- sub_spec*0.01 # convert to 0-1
 99 | plsr_pred <- as.matrix(sub_spec) %*% LeafLMA.plsr.coeffs.vec + LeafLMA.plsr.intercept[,2]
100 | leafLMA <- plsr_pred[,1]^2  # convert to standard LMA units from sqrt(LMA)
101 | names(leafLMA) <- "PLSR_LMA_gDW_m2"
102 | 
103 | # organize output
104 | LeafLMA.PLSR.dataset <- data.frame(plsr_data[,which(names(plsr_data) %notin% 
105 |                                                       paste0("Wave_",seq(Start.wave,End.wave,1)))],
106 |                                    PLSR_LMA_gDW_m2=leafLMA, PLSR_Residuals=leafLMA-plsr_data[,inVar])
107 | head(LeafLMA.PLSR.dataset)
108 | #--------------------------------------------------------------------------------------------------#
109 | 
110 | 
111 | #--------------------------------------------------------------------------------------------------#
112 | print("**** Generate PLSR uncertainty estimates ****")
113 | jk_coef <- data.frame(LeafLMA.plsr.jk.coeffs[,3:dim(LeafLMA.plsr.jk.coeffs)[2]])
114 | jk_coef <- t(jk_coef)
115 | head(jk_coef)[,1:6]
116 | jk_int <- t(LeafLMA.plsr.jk.coeffs[,2])
117 | head(jk_int)[,1:6]
118 | 
119 | jk_pred <- as.matrix(sub_spec) %*% jk_coef + matrix(rep(jk_int, length(plsr_data[,inVar])), 
120 |                                          byrow=TRUE, ncol=length(jk_int))
121 | jk_pred <- jk_pred^2
122 | head(jk_pred)[,1:6]
123 | dim(jk_pred)
124 | interval <- c(0.025,0.975)
125 | Interval_Conf <- apply(X = jk_pred, MARGIN = 1, FUN = quantile, 
126 |                        probs=c(interval[1], interval[2]))
127 | sd_mean <- apply(X = jk_pred, MARGIN = 1, FUN =sd)
128 | sd_res <- sd(LeafLMA.PLSR.dataset$PLSR_Residuals)
129 | sd_tot <- sqrt(sd_mean^2+sd_res^2)
130 | LeafLMA.PLSR.dataset$LCI <- Interval_Conf[1,]
131 | LeafLMA.PLSR.dataset$UCI <- Interval_Conf[2,]
132 | LeafLMA.PLSR.dataset$LPI <- LeafLMA.PLSR.dataset$PLSR_LMA_gDW_m2-1.96*sd_tot
133 | LeafLMA.PLSR.dataset$UPI <- LeafLMA.PLSR.dataset$PLSR_LMA_gDW_m2+1.96*sd_tot
134 | head(LeafLMA.PLSR.dataset)
135 | #--------------------------------------------------------------------------------------------------#
136 | 
137 | 
138 | #--------------------------------------------------------------------------------------------------#
139 | rmsep_percrmsep <- spectratrait::percent_rmse(plsr_dataset = LeafLMA.PLSR.dataset, 
140 |                                               inVar = inVar, 
141 |                                               residuals = LeafLMA.PLSR.dataset$PLSR_Residuals, 
142 |                                               range="full")
143 | RMSEP <- rmsep_percrmsep$rmse
144 | perc_RMSEP <- rmsep_percrmsep$perc_rmse
145 | r2 <- round(summary(lm(LeafLMA.PLSR.dataset$PLSR_LMA_gDW_m2~
146 |                          LeafLMA.PLSR.dataset[,inVar]))$adj.r.squared,2)
147 | expr <- vector("expression", 3)
148 | expr[[1]] <- bquote(R^2==.(r2))
149 | expr[[2]] <- bquote(RMSEP==.(round(RMSEP,2)))
150 | expr[[3]] <- bquote("%RMSEP"==.(round(perc_RMSEP,2)))
151 | rng_vals <- c(min(LeafLMA.PLSR.dataset$LPI), max(LeafLMA.PLSR.dataset$UPI))
152 | par(mfrow=c(1,1), mar=c(4.2,5.3,1,0.4), oma=c(0, 0.1, 0, 0.2))
153 | plotrix::plotCI(LeafLMA.PLSR.dataset$PLSR_LMA_gDW_m2,LeafLMA.PLSR.dataset[,inVar], 
154 |                 li=LeafLMA.PLSR.dataset$LPI, ui=LeafLMA.PLSR.dataset$UPI, gap=0.009,sfrac=0.000, 
155 |                 lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), 
156 |                 err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="grey80",
157 |                 cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"),
158 |                 ylab=paste0("Observed ", paste(inVar), " (units)"),
159 |                 cex.axis=1.5,cex.lab=1.8)
160 | abline(0,1,lty=2,lw=2)
161 | plotrix::plotCI(LeafLMA.PLSR.dataset$PLSR_LMA_gDW_m2,LeafLMA.PLSR.dataset[,inVar], 
162 |                 li=LeafLMA.PLSR.dataset$LCI, ui=LeafLMA.PLSR.dataset$UCI, gap=0.009,sfrac=0.004, 
163 |                 lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), 
164 |                 err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="black",
165 |                 cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"),
166 |                 ylab=paste0("Observed ", paste(inVar), " (units)"),
167 |                 cex.axis=1.5,cex.lab=1.8, add=T)
168 | legend("topleft", legend=expr, bty="n", cex=1.5)
169 | legend("bottomright", legend=c("Prediction Interval","Confidence Interval"), 
170 |        lty=c(1,1), col = c("grey80","black"), lwd=3, bty="n", cex=1.5)
171 | box(lwd=2.2)
172 | dev.copy(png,file.path(outdir,paste0(inVar,"_PLSR_Validation_Scatterplot.png")), 
173 |          height=2800, width=3200,  res=340)
174 | dev.off();
175 | #--------------------------------------------------------------------------------------------------#
176 | 
177 | #--------------------------------------------------------------------------------------------------#
178 | ### EOF


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # PLSR modeling for the estimation of plant functional traits
  2 | This repository contains source code and example scripts to illustrate best-practices for fitting, evaluating, and reporting spectra-trait PLSR models. This code and example scripts encompass several possibilities that you may encounter when carrying out PLSR model fitting. Start by reading *Burnett et al. (2021)*, then work through the scripts or vignettes  we have provided here to get experience in developing models for estimating plant functional traits using spectral measurements!   You can also explore examples available in the [Wiki](https://github.com/plantphys/spectratrait/wiki) pages
  3 | 
  4 | ### Article citation:
  5 | Burnett AC, Anderson J, Davidson KD, Ely KS, Lamour J, Li Q, Morrison BD, Yang D, Rogers A, Serbin SP (2021) A best-practice guide to predicting plant traits from leaf-level hyperspectral data using partial least squares regression. Journal of Experimental Botany. https://doi.org/10.1093/jxb/erab295
  6 | 
  7 | ### Source code citation:
  8 | [![DOI](https://zenodo.org/badge/222699149.svg)](https://zenodo.org/doi/10.5281/zenodo.4330119)
  9 | 
 10 | ### EcoSML
 11 | https://ecosml.org/package/github/TESTgroup-BNL/spectratrait
 12 | 
 13 | ### Getting started, tips and tricks:
 14 | * If you are new to R you should start by reading https://support.rstudio.com/hc/en-us/articles/201141096-Getting-Started-with-R & https://www.dataquest.io/blog/tutorial-getting-started-with-r-and-rstudio/
 15 | * Software requirements: R software (version 4.0 or above) and preferred operating environment (e.g. RStudio). 
 16 | * Install package dependencies and the spectratrait package: See the Depends and INSTALL sections below
 17 | * To work with the repository locally, clone the repository to your local machine (https://docs.github.com/en/github/creating-cloning-and-archiving-repositories/cloning-a-repository). Once you have the repository on your local machine you can run the scripts in inst/scripts or vignettes folders. You can also start editing the code yourself or contributing to the development of the package through new pull requests (https://guides.github.com/activities/hello-world/)
 18 | * Or if you don't want to obtain the code via cloning the repository, you can click the green "Code" button at the top of this page and select "Download ZIP". Extract the contents of the ZIP in your preferred location. Use RStudio to open your selected example script from the inst/scripts folder and then run or source the code.
 19 | 
 20 | ### Depends: 
 21 | ggplot2 (>= 3.3.2), remotes (>= 2.2.0), devtools (>= 2.3.1), readr (>= 1.3.1), RCurl (>= 1.98-1.2), 
 22 | httr (>= 1.4.2), pls (>= 2.7-2), magrittr (>= 2.0.1), dplyr (>= 1.0.1), reshape2 (>= 1.4.4), here (>= 0.1), 
 23 | plotrix (>= 3.7-8), gridExtra (>= 2.3), scales (>= 1.1.1), knitr (>= 1.4.2)
 24 | 
 25 | ### INSTALL
 26 | spectratrait is not currently on CRAN, but you can install from GitHub using devtools().  First, make sure you have all of the package dependencies installed. You can do this either by 1) installing the packages individually using install.packages(), for example:
 27 | 
 28 | ``` r
 29 | install.packages("pls")
 30 | install.packages("ggplot2")
 31 | ...
 32 | ```
 33 | 
 34 | and so forth until all of the dependencies (listed above in the "Depends" section) are installed. **Note** - you should pay careful attention at this stage to any R messages in your terminal alerting you that you need to update existing or install new R packages. These messages usually show up after you attempt to run install.packages() and require you 
 35 | to respond in your terminal to a y/n or multiple choice question before the install can continue.
 36 | 
 37 | Or 2) you can also run or source the "install_dependencies.R" script located in inst/scripts which should also install all of the required dependencies.  **Note** - again you will need to watch for any R prompts to update packages in order for the install to proceed correctly.
 38 | 
 39 | Finally, to complete the installation you will also need to install the spectratrait package itself.  You can do this by copying and pasting the command below into your R or RStudio (preferred) terminal.
 40 | 
 41 | #### To install the main (default) branch version
 42 | ``` r
 43 | devtools::install_github(repo = "plantphys/spectratrait", dependencies=TRUE)
 44 | ```
 45 | <br>
 46 | 
 47 | #### To install the main branch version - with Vignettes (though slower)
 48 | ``` r
 49 | devtools::install_github(repo = "plantphys/spectratrait", dependencies=TRUE, build_vignettes = TRUE)
 50 | ```
 51 | <br>
 52 | 
 53 | #### To install a specific release, for example release 1.0.5
 54 | ``` r
 55 | devtools::install_github(repo = "plantphys/spectratrait@v1.0.5", dependencies=TRUE)
 56 | ```
 57 | 
 58 | <br>
 59 | 
 60 | #### Or install a specific branch, e.g. a branch named devbranch
 61 | ``` r
 62 | devtools::install_github(repo = "plantphys/spectratrait", ref = "devbranch", dependencies=TRUE)
 63 | ```
 64 | 
 65 | <br>
 66 | 
 67 | ## Contains:
 68 | 1. Core package functions are located in the in the main "R" folder
 69 | 2. inst/scripts contains example PLSR workflows for fitting example leaf and canopy spectra-trait PLSR models for different leaf traits, including LMA and foliar    nitrogen
 70 | 3. Example datasets that can be loaded in your R environment using the base load() function can be found in the data/ folder
 71 | 4. man - the manual pages that are accessible in R 
 72 | 5. tests - package tests to check that functions are still operational and produce the expected results
 73 | 6. vignettes - example Rmarkdown and github markdown vignettes illustrating the various PLSR model fitting examples. These can be used to learn how to use the      PLSR workflow and associated functions for new applications
 74 | 7. spectratrait_X.X.X.pdf (where X.X.X is the current release number) is the pdf documentation
 75 | 
 76 | ### Linked dataset citations, DOIs, and EcoSIS IDs/URLs: <br>
 77 | 1) Leaf spectra, structural and biochemical leaf traits of eight crop species (Ely et al., 2019) <br>
 78 | EcoSIS URL: https://ecosis.org/package/leaf-spectra--structural-and-biochemical-leaf-traits-of-eight-crop-species <br>
 79 | EcoSIS ID: 25770ad9-d47c-428b-bf99-d1543a4b0ec9 <br>
 80 | DOI: https://doi.org/doi:10.21232/C2GM2Z <br>
 81 | Rpubs LeafN bootstrap example output: https://rpubs.com/sserbin/spectratrait_ex1 <br>
 82 | Rpubs LeafN bootstrap by group (species) example output: https://rpubs.com/sserbin/spectratrait_ex2 <br>
 83 | 
 84 | 2) Leaf reflectance plant functional gradient IFGG/KIT <br>
 85 | Target variable: SLA <br>
 86 | EcoSIS URL: https://ecosis.org/package/leaf-reflectance-plant-functional-gradient-ifgg-kit <br>
 87 | EcoSIS ID: 3cf6b27e-d80e-4bc7-b214-c95506e46daa <br>
 88 | Rpubs example output: https://rpubs.com/sserbin/spectratrait_ex3 <br>
 89 | 
 90 | 3) Fresh leaf spectra to estimate LMA over NEON domains in eastern United States <br>
 91 | Target variable: LMA <br>
 92 | EcoSIS URL: https://ecosis.org/package/fresh-leaf-spectra-to-estimate-lma-over-neon-domains-in-eastern-united-states <br>
 93 | EcoSIS ID: 5617da17-c925-49fb-b395-45a51291bd2d <br>
 94 | DOI: https://doi.org/doi:10.21232/9831-rq60 <br>
 95 | Rpubs example output: https://rpubs.com/sserbin/spectratrait_ex4 <br>
 96 | Rpubs example showing Serbin et al. (2019) applied to NEON data: https://rpubs.com/sserbin/spectratrait_ex9 <br>
 97 | 
 98 | 4) Canopy spectra to map foliar functional traits over NEON domains in eastern United States <br>
 99 | Target variable: leaf nitrogen <br>
100 | EcoSIS URL: https://ecosis.org/package/canopy-spectra-to-map-foliar-functional-traits-over-neon-domains-in-eastern-united-states <br>
101 | EcoSIS ID: b9dbf3db-5b9c-4ab2-88c2-26c8b39d0903 <br>
102 | DOI: https://doi.org/doi:10.21232/e2jt-5209 <br>
103 | Rpubs leaf nitrogen example output: https://rpubs.com/sserbin/spectratrait_ex5 <br>
104 | 
105 | 5) Leaf spectra of 36 species growing in Rosa rugosa invaded coastal grassland communities in Belgium <br>
106 | Target variable: LMA, leaf nitrogen <br>
107 | EcoSIS URL: https://ecosis.org/package/leaf-spectra-of-36-species-growing-in-rosa-rugosa-invaded-coastal-grassland-communities-in-belgium <br>
108 | EcoSIS ID: 9db4c5a2-7eac-4e1e-8859-009233648e89 <br>
109 | DOI: https://doi.org/doi:10.21232/9nr6-sq54 <br>
110 | Rpubs LeafN example output: https://rpubs.com/sserbin/spectratrait_ex6 <br>
111 | Rpubs LeafN bootstrap example output: https://rpubs.com/sserbin/spectratrait_ex7 <br>
112 | Rpubs LMA example output: https://rpubs.com/sserbin/spectratrait_ex8 <br>
113 | 
114 | ## Build status
115 | Auto-run PLSR example:
116 | [![.github/workflows/run_plsr_example_auto.yaml](https://github.com/plantphys/spectratrait/actions/workflows/run_plsr_example_auto.yaml/badge.svg?branch=main)](https://github.com/plantphys/spectratrait/actions/workflows/run_plsr_example_auto.yaml) <br>
117 | CI run PLSR example:
118 | [![ci-run_PLSR_example](https://github.com/plantphys/spectratrait/actions/workflows/ci-run_plsr_example.yaml/badge.svg?branch=main)](https://github.com/plantphys/spectratrait/actions/workflows/ci-run_plsr_example.yaml) <br>
119 | CI OS and R Release Checks:
120 | [![R-CMD-check-OS-R](https://github.com/plantphys/spectratrait/actions/workflows/check-os.yaml/badge.svg?branch=main)](https://github.com/plantphys/spectratrait/actions/workflows/check-os.yaml) <br>
121 | Weekly CI Checks:
122 | [![R-CMD-check-Weekly](https://github.com/plantphys/spectratrait/actions/workflows/ci-weekly.yaml/badge.svg?branch=main)](https://github.com/plantphys/spectratrait/actions/workflows/ci-weekly.yaml) <br>
123 | EcoSIS API Check:
124 | [![run_ecosis_pull_example](https://github.com/plantphys/spectratrait/actions/workflows/run_ecosis_pull_example.yaml/badge.svg?branch=main)](https://github.com/plantphys/spectratrait/actions/workflows/run_ecosis_pull_example.yaml)
125 | 


--------------------------------------------------------------------------------
/vignettes/sserbin2019_plsr_ex9.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: An example showing how to apply an existing PLSR model to new data. In this case applying the LMA model from Serbin et al., (2019; DOI - 10.1111/nph.16123) to a dataset collected at CONUS NEON field sites 
  3 | author: "Shawn P. Serbin, Julien Lamour, & Jeremiah Anderson"
  4 | date: "`r Sys.Date()`"
  5 | output:
  6 |   github_document: default
  7 |   html_notebook: default
  8 |   pdf_document: default
  9 |   html_document:
 10 |     df_print: paged
 11 |     keep_md: true
 12 |   rmarkdown: html_vignette
 13 | vignette: >
 14 |   %\VignetteIndexEntry{An example showing how to apply an existing PLSR model to new data. In this case applying the LMA model from Serbin et al., (2019; DOI - 10.1111/nph.16123) to a dataset collected at CONUS NEON field sites}
 15 |   %\usepackage[utf8]{inputenc}
 16 |   %\VignetteEngine{knitr::knitr}
 17 | ---
 18 | 
 19 | ```{r setup, include=FALSE, echo=FALSE}
 20 | knitr::opts_chunk$set(echo = TRUE)
 21 | ```
 22 | 
 23 | ### Getting Started
 24 | ### Load libraries
 25 | ```{r, eval=TRUE, echo=TRUE}
 26 | list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra",
 27 |                       "spectratrait")
 28 | invisible(lapply(list.of.packages, library, character.only = TRUE))
 29 | ```
 30 | 
 31 | ### Setup other functions and options
 32 | ```{r, echo=TRUE}
 33 | ### Setup options
 34 | 
 35 | # Script options
 36 | pls::pls.options(plsralg = "oscorespls")
 37 | pls::pls.options("plsralg")
 38 | 
 39 | # Default par options
 40 | opar <- par(no.readonly = T)
 41 | 
 42 | # What is the target variable?
 43 | inVar <- "LMA_gDW_m2"
 44 | 
 45 | # What is the source dataset from EcoSIS?
 46 | ecosis_id <- "5617da17-c925-49fb-b395-45a51291bd2d"
 47 | 
 48 | # Specify output directory, output_dir 
 49 | # Options: 
 50 | # tempdir - use a OS-specified temporary directory 
 51 | # user defined PATH - e.g. "~/scratch/PLSR"
 52 | output_dir <- "tempdir"
 53 | ```
 54 | 
 55 | ### Set working directory (scratch space)
 56 | ```{r, echo=FALSE}
 57 | if (output_dir=="tempdir") {
 58 |   outdir <- tempdir()
 59 | } else {
 60 |   if (! file.exists(output_dir)) dir.create(output_dir,recursive=TRUE)
 61 |   outdir <- file.path(path.expand(output_dir))
 62 | }
 63 | setwd(outdir) # set working directory
 64 | getwd()  # check wd
 65 | ```
 66 | 
 67 | ### Grab PLSR Coefficients from GitHub
 68 | ```{r, echo=TRUE}
 69 | git_repo <- "https://raw.githubusercontent.com/serbinsh/SSerbin_etal_2019_NewPhytologist/master/"
 70 | print("**** Downloading PLSR coefficients ****")
 71 | githubURL <- paste0(git_repo,"SSerbin_multibiome_lma_plsr_model/sqrt_LMA_gDW_m2_PLSR_Coefficients_10comp.csv")
 72 | LeafLMA.plsr.coeffs <- spectratrait::source_GitHubData(githubURL)
 73 | rm(githubURL)
 74 | githubURL <- paste0(git_repo,"SSerbin_multibiome_lma_plsr_model/sqrt_LMA_gDW_m2_Jackkife_PLSR_Coefficients.csv")
 75 | LeafLMA.plsr.jk.coeffs <- spectratrait::source_GitHubData(githubURL)
 76 | rm(githubURL)
 77 | ```
 78 | 
 79 | ### Get source dataset from EcoSIS
 80 | ```{r, echo=TRUE}
 81 | dat_raw <- spectratrait::get_ecosis_data(ecosis_id = ecosis_id)
 82 | head(dat_raw)
 83 | names(dat_raw)[1:40]
 84 | ```
 85 | 
 86 | ### Prepare new data for estimation
 87 | ```{r, echo=TRUE}
 88 | Start.wave <- 500
 89 | End.wave <- 2400
 90 | wv <- seq(Start.wave,End.wave,1)
 91 | Spectra <- as.matrix(dat_raw[,names(dat_raw) %in% wv])
 92 | colnames(Spectra) <- c(paste0("Wave_",wv))
 93 | head(Spectra)[1:6,1:10]
 94 | sample_info <- dat_raw[,names(dat_raw) %notin% seq(350,2500,1)]
 95 | head(sample_info)
 96 | 
 97 | sample_info2 <- sample_info %>%
 98 |   select(Domain,Functional_type,Sample_ID,USDA_Species_Code=`USDA Symbol`,LMA_gDW_m2=LMA)
 99 | head(sample_info2)
100 | 
101 | plsr_data <- data.frame(sample_info2,Spectra)
102 | rm(sample_info,sample_info2,Spectra)
103 | ```
104 | 
105 | #### Example data cleaning. 
106 | ```{r, echo=TRUE}
107 | #### End user needs to do what's appropriate for their data.  This may be an iterative process.
108 | # Keep only complete rows of inVar and spec data before fitting
109 | plsr_data <- plsr_data[complete.cases(plsr_data[,names(plsr_data) %in% 
110 |                                                   c(inVar,paste0("Wave_",wv))]),]
111 | ```
112 | 
113 | #### Prepare PLSR model
114 | ```{r, echo=TRUE}
115 | print("**** Applying PLSR model to estimate LMA from spectral observations ****")
116 | # setup model
117 | dims <- dim(LeafLMA.plsr.coeffs)
118 | LeafLMA.plsr.intercept <- LeafLMA.plsr.coeffs[1,]
119 | LeafLMA.plsr.coeffs <- data.frame(LeafLMA.plsr.coeffs[2:dims[1],])
120 | names(LeafLMA.plsr.coeffs) <- c("wavelength","coefs")
121 | LeafLMA.plsr.coeffs.vec <- as.vector(LeafLMA.plsr.coeffs[,2])
122 | sub_spec <- droplevels(plsr_data[,which(names(plsr_data) %in% 
123 |                                                    paste0("Wave_",seq(Start.wave,End.wave,1)))])
124 | ```
125 | 
126 | #### Apply PLSR model
127 | ```{r, echo=TRUE}
128 | plsr_pred <- as.matrix(sub_spec) %*% LeafLMA.plsr.coeffs.vec + LeafLMA.plsr.intercept[,2]
129 | leafLMA <- plsr_pred[,1]^2  # convert to standard LMA units from sqrt(LMA)
130 | names(leafLMA) <- "PLSR_LMA_gDW_m2"
131 | 
132 | # organize output
133 | LeafLMA.PLSR.dataset <- data.frame(plsr_data[,which(names(plsr_data) %notin% 
134 |                                                       paste0("Wave_",seq(Start.wave,End.wave,1)))],
135 |                                    PLSR_LMA_gDW_m2=leafLMA, PLSR_Residuals=leafLMA-plsr_data[,inVar])
136 | head(LeafLMA.PLSR.dataset)
137 | ```
138 | 
139 | #### Generate PLSR uncertainty estimates
140 | ```{r, echo=TRUE}
141 | print("**** Generate PLSR uncertainty estimates ****")
142 | jk_coef <- data.frame(LeafLMA.plsr.jk.coeffs[,3:dim(LeafLMA.plsr.jk.coeffs)[2]])
143 | jk_coef <- t(jk_coef)
144 | head(jk_coef)[,1:6]
145 | jk_int <- t(LeafLMA.plsr.jk.coeffs[,2])
146 | head(jk_int)[,1:6]
147 | 
148 | jk_pred <- as.matrix(sub_spec) %*% jk_coef + matrix(rep(jk_int, length(plsr_data[,inVar])), 
149 |                                                     byrow=TRUE, ncol=length(jk_int))
150 | jk_pred <- jk_pred^2
151 | head(jk_pred)[,1:6]
152 | dim(jk_pred)
153 | interval <- c(0.025,0.975)
154 | Interval_Conf <- apply(X = jk_pred, MARGIN = 1, FUN = quantile, 
155 |                        probs=c(interval[1], interval[2]))
156 | sd_mean <- apply(X = jk_pred, MARGIN = 1, FUN =sd)
157 | sd_res <- sd(LeafLMA.PLSR.dataset$PLSR_Residuals)
158 | sd_tot <- sqrt(sd_mean^2+sd_res^2)
159 | LeafLMA.PLSR.dataset$LCI <- Interval_Conf[1,]
160 | LeafLMA.PLSR.dataset$UCI <- Interval_Conf[2,]
161 | LeafLMA.PLSR.dataset$LPI <- LeafLMA.PLSR.dataset$PLSR_LMA_gDW_m2-1.96*sd_tot
162 | LeafLMA.PLSR.dataset$UPI <- LeafLMA.PLSR.dataset$PLSR_LMA_gDW_m2+1.96*sd_tot
163 | head(LeafLMA.PLSR.dataset)
164 | ```
165 | 
166 | #### Generate PLSR estimated LMA observed vs predicted plot
167 | ```{r, fig.height = 7, fig.width = 8, echo=TRUE}
168 | rmsep_percrmsep <- spectratrait::percent_rmse(plsr_dataset = LeafLMA.PLSR.dataset, 
169 |                                               inVar = inVar, 
170 |                                               residuals = LeafLMA.PLSR.dataset$PLSR_Residuals, 
171 |                                               range="full")
172 | RMSEP <- rmsep_percrmsep$rmse
173 | perc_RMSEP <- rmsep_percrmsep$perc_rmse
174 | r2 <- round(summary(lm(LeafLMA.PLSR.dataset$PLSR_LMA_gDW_m2~
175 |                          LeafLMA.PLSR.dataset[,inVar]))$adj.r.squared,2)
176 | expr <- vector("expression", 3)
177 | expr[[1]] <- bquote(R^2==.(r2))
178 | expr[[2]] <- bquote(RMSEP==.(round(RMSEP,2)))
179 | expr[[3]] <- bquote("%RMSEP"==.(round(perc_RMSEP,2)))
180 | rng_vals <- c(min(LeafLMA.PLSR.dataset$LPI), max(LeafLMA.PLSR.dataset$UPI))
181 | par(mfrow=c(1,1), mar=c(4,5.3,1,0.4), oma=c(0.1, 0.1, 0.1, 0.2))
182 | plotrix::plotCI(LeafLMA.PLSR.dataset$PLSR_LMA_gDW_m2,LeafLMA.PLSR.dataset[,inVar], 
183 |                 li=LeafLMA.PLSR.dataset$LPI, ui=LeafLMA.PLSR.dataset$UPI, gap=0.009,sfrac=0.000, 
184 |                 lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), 
185 |                 err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="grey80",
186 |                 cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"),
187 |                 ylab=paste0("Observed ", paste(inVar), " (units)"),
188 |                 cex.axis=1.5,cex.lab=1.8)
189 | abline(0,1,lty=2,lw=2)
190 | plotrix::plotCI(LeafLMA.PLSR.dataset$PLSR_LMA_gDW_m2,LeafLMA.PLSR.dataset[,inVar], 
191 |                 li=LeafLMA.PLSR.dataset$LCI, ui=LeafLMA.PLSR.dataset$UCI, gap=0.009,sfrac=0.004, 
192 |                 lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), 
193 |                 err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="black",
194 |                 cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"),
195 |                 ylab=paste0("Observed ", paste(inVar), " (units)"),
196 |                 cex.axis=1.5,cex.lab=1.8, add=T)
197 | legend("topleft", legend=expr, bty="n", cex=1.5)
198 | legend("bottomright", legend=c("Prediction Interval","Confidence Interval"), 
199 |        lty=c(1,1), col = c("grey80","black"), lwd=3, bty="n", cex=1.5)
200 | box(lwd=2.2)
201 | dev.copy(png,file.path(outdir,paste0(inVar,"_PLSR_Validation_Scatterplot.png")), 
202 |          height=2800, width=3200,  res=340)
203 | dev.off();
204 | ```
205 | ```{r, echo=TRUE}
206 | print(paste("Output directory: ", outdir))
207 | 
208 | # Observed versus predicted
209 | write.csv(LeafLMA.PLSR.dataset,file=file.path(outdir,
210 |                                          paste0(inVar,'_PLSR_Estimates.csv')),
211 |           row.names=FALSE)
212 | ```
213 | 
214 | ### Confirm files were written to temp space
215 | ```{r, echo=TRUE}
216 | print("**** PLSR output files: ")
217 | print(list.files(outdir)[grep(pattern = inVar, list.files(outdir))])
218 | ```
219 | 


--------------------------------------------------------------------------------
/inst/scripts/apply_sserbin2019_lma_plsr_to_neon_example.R:
--------------------------------------------------------------------------------
  1 | ####################################################################################################
  2 | #
  3 | #  
  4 | #    Notes:
  5 | #    * The author notes the code is not the most elegant or clean, but is functional 
  6 | #    * Questions, comments, or concerns can be sent to sserbin@bnl.gov
  7 | #    * Code is provided under GNU General Public License v3.0 
  8 | #
  9 | ####################################################################################################
 10 | 
 11 | 
 12 | #--------------------------------------------------------------------------------------------------#
 13 | ### Load libraries
 14 | list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra",
 15 |                       "spectratrait")
 16 | invisible(lapply(list.of.packages, library, character.only = TRUE))
 17 | #--------------------------------------------------------------------------------------------------#
 18 | 
 19 | 
 20 | #--------------------------------------------------------------------------------------------------#
 21 | ### Setup options
 22 | 
 23 | # Default par options
 24 | opar <- par(no.readonly = T)
 25 | 
 26 | # What is the target variable?
 27 | inVar <- "LMA_gDW_m2"
 28 | 
 29 | # What is the source dataset from EcoSIS?
 30 | ecosis_id <- "5617da17-c925-49fb-b395-45a51291bd2d"
 31 | 
 32 | # Specify output directory, output_dir 
 33 | # Options: 
 34 | # tempdir - use a OS-specified temporary directory 
 35 | # user defined PATH - e.g. "~/scratch/PLSR"
 36 | output_dir <- "tempdir"
 37 | #--------------------------------------------------------------------------------------------------#
 38 | 
 39 | 
 40 | #--------------------------------------------------------------------------------------------------#
 41 | ### Set working directory
 42 | if (output_dir=="tempdir") {
 43 |   outdir <- tempdir()
 44 | } else {
 45 |   if (! file.exists(output_dir)) dir.create(output_dir,recursive=TRUE)
 46 |   outdir <- file.path(path.expand(output_dir))
 47 | }
 48 | setwd(outdir) # set working directory
 49 | getwd()  # check wd
 50 | #--------------------------------------------------------------------------------------------------#
 51 | 
 52 | 
 53 | #--------------------------------------------------------------------------------------------------#
 54 | ### PLSR Coefficients - Grab from GitHub
 55 | git_repo <- "https://raw.githubusercontent.com/serbinsh/SSerbin_etal_2019_NewPhytologist/master/"
 56 | print("**** Downloading PLSR coefficients ****")
 57 | githubURL <- paste0(git_repo,"SSerbin_multibiome_lma_plsr_model/sqrt_LMA_gDW_m2_PLSR_Coefficients_10comp.csv")
 58 | LeafLMA.plsr.coeffs <- spectratrait::source_GitHubData(githubURL)
 59 | rm(githubURL)
 60 | githubURL <- paste0(git_repo,"SSerbin_multibiome_lma_plsr_model/sqrt_LMA_gDW_m2_Jackkife_PLSR_Coefficients.csv")
 61 | LeafLMA.plsr.jk.coeffs <- spectratrait::source_GitHubData(githubURL)
 62 | rm(githubURL)
 63 | #--------------------------------------------------------------------------------------------------#
 64 | 
 65 | 
 66 | #--------------------------------------------------------------------------------------------------#
 67 | ### Get source dataset from EcoSIS
 68 | dat_raw <- spectratrait::get_ecosis_data(ecosis_id = ecosis_id)
 69 | head(dat_raw)
 70 | names(dat_raw)[1:40]
 71 | #--------------------------------------------------------------------------------------------------#
 72 | 
 73 | 
 74 | #--------------------------------------------------------------------------------------------------#
 75 | ### Prepare new data for estimation
 76 | Start.wave <- 500
 77 | End.wave <- 2400
 78 | wv <- seq(Start.wave,End.wave,1)
 79 | Spectra <- as.matrix(dat_raw[,names(dat_raw) %in% wv])
 80 | colnames(Spectra) <- c(paste0("Wave_",wv))
 81 | head(Spectra)[1:6,1:10]
 82 | sample_info <- dat_raw[,names(dat_raw) %notin% seq(350,2500,1)]
 83 | head(sample_info)
 84 | 
 85 | sample_info2 <- sample_info %>%
 86 |   select(Domain,Functional_type,Sample_ID,USDA_Species_Code=`USDA Symbol`,LMA_gDW_m2=LMA)
 87 | head(sample_info2)
 88 | 
 89 | plsr_data <- data.frame(sample_info2,Spectra)
 90 | rm(sample_info,sample_info2,Spectra)
 91 | #--------------------------------------------------------------------------------------------------#
 92 | 
 93 | 
 94 | #--------------------------------------------------------------------------------------------------#
 95 | #### Example data cleaning.  End user needs to do what's appropriate for their 
 96 | #### data.  This may be an iterative process.
 97 | # Keep only complete rows of inVar and spec data before fitting
 98 | plsr_data <- plsr_data[complete.cases(plsr_data[,names(plsr_data) %in% 
 99 |                                                   c(inVar,paste0("Wave_",wv))]),]
100 | #--------------------------------------------------------------------------------------------------#
101 | 
102 | 
103 | #--------------------------------------------------------------------------------------------------#
104 | print("**** Applying PLSR model to estimate LMA from spectral observations ****")
105 | # setup model
106 | dims <- dim(LeafLMA.plsr.coeffs)
107 | LeafLMA.plsr.intercept <- LeafLMA.plsr.coeffs[1,]
108 | LeafLMA.plsr.coeffs <- data.frame(LeafLMA.plsr.coeffs[2:dims[1],])
109 | names(LeafLMA.plsr.coeffs) <- c("wavelength","coefs")
110 | LeafLMA.plsr.coeffs.vec <- as.vector(LeafLMA.plsr.coeffs[,2])
111 | sub_spec <- droplevels(plsr_data[,which(names(plsr_data) %in% 
112 |                                                    paste0("Wave_",seq(Start.wave,End.wave,1)))])
113 | plsr_pred <- as.matrix(sub_spec) %*% LeafLMA.plsr.coeffs.vec + LeafLMA.plsr.intercept[,2]
114 | leafLMA <- plsr_pred[,1]^2  # convert to standard LMA units from sqrt(LMA)
115 | names(leafLMA) <- "PLSR_LMA_gDW_m2"
116 | 
117 | # organize output
118 | LeafLMA.PLSR.dataset <- data.frame(plsr_data[,which(names(plsr_data) %notin% 
119 |                                                       paste0("Wave_",seq(Start.wave,End.wave,1)))],
120 |                                    PLSR_LMA_gDW_m2=leafLMA, PLSR_Residuals=leafLMA-plsr_data[,inVar])
121 | head(LeafLMA.PLSR.dataset)
122 | #--------------------------------------------------------------------------------------------------#
123 | 
124 | 
125 | #--------------------------------------------------------------------------------------------------#
126 | print("**** Generate PLSR uncertainty estimates ****")
127 | jk_coef <- data.frame(LeafLMA.plsr.jk.coeffs[,3:dim(LeafLMA.plsr.jk.coeffs)[2]])
128 | jk_coef <- t(jk_coef)
129 | head(jk_coef)[,1:6]
130 | jk_int <- t(LeafLMA.plsr.jk.coeffs[,2])
131 | head(jk_int)[,1:6]
132 | 
133 | jk_pred <- as.matrix(sub_spec) %*% jk_coef + matrix(rep(jk_int, length(plsr_data[,inVar])), 
134 |                                          byrow=TRUE, ncol=length(jk_int))
135 | jk_pred <- jk_pred^2
136 | head(jk_pred)[,1:6]
137 | dim(jk_pred)
138 | interval <- c(0.025,0.975)
139 | Interval_Conf <- apply(X = jk_pred, MARGIN = 1, FUN = quantile, 
140 |                        probs=c(interval[1], interval[2]))
141 | sd_mean <- apply(X = jk_pred, MARGIN = 1, FUN =sd)
142 | sd_res <- sd(LeafLMA.PLSR.dataset$PLSR_Residuals)
143 | sd_tot <- sqrt(sd_mean^2+sd_res^2)
144 | LeafLMA.PLSR.dataset$LCI <- Interval_Conf[1,]
145 | LeafLMA.PLSR.dataset$UCI <- Interval_Conf[2,]
146 | LeafLMA.PLSR.dataset$LPI <- LeafLMA.PLSR.dataset$PLSR_LMA_gDW_m2-1.96*sd_tot
147 | LeafLMA.PLSR.dataset$UPI <- LeafLMA.PLSR.dataset$PLSR_LMA_gDW_m2+1.96*sd_tot
148 | head(LeafLMA.PLSR.dataset)
149 | #--------------------------------------------------------------------------------------------------#
150 | 
151 | 
152 | #--------------------------------------------------------------------------------------------------#
153 | rmsep_percrmsep <- spectratrait::percent_rmse(plsr_dataset = LeafLMA.PLSR.dataset, 
154 |                                               inVar = inVar, 
155 |                                               residuals = LeafLMA.PLSR.dataset$PLSR_Residuals, 
156 |                                               range="full")
157 | RMSEP <- rmsep_percrmsep$rmse
158 | perc_RMSEP <- rmsep_percrmsep$perc_rmse
159 | r2 <- round(summary(lm(LeafLMA.PLSR.dataset$PLSR_LMA_gDW_m2~
160 |                          LeafLMA.PLSR.dataset[,inVar]))$adj.r.squared,2)
161 | expr <- vector("expression", 3)
162 | expr[[1]] <- bquote(R^2==.(r2))
163 | expr[[2]] <- bquote(RMSEP==.(round(RMSEP,2)))
164 | expr[[3]] <- bquote("%RMSEP"==.(round(perc_RMSEP,2)))
165 | rng_vals <- c(min(LeafLMA.PLSR.dataset$LPI), max(LeafLMA.PLSR.dataset$UPI))
166 | par(mfrow=c(1,1), mar=c(4.2,5.3,1,0.4), oma=c(0, 0.1, 0, 0.2))
167 | plotrix::plotCI(LeafLMA.PLSR.dataset$PLSR_LMA_gDW_m2,LeafLMA.PLSR.dataset[,inVar], 
168 |                 li=LeafLMA.PLSR.dataset$LPI, ui=LeafLMA.PLSR.dataset$UPI, gap=0.009,sfrac=0.000, 
169 |                 lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), 
170 |                 err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="grey80",
171 |                 cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"),
172 |                 ylab=paste0("Observed ", paste(inVar), " (units)"),
173 |                 cex.axis=1.5,cex.lab=1.8)
174 | abline(0,1,lty=2,lw=2)
175 | plotrix::plotCI(LeafLMA.PLSR.dataset$PLSR_LMA_gDW_m2,LeafLMA.PLSR.dataset[,inVar], 
176 |        li=LeafLMA.PLSR.dataset$LCI, ui=LeafLMA.PLSR.dataset$UCI, gap=0.009,sfrac=0.004, 
177 |        lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), 
178 |        err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="black",
179 |        cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"),
180 |        ylab=paste0("Observed ", paste(inVar), " (units)"),
181 |        cex.axis=1.5,cex.lab=1.8, add=T)
182 | legend("topleft", legend=expr, bty="n", cex=1.5)
183 | legend("bottomright", legend=c("Prediction Interval","Confidence Interval"), 
184 |        lty=c(1,1), col = c("grey80","black"), lwd=3, bty="n", cex=1.5)
185 | box(lwd=2.2)
186 | dev.copy(png,file.path(outdir,paste0(inVar,"_PLSR_Validation_Scatterplot.png")), 
187 |          height=2800, width=3200,  res=340)
188 | dev.off();
189 | #--------------------------------------------------------------------------------------------------#
190 | 
191 | 
192 | #--------------------------------------------------------------------------------------------------#
193 | ### EOF


--------------------------------------------------------------------------------
/inst/scripts/apply_sserbin2014_leafN_plsr_to_ext_data_example.R:
--------------------------------------------------------------------------------
  1 | ####################################################################################################
  2 | #
  3 | #  
  4 | #    Notes:
  5 | #    * The author notes the code is not the most elegant or clean, but is functional 
  6 | #    * Questions, comments, or concerns can be sent to sserbin@bnl.gov
  7 | #    * Code is provided under GNU General Public License v3.0 
  8 | #
  9 | ####################################################################################################
 10 | 
 11 | 
 12 | #--------------------------------------------------------------------------------------------------#
 13 | ### Load libraries
 14 | list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra",
 15 |                       "spectratrait")
 16 | invisible(lapply(list.of.packages, library, character.only = TRUE))
 17 | #--------------------------------------------------------------------------------------------------#
 18 | 
 19 | 
 20 | #--------------------------------------------------------------------------------------------------#
 21 | ### Setup options
 22 | 
 23 | # Default par options
 24 | opar <- par(no.readonly = T)
 25 | 
 26 | # Specify output directory, output_dir 
 27 | # Options: 
 28 | # tempdir - use a OS-specified temporary directory 
 29 | # user defined PATH - e.g. "~/scratch/PLSR"
 30 | output_dir <- "tempdir"
 31 | #--------------------------------------------------------------------------------------------------#
 32 | 
 33 | 
 34 | #--------------------------------------------------------------------------------------------------#
 35 | ### Set ecosis dataset
 36 | # https://ecosis.org/package/fresh-leaf-spectra-to-estimate-leaf-traits-for-california-ecosystems
 37 | # title: Fresh Leaf Spectra to Estimate Leaf Traits for California Ecosystems
 38 | 
 39 | # What is the target variable?
 40 | inVar <- "Nmass_g_g" # unclear if N in this dataset is Nmass or Narea. Assuming Nmass
 41 | 
 42 | # What is the source dataset from EcoSIS?
 43 | ecosis_id <- "0fadcc45-f79e-4fd3-a6ca-8afaf26ae299"
 44 | #--------------------------------------------------------------------------------------------------#
 45 | 
 46 | 
 47 | #--------------------------------------------------------------------------------------------------#
 48 | ### Set working directory
 49 | if (output_dir=="tempdir") {
 50 |   outdir <- tempdir()
 51 | } else {
 52 |   if (! file.exists(output_dir)) dir.create(output_dir,recursive=TRUE)
 53 |   outdir <- file.path(path.expand(output_dir))
 54 | }
 55 | setwd(outdir) # set working directory
 56 | getwd()  # check wd
 57 | #--------------------------------------------------------------------------------------------------#
 58 | 
 59 | 
 60 | #--------------------------------------------------------------------------------------------------#
 61 | ### Grab ecosis data
 62 | print(paste0("Output directory: ",getwd()))  # check wd
 63 | dat_raw <- spectratrait::get_ecosis_data(ecosis_id = ecosis_id)
 64 | #--------------------------------------------------------------------------------------------------#
 65 | 
 66 | 
 67 | #--------------------------------------------------------------------------------------------------#
 68 | ### PLSR Coefficients - Grab from GitHub
 69 | git_repo <- "https://raw.githubusercontent.com/serbinsh/NASA_FFT_Leaf_Spectra-Trait_Models/refs/heads/master/"
 70 | print("**** Downloading PLSR coefficients ****")
 71 | githubURL <- paste0(git_repo,"PLSR_model_coefficients/leaf_Nitrogen/FFT_Leaf_Nitrogen_PLSR_Coefficients_11comp.csv")
 72 | LeafN.plsr.coeffs <- spectratrait::source_GitHubData(githubURL)
 73 | rm(githubURL)
 74 | githubURL <- paste0(git_repo,"PLSR_model_coefficients/leaf_Nitrogen/FFT_Leaf_Nitrogen_Jackkife_PLSR_Coefficients.csv")
 75 | LeafN.plsr.jk.coeffs <- spectratrait::source_GitHubData(githubURL)
 76 | rm(githubURL)
 77 | #--------------------------------------------------------------------------------------------------#
 78 | 
 79 | 
 80 | #--------------------------------------------------------------------------------------------------#
 81 | ### Setup target spectral and trait data
 82 | ### Create plsr dataset
 83 | Start.wave <- 1500
 84 | End.wave <- 2400
 85 | wv <- seq(Start.wave,End.wave,1)
 86 | Spectra <- as.matrix(dat_raw[,names(dat_raw) %in% wv])
 87 | colnames(Spectra) <- c(paste0("Wave_",wv))
 88 | sample_info <- dat_raw[,names(dat_raw) %notin% seq(350,2500,1)]
 89 | head(sample_info)
 90 | 
 91 | sample_info2 <- sample_info %>%
 92 |   select(Plant_Species=`species`, Nmass_g_g=`Leaf nitrogen content per leaf dry mass`)
 93 | head(sample_info2)
 94 | 
 95 | plsr_data <- data.frame(sample_info2,Spectra)
 96 | rm(sample_info,sample_info2,Spectra)
 97 | #--------------------------------------------------------------------------------------------------#
 98 | 
 99 | 
100 | #--------------------------------------------------------------------------------------------------#
101 | #### Example data cleaning.  End user needs to do what's appropriate for their 
102 | #### data.  This may be an iterative process.
103 | # Keep only complete rows of inVar and spec data before fitting
104 | plsr_data <- plsr_data[complete.cases(plsr_data[,names(plsr_data) %in% 
105 |                                                   c(inVar,paste0("Wave_",wv))]),]
106 | #--------------------------------------------------------------------------------------------------#
107 | 
108 | 
109 | #--------------------------------------------------------------------------------------------------#
110 | print("**** Applying PLSR model to estimate leaf N mass from spectral observations ****")
111 | # setup model
112 | dims <- dim(LeafN.plsr.coeffs)
113 | LeafN.plsr.intercept <- LeafN.plsr.coeffs[1,]
114 | LeafN.plsr.coeffs <- data.frame(LeafN.plsr.coeffs[2:dims[1],])
115 | names(LeafN.plsr.coeffs) <- c("wavelength","coefs")
116 | LeafN.plsr.coeffs.vec <- as.vector(LeafN.plsr.coeffs[,2])
117 | sub_spec <- droplevels(plsr_data[,which(names(plsr_data) %in% 
118 |                                           paste0("Wave_",seq(Start.wave,End.wave,1)))])
119 | #sub_spec <- sub_spec*0.01 # convert to 0-1
120 | plsr_pred <- as.matrix(sub_spec) %*% LeafN.plsr.coeffs.vec + LeafN.plsr.intercept[,2]
121 | leafN <- plsr_pred[,1]
122 | names(leafN) <- "PLSR_LeafN_g_g"
123 | 
124 | # organize output
125 | LeafN.PLSR.dataset <- data.frame(plsr_data[,which(names(plsr_data) %notin% 
126 |                                                       paste0("Wave_",seq(Start.wave,End.wave,1)))],
127 |                                    PLSR_LeafN_g_g=leafN, PLSR_Residuals=leafN-plsr_data[,inVar])
128 | head(LeafN.PLSR.dataset)
129 | #--------------------------------------------------------------------------------------------------#
130 | 
131 | 
132 | #--------------------------------------------------------------------------------------------------#
133 | print("**** Generate PLSR uncertainty estimates ****")
134 | jk_coef <- data.frame(LeafN.plsr.jk.coeffs[,3:dim(LeafN.plsr.jk.coeffs)[2]])
135 | jk_coef <- t(jk_coef)
136 | head(jk_coef)[,1:6]
137 | jk_int <- t(LeafN.plsr.jk.coeffs[,2])
138 | head(jk_int)[,1:6]
139 | 
140 | jk_pred <- as.matrix(sub_spec) %*% jk_coef + matrix(rep(jk_int, length(plsr_data[,inVar])), 
141 |                                          byrow=TRUE, ncol=length(jk_int))
142 | jk_pred <- jk_pred^2
143 | head(jk_pred)[,1:6]
144 | dim(jk_pred)
145 | interval <- c(0.025,0.975)
146 | Interval_Conf <- apply(X = jk_pred, MARGIN = 1, FUN = quantile, 
147 |                        probs=c(interval[1], interval[2]))
148 | sd_mean <- apply(X = jk_pred, MARGIN = 1, FUN =sd)
149 | sd_res <- sd(LeafN.PLSR.dataset$PLSR_Residuals)
150 | sd_tot <- sqrt(sd_mean^2+sd_res^2)
151 | LeafN.PLSR.dataset$LCI <- Interval_Conf[1,]
152 | LeafN.PLSR.dataset$UCI <- Interval_Conf[2,]
153 | LeafN.PLSR.dataset$LPI <- LeafN.PLSR.dataset$PLSR_LeafN_g_g-1.96*sd_tot
154 | LeafN.PLSR.dataset$UPI <- LeafN.PLSR.dataset$PLSR_LeafN_g_g+1.96*sd_tot
155 | head(LeafN.PLSR.dataset)
156 | #--------------------------------------------------------------------------------------------------#
157 | 
158 | 
159 | #--------------------------------------------------------------------------------------------------#
160 | rmsep_percrmsep <- spectratrait::percent_rmse(plsr_dataset = LeafN.PLSR.dataset, 
161 |                                               inVar = inVar, 
162 |                                               residuals = LeafN.PLSR.dataset$PLSR_Residuals, 
163 |                                               range="full")
164 | RMSEP <- rmsep_percrmsep$rmse
165 | perc_RMSEP <- rmsep_percrmsep$perc_rmse
166 | r2 <- round(summary(lm(LeafN.PLSR.dataset$PLSR_LeafN_g_g~
167 |                          LeafN.PLSR.dataset[,inVar]))$adj.r.squared,2)
168 | expr <- vector("expression", 3)
169 | expr[[1]] <- bquote(R^2==.(r2))
170 | expr[[2]] <- bquote(RMSEP==.(round(RMSEP,2)))
171 | expr[[3]] <- bquote("%RMSEP"==.(round(perc_RMSEP,2)))
172 | rng_vals <- c(min(LeafN.PLSR.dataset$LPI), max(LeafN.PLSR.dataset$UPI))
173 | par(mfrow=c(1,1), mar=c(4.2,5.3,1,0.4), oma=c(0, 0.1, 0, 0.2))
174 | plotrix::plotCI(LeafN.PLSR.dataset$PLSR_LeafN_g_g,LeafN.PLSR.dataset[,inVar], 
175 |                 li=LeafN.PLSR.dataset$LPI, ui=LeafN.PLSR.dataset$UPI, gap=0.009,sfrac=0.000, 
176 |                 lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), 
177 |                 err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="grey80",
178 |                 cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"),
179 |                 ylab=paste0("Observed ", paste(inVar), " (units)"),
180 |                 cex.axis=1.5,cex.lab=1.8)
181 | abline(0,1,lty=2,lw=2)
182 | plotrix::plotCI(LeafN.PLSR.dataset$PLSR_LeafN_g_g,LeafN.PLSR.dataset[,inVar], 
183 |                 li=LeafN.PLSR.dataset$LCI, ui=LeafN.PLSR.dataset$UCI, gap=0.009,sfrac=0.004, 
184 |                 lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), 
185 |                 err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="black",
186 |                 cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"),
187 |                 ylab=paste0("Observed ", paste(inVar), " (units)"),
188 |                 cex.axis=1.5,cex.lab=1.8, add=T)
189 | legend("topleft", legend=expr, bty="n", cex=1.5)
190 | legend("bottomright", legend=c("Prediction Interval","Confidence Interval"), 
191 |        lty=c(1,1), col = c("grey80","black"), lwd=3, bty="n", cex=1.5)
192 | box(lwd=2.2)
193 | dev.copy(png,file.path(outdir,paste0(inVar,"_PLSR_Validation_Scatterplot.png")), 
194 |          height=2800, width=3200,  res=340)
195 | dev.off();
196 | #--------------------------------------------------------------------------------------------------#
197 | 
198 | #--------------------------------------------------------------------------------------------------#
199 | ### EOF


--------------------------------------------------------------------------------
/inst/scripts/simple_spectra-trait_plsr_example.R:
--------------------------------------------------------------------------------
  1 | ####################################################################################################
  2 | #
  3 | #  
  4 | #   A simple example "How-to" script illustrating the use of basic PLSR modeling to develop a 
  5 | #   spectra-trait algorithm to estimate leaf mass area with leaf-level spectroscopy data. The 
  6 | #   example is built from published data source from the EcoSIS spectral database. 
  7 | #
  8 | #   Spectra and trait data source:
  9 | #   https://ecosis.org/package/fresh-leaf-spectra-to-estimate-lma-over-neon-domains-in-eastern-united-states
 10 | #
 11 | #    Notes:
 12 | #    * Provided as a basic example of how to apply the model to new spectra observations
 13 | #    * The author notes the code is not the most elegant or clean, but is functional 
 14 | #    * Questions, comments, or concerns can be sent to sserbin@bnl.gov
 15 | #    * Code is provided under GNU General Public License v3.0 
 16 | #
 17 | #
 18 | ####################################################################################################
 19 | 
 20 | 
 21 | #--------------------------------------------------------------------------------------------------#
 22 | ### Load libraries
 23 | list.of.packages <- c("pls","here","dplyr","plotrix","ggplot2","gridExtra","spectratrait")
 24 | invisible(lapply(list.of.packages, library, character.only = TRUE))
 25 | #--------------------------------------------------------------------------------------------------#
 26 | 
 27 | 
 28 | #--------------------------------------------------------------------------------------------------#
 29 | ### Setup options
 30 | 
 31 | # Script options
 32 | pls::pls.options(plsralg = "oscorespls")
 33 | pls::pls.options("plsralg")
 34 | 
 35 | # Default par options
 36 | opar <- par(no.readonly = T)
 37 | 
 38 | # What is the target variable?
 39 | inVar <- "LMA_gDW_m2"
 40 | 
 41 | # What is the source dataset from EcoSIS?
 42 | ecosis_id <- "5617da17-c925-49fb-b395-45a51291bd2d"
 43 | 
 44 | # Specify output directory, output_dir 
 45 | # Options: 
 46 | # tempdir - use a OS-specified temporary directory 
 47 | # user defined PATH - e.g. "~/scratch/PLSR"
 48 | output_dir <- "tempdir"
 49 | #--------------------------------------------------------------------------------------------------#
 50 | 
 51 | 
 52 | #--------------------------------------------------------------------------------------------------#
 53 | ### Set working directory
 54 | if (output_dir=="tempdir") {
 55 |   outdir <- tempdir()
 56 | } else {
 57 |   if (! file.exists(output_dir)) dir.create(output_dir,recursive=TRUE)
 58 |   outdir <- file.path(path.expand(output_dir))
 59 | }
 60 | setwd(outdir) # set working directory
 61 | getwd()  # check wd
 62 | #--------------------------------------------------------------------------------------------------#
 63 | 
 64 | 
 65 | #--------------------------------------------------------------------------------------------------#
 66 | ### Get source dataset from EcoSIS
 67 | dat_raw <- spectratrait::get_ecosis_data(ecosis_id = ecosis_id)
 68 | head(dat_raw)
 69 | names(dat_raw)[1:40]
 70 | #--------------------------------------------------------------------------------------------------#
 71 | 
 72 | 
 73 | #--------------------------------------------------------------------------------------------------#
 74 | ### Create plsr dataset
 75 | Start.wave <- 500
 76 | End.wave <- 2400
 77 | wv <- seq(Start.wave,End.wave,1)
 78 | Spectra <- as.matrix(dat_raw[,names(dat_raw) %in% wv])
 79 | colnames(Spectra) <- c(paste0("Wave_",wv))
 80 | head(Spectra)[1:6,1:10]
 81 | sample_info <- dat_raw[,names(dat_raw) %notin% seq(350,2500,1)]
 82 | head(sample_info)
 83 | 
 84 | sample_info2 <- sample_info %>%
 85 |   select(Domain,Functional_type,Sample_ID,USDA_Species_Code=`USDA Symbol`,LMA_gDW_m2=LMA)
 86 | head(sample_info2)
 87 | 
 88 | plsr_data <- data.frame(sample_info2,Spectra)
 89 | rm(sample_info,sample_info2,Spectra)
 90 | dim(plsr_data)
 91 | #--------------------------------------------------------------------------------------------------#
 92 | 
 93 | 
 94 | #--------------------------------------------------------------------------------------------------#
 95 | #### Example data cleaning.  End user needs to do what's appropriate for their 
 96 | #### data.  This may be an iterative process.
 97 | # Keep only complete rows of inVar and spec data before fitting
 98 | plsr_data <- plsr_data[complete.cases(plsr_data[,names(plsr_data) %in% 
 99 |                                                   c(inVar,paste0("Wave_",wv))]),]
100 | #--------------------------------------------------------------------------------------------------#
101 | 
102 | 
103 | #--------------------------------------------------------------------------------------------------#
104 | #### Prepare data for fitting
105 | spec <- as.matrix(plsr_data[, which(names(plsr_data) %in% paste0("Wave_",wv))])
106 | plsr_data <- data.frame(plsr_data[, which(names(plsr_data) %notin% paste0("Wave_",wv))],
107 |                             Spectra=I(spec))
108 | head(plsr_data)[1:5]
109 | #--------------------------------------------------------------------------------------------------#
110 | 
111 | 
112 | #--------------------------------------------------------------------------------------------------#
113 | ### plot cal and val spectra
114 | par(mfrow=c(1,1)) # B, L, T, R
115 | spectratrait::f.plot.spec(Z=plsr_data$Spectra,wv=seq(Start.wave,End.wave,1),
116 |                           plot_label="CONUS NEON Data")
117 | dev.copy(png,file.path(outdir,paste0(inVar,'_Cal_Val_Spectra.png')), 
118 |          height=2500,width=4900, res=340)
119 | dev.off();
120 | par(mfrow=c(1,1))
121 | #--------------------------------------------------------------------------------------------------#
122 | 
123 | 
124 | #--------------------------------------------------------------------------------------------------#
125 | ### Use permutation to determine the optimal number of components
126 | if(grepl("Windows", sessionInfo()$running)){
127 |   pls.options(parallel = NULL)
128 | } else {
129 |   pls.options(parallel = parallel::detectCores()-1)
130 | }
131 | 
132 | method <- "firstPlateau" #pls, firstPlateau, firstMin
133 | random_seed <- 2356812
134 | seg <- 250
135 | maxComps <- 20
136 | iterations <- 40
137 | prop <- 0.70
138 | if (method=="pls") {
139 |   nComps <- spectratrait::find_optimal_components(dataset=plsr_data, targetVariable=inVar, 
140 |                                                   method=method, maxComps=maxComps, seg=seg, 
141 |                                                   random_seed=random_seed)
142 |   print(paste0("*** Optimal number of components: ", nComps))
143 | } else {
144 |   nComps <- spectratrait::find_optimal_components(dataset=plsr_data, targetVariable=inVar, 
145 |                                                   method=method, maxComps=maxComps, iterations=iterations, 
146 |                                                   seg=seg, prop=prop, random_seed=random_seed)
147 | }
148 | dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_PLSR_Component_Selection.png"))), 
149 |          height=2800, width=3400,  res=340)
150 | dev.off();
151 | #--------------------------------------------------------------------------------------------------#
152 | 
153 | 
154 | #--------------------------------------------------------------------------------------------------#
155 | ### Fit final model
156 | segs <- 100
157 | plsr.out <- plsr(as.formula(paste(inVar,"~","Spectra")),scale=FALSE,ncomp=nComps,validation="CV",
158 |                  segments=segs, segment.type="interleaved",trace=FALSE,data=plsr_data)
159 | fit <- plsr.out$fitted.values[,1,nComps]
160 | pls.options(parallel = NULL)
161 | 
162 | # External validation fit stats
163 | par(mfrow=c(1,2)) # B, L, T, R
164 | pls::RMSEP(plsr.out)
165 | plot(pls::RMSEP(plsr.out), main="MODEL RMSEP",
166 |      xlab="Number of Components",ylab="Model RMSEP",lty=1,col="black",cex=1.5,lwd=2)
167 | box(lwd=2.2)
168 | 
169 | pls::R2(plsr.out)
170 | plot(pls::R2(plsr.out), main="MODEL R2",
171 |      xlab="Number of Components",ylab="Model R2",lty=1,col="black",cex=1.5,lwd=2)
172 | box(lwd=2.2)
173 | dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_RMSEP_R2_by_Component.png"))), 
174 |          height=2800, width=4800,  res=340)
175 | dev.off();
176 | par(opar)
177 | #--------------------------------------------------------------------------------------------------#
178 | 
179 | 
180 | #--------------------------------------------------------------------------------------------------#
181 | ### PLSR fit observed vs. predicted plot data
182 | plsr_data.output <- data.frame(plsr_data[, which(names(plsr_data) %notin% "Spectra")], 
183 |                               PLSR_Predicted=fit,
184 |                               PLSR_CV_Predicted=as.vector(plsr.out$validation$pred[,,nComps]))
185 | plsr_data.output <- plsr_data.output %>%
186 |   mutate(PLSR_CV_Residuals = PLSR_CV_Predicted-get(inVar))
187 | head(plsr_data.output)
188 | cal.R2 <- round(pls::R2(plsr.out,intercept=F)[[1]][nComps],2)
189 | cal.RMSEP <- round(sqrt(mean(plsr_data.output$PLSR_CV_Residuals^2)),2)
190 | 
191 | rng_quant <- quantile(plsr_data.output[,inVar], probs = c(0.001, 0.999))
192 | cal_scatter_plot <- ggplot(plsr_data.output, aes(x=PLSR_CV_Predicted, y=get(inVar))) + 
193 |   theme_bw() + geom_point() + geom_abline(intercept = 0, slope = 1, color="dark grey", 
194 |                                           linetype="dashed", linewidth=1.5) + 
195 |   xlim(rng_quant[1], rng_quant[2]) + 
196 |   ylim(rng_quant[1], rng_quant[2]) +
197 |   labs(x=paste0("Predicted ", paste(inVar), " (units)"),
198 |        y=paste0("Observed ", paste(inVar), " (units)"),
199 |        title=paste0("Calibration: ", paste0("Rsq = ", cal.R2), "; ", paste0("RMSEP = ", cal.RMSEP))) +
200 |   theme(axis.text=element_text(size=18), legend.position="none",
201 |         axis.title=element_text(size=20, face="bold"), 
202 |         axis.text.x = element_text(angle = 0,vjust = 0.5),
203 |         panel.border = element_rect(linetype = "solid", fill = NA, linewidth=1.5))
204 | 
205 | cal_resid_histogram <- ggplot(plsr_data.output, aes(x=PLSR_CV_Residuals)) +
206 |   geom_histogram(alpha=.5, position="identity") + 
207 |   geom_vline(xintercept = 0, color="black", 
208 |              linetype="dashed", linewidth=1) + theme_bw() + 
209 |   theme(axis.text=element_text(size=18), legend.position="none",
210 |         axis.title=element_text(size=20, face="bold"), 
211 |         axis.text.x = element_text(angle = 0,vjust = 0.5),
212 |         panel.border = element_rect(linetype = "solid", fill = NA, linewidth=1.5))
213 | 
214 | # plot cal/val side-by-side
215 | scatterplots <- grid.arrange(cal_scatter_plot, cal_resid_histogram, nrow=2, ncol=1)
216 | ggsave(paste0(inVar,"Obs_vs_Pred_scatterplot.png"), plot = scatterplots, device="png", 
217 |        width = 32, 
218 |        height = 30, units = "cm",
219 |        dpi = 300)
220 | #--------------------------------------------------------------------------------------------------#
221 | 
222 | 
223 | 
224 | #--------------------------------------------------------------------------------------------------#
225 | ### eof


--------------------------------------------------------------------------------
/R/find_optimal_components.R:
--------------------------------------------------------------------------------
  1 | ##' Applies different methods for the determination of the optimal number of PLSR model components
  2 | ##' 
  3 | ##' @param dataset input full PLSR dataset. Usually just the calibration dataset
  4 | ##' @param targetVariable What object or variable to use as the Y (predictand) in the PLSR model? 
  5 | ##' Usually the "inVar" variable set at the beginning of a PLS script
  6 | ##' @param method Which approach to use to find optimal components. Options: pls, firstPlateau, firstMin
  7 | ##' @param maxComps maximum number of components to consider
  8 | ##' @param iterations how many different permutations to run
  9 | ##' @param seg For the built-in pls method, how many different data segments to select from the input dataset
 10 | ##' @param prop proportion of data to preserve for each permutation
 11 | ##' @param random_seed random seed to use for splitting data
 12 | ##' 
 13 | ##' @importFrom stats as.formula coef predict quantile t.test
 14 | ##' @importFrom pls plsr selectNcomp
 15 | ##' @importFrom reshape2 melt
 16 | ##' @import ggplot2
 17 | ##' 
 18 | ##' @return nComps the optimal number of PLSR components
 19 | ##' 
 20 | ##' @author Julien Lamour, Jeremiah Anderson, Shawn P. Serbin
 21 | ##' @export 
 22 | find_optimal_components <- function(dataset=NULL, targetVariable=NULL, method="pls", maxComps=20, 
 23 |                                     iterations=20, seg=100, prop=0.70, random_seed=123456789) {
 24 |   
 25 |   set.seed(random_seed)
 26 |   inVar <- targetVariable
 27 |   print("*** Identifying optimal number of PLSR components ***")
 28 |   
 29 |   if(method=="pls") {
 30 |     print("*** Running PLS permutation test ***")
 31 |     
 32 |     plsr.out <- pls::plsr(as.formula(paste(inVar,"~","Spectra")), scale=FALSE, center=TRUE, ncomp=maxComps, 
 33 |                           validation="CV", segments = seg, segment.type="interleaved", trace=FALSE, 
 34 |                           jackknife=TRUE, data=dataset)
 35 |     nComps <- pls::selectNcomp(plsr.out, method = "onesigma", plot = TRUE)
 36 |   }
 37 |   if(method=="firstPlateau") {
 38 |     press.out <- spectratrait::pls_permutation(dataset=dataset, targetVariable=inVar, maxComps=maxComps, 
 39 |                                                iterations=iterations, prop=prop)
 40 |     # PRESS plot
 41 |     pressDF <- as.data.frame(press.out$PRESS)
 42 |     names(pressDF) <- as.character(seq(maxComps))
 43 |     pressDFres <- reshape2::melt(pressDF)
 44 |     results <- NULL
 45 |     for(i in 1:(maxComps-1)){
 46 |       p_value <- t.test(press.out$PRESS[,i], press.out$PRESS[,(i+1)])$p.value
 47 |       temp_results <- data.frame(Component=(i+1), P.value= round(p_value, 6))
 48 |       results <- rbind(results, temp_results)
 49 |     }
 50 |     nComps <- min(results[results$P.value > 0.05, "Component"])
 51 |     print(paste0("*** Optimal number of components based on t.test: ", nComps))
 52 |     bp <- ggplot(pressDFres, aes(x=variable, y=value)) + theme_bw() + 
 53 |       geom_boxplot(notch=FALSE) + labs(x="Number of Components", y="PRESS") + 
 54 |       stat_boxplot(geom = "errorbar", width = 0.2) +
 55 |       geom_vline(xintercept = nComps, linetype="dashed", 
 56 |                  color = "blue", size=1.0)
 57 |     theme(axis.text=element_text(size=18), legend.position="none",
 58 |           axis.title=element_text(size=20, face="bold"), 
 59 |           axis.text.x = element_text(angle = 0,vjust = 0.5),
 60 |           panel.border = element_rect(linetype = "solid", fill = NA, size=1.5))
 61 |     print(bp)
 62 |   }
 63 |   if(method=="firstMin") {
 64 |     press.out <- spectratrait::pls_permutation(dataset=dataset, targetVariable=inVar, maxComps=maxComps, 
 65 |                                                iterations=iterations, prop=prop)
 66 |     # PRESS plot
 67 |     pressDF <- as.data.frame(press.out$PRESS)
 68 |     names(pressDF) <- as.character(seq(maxComps))
 69 |     pressDFres <- reshape2::melt(pressDF)
 70 |     # find lowest press
 71 |     mean_PRESS_comp <- apply(X = pressDF, MARGIN = 2, FUN = mean)
 72 |     lowest_PRESS <- which.min(mean_PRESS_comp)
 73 |     results <- as.vector(array(data="NA", dim=c(lowest_PRESS-1,1)))
 74 |     for (i in seq_along(1:(lowest_PRESS-1))) {
 75 |       comp1 <- i; comp2 <- lowest_PRESS
 76 |       ttest <- t.test(pressDFres$value[which(pressDFres$variable==comp1)],
 77 |                       pressDFres$value[which(pressDFres$variable==comp2)])
 78 |       #print(i)
 79 |       results[i] <- round(unlist(ttest$p.value),8)
 80 |     }
 81 |     results <- data.frame(seq(1,lowest_PRESS-1,1),results)
 82 |     names(results) <- c("Component", "P.value")
 83 |     first <- min(which(as.numeric(as.character(results$P.value)) > 0.05))
 84 |     nComps <- results$Component[first]
 85 |     print(paste0("*** Optimal number of components based on t.test: ", nComps))
 86 |     bp <- ggplot(pressDFres, aes(x=variable, y=value)) + theme_bw() + 
 87 |       geom_boxplot(notch=FALSE) + labs(x="Number of Components", y="PRESS") + 
 88 |       stat_boxplot(geom = "errorbar", width = 0.2) +
 89 |       geom_vline(xintercept = nComps, linetype="dashed", 
 90 |                  color = "blue", size=1.0)
 91 |     theme(axis.text=element_text(size=18), legend.position="none",
 92 |           axis.title=element_text(size=20, face="bold"), 
 93 |           axis.text.x = element_text(angle = 0,vjust = 0.5),
 94 |           panel.border = element_rect(linetype = "solid", fill = NA, size=1.5))
 95 |     print(bp)
 96 |   }
 97 |   return(nComps)
 98 | }
 99 | 
100 | ##' Uses the firstMin and firstPlateau methods for the determination of the optimal number of PLSR model components,
101 | ##' by group (i.e. optimal selection by stratification)
102 | ##' 
103 | ##' @param dataset input full PLSR dataset. Usually just the calibration dataset
104 | ##' @param targetVariable What object or variable to use as the Y (predictand) in the PLSR model? 
105 | ##' Usually the "inVar" variable set at the beginning of a PLS script
106 | ##' @param method Which approach to use to find optimal components. Options: firstPlateau, firstMin
107 | ##' @param maxComps maximum number of components to consider
108 | ##' @param iterations how many different permutations to run
109 | ##' @param prop proportion of data to preserve for each permutation
110 | ##' @param random_seed random seed to use for splitting data
111 | ##' @param group_variables group_variables character vector of the form c("var1", "var2"..."varn") 
112 | ##' providing the factors used for stratified sampling. 
113 | ##' 
114 | ##' @importFrom stats as.formula coef predict quantile t.test
115 | ##' @import ggplot2
116 | ##' @importFrom reshape2 melt
117 | ##' 
118 | ##' @return nComps the optimal number of PLSR components
119 | ##' 
120 | ##' @author asierrl, Shawn P. Serbin
121 | ##' @export 
122 | find_optimal_comp_by_groups <- function (dataset = NULL, targetVariable = NULL, method = "firstPlateau",
123 |                                          maxComps = 20, iterations = 20, prop = 0.7, random_seed = 123456789, 
124 |                                          group_variables = NULL) {
125 |   set.seed(random_seed)
126 |   inVar <- targetVariable
127 |   # TODO - really should merge this with the original and have an if/else if not NULL and select either
128 |   # pls_permutation OR pls_permutation_by_groups.
129 |   print("*** Identifying optimal number of PLSR components using stratified resampling by group_variables ***")
130 |   if (method == "pls") {
131 |     stop("*** Please select either the firstMin and firstPlateau. The pls package approach is not compatible ***")
132 |   }
133 |   if (method == "firstPlateau") {
134 |     press.out <- spectratrait::pls_permutation_by_groups(dataset=dataset, targetVariable=inVar,
135 |                                                     maxComps=maxComps, iterations=iterations, 
136 |                                                     prop=prop, group_variables=group_variables)
137 |     pressDF <- as.data.frame(press.out$PRESS)
138 |     names(pressDF) <- as.character(seq(maxComps))
139 |     pressDFres <- reshape2::melt(pressDF)
140 |     results <- NULL
141 |     for (i in 1:(maxComps - 1)) {
142 |       p_value <- t.test(press.out$PRESS[, i], press.out$PRESS[, (i + 1)])$p.value
143 |       temp_results <- data.frame(Component = (i + 1), P.value = round(p_value, 6))
144 |       results <- rbind(results, temp_results)
145 |     }
146 |     nComps <- min(results[results$P.value > 0.05, "Component"])
147 |     print(paste0("*** Optimal number of components based on t.test: ", nComps))
148 |     bp <- ggplot(pressDFres, aes(x = variable, y = value)) + 
149 |       theme_bw() + geom_boxplot(notch = FALSE) + labs(x = "Number of Components", 
150 |                                                       y = "PRESS") + 
151 |       stat_boxplot(geom = "errorbar", width = 0.2) + 
152 |       geom_vline(xintercept = nComps, linetype = "dashed", 
153 |                  color = "blue", size = 1)
154 |     theme(axis.text = element_text(size = 18), legend.position = "none", 
155 |           axis.title = element_text(size = 20, face = "bold"), 
156 |           axis.text.x = element_text(angle = 0, vjust = 0.5), 
157 |           panel.border = element_rect(linetype = "solid", 
158 |                                       fill = NA, size = 1.5))
159 |     print(bp)
160 |   }
161 |   if (method == "firstMin") {
162 |     press.out <- spectratrait::pls_permutation_by_groups(dataset = dataset, targetVariable=inVar,
163 |                                                    maxComps=maxComps, iterations=iterations, 
164 |                                                    prop=prop, group_variables=group_variables)
165 |     pressDF <- as.data.frame(press.out$PRESS)
166 |     names(pressDF) <- as.character(seq(maxComps))
167 |     pressDFres <- reshape2::melt(pressDF)
168 |     mean_PRESS_comp <- apply(X = pressDF, MARGIN = 2, FUN = mean)
169 |     lowest_PRESS <- which.min(mean_PRESS_comp)
170 |     results <- as.vector(array(data = "NA", dim = c(lowest_PRESS - 1, 1)))
171 |     for (i in seq_along(1:(lowest_PRESS - 1))) {
172 |       comp1 <- i
173 |       comp2 <- lowest_PRESS
174 |       ttest <- t.test(pressDFres$value[which(pressDFres$variable == comp1)], 
175 |                       pressDFres$value[which(pressDFres$variable == comp2)])
176 |       results[i] <- round(unlist(ttest$p.value), 8)
177 |     }
178 |     results <- data.frame(seq(1, lowest_PRESS - 1, 1), results)
179 |     names(results) <- c("Component", "P.value")
180 |     first <- min(which(as.numeric(as.character(results$P.value)) > 0.05))
181 |     nComps <- results$Component[first]
182 |     print(paste0("*** Optimal number of components based on t.test: ", nComps))
183 |     bp <- ggplot(pressDFres, aes(x = variable, y = value)) + 
184 |       theme_bw() + geom_boxplot(notch = FALSE) + labs(x = "Number of Components", 
185 |                                                       y = "PRESS") + 
186 |       stat_boxplot(geom = "errorbar", width = 0.2) + 
187 |       geom_vline(xintercept = nComps, linetype = "dashed", 
188 |                  color = "blue", size = 1)
189 |     theme(axis.text = element_text(size = 18), legend.position = "none", 
190 |           axis.title = element_text(size = 20, face = "bold"), 
191 |           axis.text.x = element_text(angle = 0, vjust = 0.5), 
192 |           panel.border = element_rect(linetype = "solid", 
193 |                                       fill = NA, size = 1.5))
194 |     print(bp)
195 |   }
196 |   return(nComps)
197 | }


--------------------------------------------------------------------------------
/vignettes/sserbin2019_plsr_ex9.md:
--------------------------------------------------------------------------------
  1 | An example showing how to apply an existing PLSR model to new data. In
  2 | this case applying the LMA model from Serbin et al., (2019; DOI -
  3 | 10.1111/nph.16123) to a dataset collected at CONUS NEON field sites
  4 | ================
  5 | Shawn P. Serbin, Julien Lamour, & Jeremiah Anderson
  6 | 2025-10-29
  7 | 
  8 | ### Getting Started
  9 | 
 10 | ### Load libraries
 11 | 
 12 | ``` r
 13 | list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra",
 14 |                       "spectratrait")
 15 | invisible(lapply(list.of.packages, library, character.only = TRUE))
 16 | ```
 17 | 
 18 |     ## 
 19 |     ## Attaching package: 'pls'
 20 | 
 21 |     ## The following object is masked from 'package:stats':
 22 |     ## 
 23 |     ##     loadings
 24 | 
 25 |     ## 
 26 |     ## Attaching package: 'dplyr'
 27 | 
 28 |     ## The following objects are masked from 'package:stats':
 29 |     ## 
 30 |     ##     filter, lag
 31 | 
 32 |     ## The following objects are masked from 'package:base':
 33 |     ## 
 34 |     ##     intersect, setdiff, setequal, union
 35 | 
 36 |     ## here() starts at /Users/sserbin/Data/Github/spectratrait
 37 | 
 38 |     ## 
 39 |     ## Attaching package: 'gridExtra'
 40 | 
 41 |     ## The following object is masked from 'package:dplyr':
 42 |     ## 
 43 |     ##     combine
 44 | 
 45 | ### Setup other functions and options
 46 | 
 47 | ``` r
 48 | ### Setup options
 49 | 
 50 | # Script options
 51 | pls::pls.options(plsralg = "oscorespls")
 52 | pls::pls.options("plsralg")
 53 | ```
 54 | 
 55 |     ## $plsralg
 56 |     ## [1] "oscorespls"
 57 | 
 58 | ``` r
 59 | # Default par options
 60 | opar <- par(no.readonly = T)
 61 | 
 62 | # What is the target variable?
 63 | inVar <- "LMA_gDW_m2"
 64 | 
 65 | # What is the source dataset from EcoSIS?
 66 | ecosis_id <- "5617da17-c925-49fb-b395-45a51291bd2d"
 67 | 
 68 | # Specify output directory, output_dir 
 69 | # Options: 
 70 | # tempdir - use a OS-specified temporary directory 
 71 | # user defined PATH - e.g. "~/scratch/PLSR"
 72 | output_dir <- "tempdir"
 73 | ```
 74 | 
 75 | ### Set working directory (scratch space)
 76 | 
 77 |     ## [1] "/private/var/folders/tq/tydmhlwn1bdf_0pmpcq70r2c0000gn/T/RtmpyEgmZw"
 78 | 
 79 | ### Grab PLSR Coefficients from GitHub
 80 | 
 81 | ``` r
 82 | git_repo <- "https://raw.githubusercontent.com/serbinsh/SSerbin_etal_2019_NewPhytologist/master/"
 83 | print("**** Downloading PLSR coefficients ****")
 84 | ```
 85 | 
 86 |     ## [1] "**** Downloading PLSR coefficients ****"
 87 | 
 88 | ``` r
 89 | githubURL <- paste0(git_repo,"SSerbin_multibiome_lma_plsr_model/sqrt_LMA_gDW_m2_PLSR_Coefficients_10comp.csv")
 90 | LeafLMA.plsr.coeffs <- spectratrait::source_GitHubData(githubURL)
 91 | rm(githubURL)
 92 | githubURL <- paste0(git_repo,"SSerbin_multibiome_lma_plsr_model/sqrt_LMA_gDW_m2_Jackkife_PLSR_Coefficients.csv")
 93 | LeafLMA.plsr.jk.coeffs <- spectratrait::source_GitHubData(githubURL)
 94 | rm(githubURL)
 95 | ```
 96 | 
 97 | ### Get source dataset from EcoSIS
 98 | 
 99 | ``` r
100 | dat_raw <- spectratrait::get_ecosis_data(ecosis_id = ecosis_id)
101 | ```
102 | 
103 |     ## [1] "**** Downloading Ecosis data ****"
104 | 
105 |     ## Downloading data...
106 | 
107 |     ## Rows: 6312 Columns: 2162
108 |     ## ── Column specification ────────────────────────────────────────────────────────
109 |     ## Delimiter: ","
110 |     ## chr   (10): Affiliation, Common Name, Domain, Functional_type, Latin Genus, ...
111 |     ## dbl (2152): LMA, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361,...
112 |     ## 
113 |     ## ℹ Use `spec()` to retrieve the full column specification for this data.
114 |     ## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
115 |     ## Download complete!
116 | 
117 | ``` r
118 | head(dat_raw)
119 | ```
120 | 
121 |     ## # A tibble: 6 × 2,162
122 |     ##   Affiliation           `Common Name` Domain Functional_type   LMA `Latin Genus`
123 |     ##   <chr>                 <chr>         <chr>  <chr>           <dbl> <chr>        
124 |     ## 1 University of Wiscon… black walnut  D02    broadleaf        72.9 Juglans      
125 |     ## 2 University of Wiscon… black walnut  D02    broadleaf        72.9 Juglans      
126 |     ## 3 University of Wiscon… black walnut  D02    broadleaf        60.8 Juglans      
127 |     ## 4 University of Wiscon… black walnut  D02    broadleaf        60.8 Juglans      
128 |     ## 5 University of Wiscon… black walnut  D02    broadleaf        85.9 Juglans      
129 |     ## 6 University of Wiscon… black walnut  D02    broadleaf        85.9 Juglans      
130 |     ## # ℹ 2,156 more variables: `Latin Species` <chr>, PI <chr>, Project <chr>,
131 |     ## #   Sample_ID <chr>, `USDA Symbol` <chr>, `350` <dbl>, `351` <dbl>,
132 |     ## #   `352` <dbl>, `353` <dbl>, `354` <dbl>, `355` <dbl>, `356` <dbl>,
133 |     ## #   `357` <dbl>, `358` <dbl>, `359` <dbl>, `360` <dbl>, `361` <dbl>,
134 |     ## #   `362` <dbl>, `363` <dbl>, `364` <dbl>, `365` <dbl>, `366` <dbl>,
135 |     ## #   `367` <dbl>, `368` <dbl>, `369` <dbl>, `370` <dbl>, `371` <dbl>,
136 |     ## #   `372` <dbl>, `373` <dbl>, `374` <dbl>, `375` <dbl>, `376` <dbl>, …
137 | 
138 | ``` r
139 | names(dat_raw)[1:40]
140 | ```
141 | 
142 |     ##  [1] "Affiliation"     "Common Name"     "Domain"          "Functional_type"
143 |     ##  [5] "LMA"             "Latin Genus"     "Latin Species"   "PI"             
144 |     ##  [9] "Project"         "Sample_ID"       "USDA Symbol"     "350"            
145 |     ## [13] "351"             "352"             "353"             "354"            
146 |     ## [17] "355"             "356"             "357"             "358"            
147 |     ## [21] "359"             "360"             "361"             "362"            
148 |     ## [25] "363"             "364"             "365"             "366"            
149 |     ## [29] "367"             "368"             "369"             "370"            
150 |     ## [33] "371"             "372"             "373"             "374"            
151 |     ## [37] "375"             "376"             "377"             "378"
152 | 
153 | ### Prepare new data for estimation
154 | 
155 | ``` r
156 | Start.wave <- 500
157 | End.wave <- 2400
158 | wv <- seq(Start.wave,End.wave,1)
159 | Spectra <- as.matrix(dat_raw[,names(dat_raw) %in% wv])
160 | colnames(Spectra) <- c(paste0("Wave_",wv))
161 | head(Spectra)[1:6,1:10]
162 | ```
163 | 
164 |     ##      Wave_500 Wave_501 Wave_502 Wave_503 Wave_504 Wave_505 Wave_506 Wave_507
165 |     ## [1,] 0.044226 0.044605 0.044927 0.045473 0.046241 0.046878 0.047826 0.049090
166 |     ## [2,] 0.046855 0.047601 0.047944 0.048478 0.049381 0.050235 0.051161 0.052191
167 |     ## [3,] 0.043758 0.044171 0.044869 0.045465 0.045984 0.046933 0.047993 0.049090
168 |     ## [4,] 0.041154 0.041603 0.042088 0.042408 0.042639 0.043260 0.044140 0.045058
169 |     ## [5,] 0.037296 0.037944 0.038209 0.038677 0.039388 0.039948 0.040630 0.041501
170 |     ## [6,] 0.043878 0.044257 0.044723 0.045295 0.045949 0.046575 0.047378 0.048357
171 |     ##      Wave_508 Wave_509
172 |     ## [1,] 0.050268 0.051525
173 |     ## [2,] 0.053322 0.054357
174 |     ## [3,] 0.050168 0.051441
175 |     ## [4,] 0.045700 0.046476
176 |     ## [5,] 0.042613 0.043731
177 |     ## [6,] 0.049392 0.050387
178 | 
179 | ``` r
180 | sample_info <- dat_raw[,names(dat_raw) %notin% seq(350,2500,1)]
181 | head(sample_info)
182 | ```
183 | 
184 |     ## # A tibble: 6 × 11
185 |     ##   Affiliation           `Common Name` Domain Functional_type   LMA `Latin Genus`
186 |     ##   <chr>                 <chr>         <chr>  <chr>           <dbl> <chr>        
187 |     ## 1 University of Wiscon… black walnut  D02    broadleaf        72.9 Juglans      
188 |     ## 2 University of Wiscon… black walnut  D02    broadleaf        72.9 Juglans      
189 |     ## 3 University of Wiscon… black walnut  D02    broadleaf        60.8 Juglans      
190 |     ## 4 University of Wiscon… black walnut  D02    broadleaf        60.8 Juglans      
191 |     ## 5 University of Wiscon… black walnut  D02    broadleaf        85.9 Juglans      
192 |     ## 6 University of Wiscon… black walnut  D02    broadleaf        85.9 Juglans      
193 |     ## # ℹ 5 more variables: `Latin Species` <chr>, PI <chr>, Project <chr>,
194 |     ## #   Sample_ID <chr>, `USDA Symbol` <chr>
195 | 
196 | ``` r
197 | sample_info2 <- sample_info %>%
198 |   select(Domain,Functional_type,Sample_ID,USDA_Species_Code=`USDA Symbol`,LMA_gDW_m2=LMA)
199 | head(sample_info2)
200 | ```
201 | 
202 |     ## # A tibble: 6 × 5
203 |     ##   Domain Functional_type Sample_ID USDA_Species_Code LMA_gDW_m2
204 |     ##   <chr>  <chr>           <chr>     <chr>                  <dbl>
205 |     ## 1 D02    broadleaf       P0001     JUNI                    72.9
206 |     ## 2 D02    broadleaf       L0001     JUNI                    72.9
207 |     ## 3 D02    broadleaf       P0002     JUNI                    60.8
208 |     ## 4 D02    broadleaf       L0002     JUNI                    60.8
209 |     ## 5 D02    broadleaf       P0003     JUNI                    85.9
210 |     ## 6 D02    broadleaf       L0003     JUNI                    85.9
211 | 
212 | ``` r
213 | plsr_data <- data.frame(sample_info2,Spectra)
214 | rm(sample_info,sample_info2,Spectra)
215 | ```
216 | 
217 | #### Example data cleaning.
218 | 
219 | ``` r
220 | #### End user needs to do what's appropriate for their data.  This may be an iterative process.
221 | # Keep only complete rows of inVar and spec data before fitting
222 | plsr_data <- plsr_data[complete.cases(plsr_data[,names(plsr_data) %in% 
223 |                                                   c(inVar,paste0("Wave_",wv))]),]
224 | ```
225 | 
226 | #### Prepare PLSR model
227 | 
228 | ``` r
229 | print("**** Applying PLSR model to estimate LMA from spectral observations ****")
230 | ```
231 | 
232 |     ## [1] "**** Applying PLSR model to estimate LMA from spectral observations ****"
233 | 
234 | ``` r
235 | # setup model
236 | dims <- dim(LeafLMA.plsr.coeffs)
237 | LeafLMA.plsr.intercept <- LeafLMA.plsr.coeffs[1,]
238 | LeafLMA.plsr.coeffs <- data.frame(LeafLMA.plsr.coeffs[2:dims[1],])
239 | names(LeafLMA.plsr.coeffs) <- c("wavelength","coefs")
240 | LeafLMA.plsr.coeffs.vec <- as.vector(LeafLMA.plsr.coeffs[,2])
241 | sub_spec <- droplevels(plsr_data[,which(names(plsr_data) %in% 
242 |                                                    paste0("Wave_",seq(Start.wave,End.wave,1)))])
243 | ```
244 | 
245 | #### Apply PLSR model
246 | 
247 | ``` r
248 | plsr_pred <- as.matrix(sub_spec) %*% LeafLMA.plsr.coeffs.vec + LeafLMA.plsr.intercept[,2]
249 | leafLMA <- plsr_pred[,1]^2  # convert to standard LMA units from sqrt(LMA)
250 | names(leafLMA) <- "PLSR_LMA_gDW_m2"
251 | 
252 | # organize output
253 | LeafLMA.PLSR.dataset <- data.frame(plsr_data[,which(names(plsr_data) %notin% 
254 |                                                       paste0("Wave_",seq(Start.wave,End.wave,1)))],
255 |                                    PLSR_LMA_gDW_m2=leafLMA, PLSR_Residuals=leafLMA-plsr_data[,inVar])
256 | head(LeafLMA.PLSR.dataset)
257 | ```
258 | 
259 |     ##   Domain Functional_type Sample_ID USDA_Species_Code LMA_gDW_m2 PLSR_LMA_gDW_m2
260 |     ## 1    D02       broadleaf     P0001              JUNI      72.87        96.26243
261 |     ## 2    D02       broadleaf     L0001              JUNI      72.87        90.09453
262 |     ## 3    D02       broadleaf     P0002              JUNI      60.77        77.16475
263 |     ## 4    D02       broadleaf     L0002              JUNI      60.77        60.99039
264 |     ## 5    D02       broadleaf     P0003              JUNI      85.92       101.22709
265 |     ## 6    D02       broadleaf     L0003              JUNI      85.92        97.13018
266 |     ##   PLSR_Residuals
267 |     ## 1     23.3924343
268 |     ## 2     17.2245326
269 |     ## 3     16.3947533
270 |     ## 4      0.2203913
271 |     ## 5     15.3070857
272 |     ## 6     11.2101840
273 | 
274 | #### Generate PLSR uncertainty estimates
275 | 
276 | ``` r
277 | print("**** Generate PLSR uncertainty estimates ****")
278 | ```
279 | 
280 |     ## [1] "**** Generate PLSR uncertainty estimates ****"
281 | 
282 | ``` r
283 | jk_coef <- data.frame(LeafLMA.plsr.jk.coeffs[,3:dim(LeafLMA.plsr.jk.coeffs)[2]])
284 | jk_coef <- t(jk_coef)
285 | head(jk_coef)[,1:6]
286 | ```
287 | 
288 |     ##               [,1]      [,2]      [,3]      [,4]      [,5]      [,6]
289 |     ## Wave_500 1.0005875 0.9952840 0.5652908 0.9793160 1.1052207 0.9370473
290 |     ## Wave_501 0.9584235 0.9631434 0.5230544 0.9330803 1.0477469 0.9042780
291 |     ## Wave_502 0.8960202 0.9065954 0.4597413 0.8710298 0.9658130 0.8628370
292 |     ## Wave_503 0.8722135 0.8936197 0.4420696 0.8456098 0.9272967 0.8513741
293 |     ## Wave_504 0.8452831 0.8644923 0.4159567 0.8110004 0.8903192 0.8320347
294 |     ## Wave_505 0.8240743 0.8378399 0.3902871 0.7829891 0.8570048 0.8150339
295 | 
296 | ``` r
297 | jk_int <- t(LeafLMA.plsr.jk.coeffs[,2])
298 | head(jk_int)[,1:6]
299 | ```
300 | 
301 |     ## [1] 7.787098 7.959443 8.015161 8.018586 7.658080 7.998432
302 | 
303 | ``` r
304 | jk_pred <- as.matrix(sub_spec) %*% jk_coef + matrix(rep(jk_int, length(plsr_data[,inVar])), 
305 |                                                     byrow=TRUE, ncol=length(jk_int))
306 | jk_pred <- jk_pred^2
307 | head(jk_pred)[,1:6]
308 | ```
309 | 
310 |     ##       [,1]      [,2]      [,3]     [,4]      [,5]      [,6]
311 |     ## 1 94.28721  96.77712  96.44452 95.11992  96.72830  95.33877
312 |     ## 2 90.36051  90.57120  90.77562 89.77821  90.24826  89.61806
313 |     ## 3 75.71088  77.91861  76.42730 76.11473  77.67179  76.68756
314 |     ## 4 61.37001  61.30963  60.56606 60.72330  61.63712  60.69649
315 |     ## 5 99.24456 101.75948 101.22916 99.96305 101.70397 100.16758
316 |     ## 6 97.40414  97.65463  97.52687 97.00817  97.33677  96.08535
317 | 
318 | ``` r
319 | dim(jk_pred)
320 | ```
321 | 
322 |     ## [1] 6312 1000
323 | 
324 | ``` r
325 | interval <- c(0.025,0.975)
326 | Interval_Conf <- apply(X = jk_pred, MARGIN = 1, FUN = quantile, 
327 |                        probs=c(interval[1], interval[2]))
328 | sd_mean <- apply(X = jk_pred, MARGIN = 1, FUN =sd)
329 | sd_res <- sd(LeafLMA.PLSR.dataset$PLSR_Residuals)
330 | sd_tot <- sqrt(sd_mean^2+sd_res^2)
331 | LeafLMA.PLSR.dataset$LCI <- Interval_Conf[1,]
332 | LeafLMA.PLSR.dataset$UCI <- Interval_Conf[2,]
333 | LeafLMA.PLSR.dataset$LPI <- LeafLMA.PLSR.dataset$PLSR_LMA_gDW_m2-1.96*sd_tot
334 | LeafLMA.PLSR.dataset$UPI <- LeafLMA.PLSR.dataset$PLSR_LMA_gDW_m2+1.96*sd_tot
335 | head(LeafLMA.PLSR.dataset)
336 | ```
337 | 
338 |     ##   Domain Functional_type Sample_ID USDA_Species_Code LMA_gDW_m2 PLSR_LMA_gDW_m2
339 |     ## 1    D02       broadleaf     P0001              JUNI      72.87        96.26243
340 |     ## 2    D02       broadleaf     L0001              JUNI      72.87        90.09453
341 |     ## 3    D02       broadleaf     P0002              JUNI      60.77        77.16475
342 |     ## 4    D02       broadleaf     L0002              JUNI      60.77        60.99039
343 |     ## 5    D02       broadleaf     P0003              JUNI      85.92       101.22709
344 |     ## 6    D02       broadleaf     L0003              JUNI      85.92        97.13018
345 |     ##   PLSR_Residuals      LCI       UCI      LPI      UPI
346 |     ## 1     23.3924343 93.95423  99.03625 71.30476 121.2201
347 |     ## 2     17.2245326 88.81329  92.00078 65.21071 114.9784
348 |     ## 3     16.3947533 74.79509  79.85715 52.19722 102.1323
349 |     ## 4      0.2203913 59.80058  62.29402 36.12678  85.8540
350 |     ## 5     15.3070857 98.86570 103.97701 76.26586 126.1883
351 |     ## 6     11.2101840 95.50843  99.66865 72.20971 122.0507
352 | 
353 | #### Generate PLSR estimated LMA observed vs predicted plot
354 | 
355 | ``` r
356 | rmsep_percrmsep <- spectratrait::percent_rmse(plsr_dataset = LeafLMA.PLSR.dataset, 
357 |                                               inVar = inVar, 
358 |                                               residuals = LeafLMA.PLSR.dataset$PLSR_Residuals, 
359 |                                               range="full")
360 | RMSEP <- rmsep_percrmsep$rmse
361 | perc_RMSEP <- rmsep_percrmsep$perc_rmse
362 | r2 <- round(summary(lm(LeafLMA.PLSR.dataset$PLSR_LMA_gDW_m2~
363 |                          LeafLMA.PLSR.dataset[,inVar]))$adj.r.squared,2)
364 | expr <- vector("expression", 3)
365 | expr[[1]] <- bquote(R^2==.(r2))
366 | expr[[2]] <- bquote(RMSEP==.(round(RMSEP,2)))
367 | expr[[3]] <- bquote("%RMSEP"==.(round(perc_RMSEP,2)))
368 | rng_vals <- c(min(LeafLMA.PLSR.dataset$LPI), max(LeafLMA.PLSR.dataset$UPI))
369 | par(mfrow=c(1,1), mar=c(4,5.3,1,0.4), oma=c(0.1, 0.1, 0.1, 0.2))
370 | plotrix::plotCI(LeafLMA.PLSR.dataset$PLSR_LMA_gDW_m2,LeafLMA.PLSR.dataset[,inVar], 
371 |                 li=LeafLMA.PLSR.dataset$LPI, ui=LeafLMA.PLSR.dataset$UPI, gap=0.009,sfrac=0.000, 
372 |                 lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), 
373 |                 err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="grey80",
374 |                 cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"),
375 |                 ylab=paste0("Observed ", paste(inVar), " (units)"),
376 |                 cex.axis=1.5,cex.lab=1.8)
377 | abline(0,1,lty=2,lw=2)
378 | plotrix::plotCI(LeafLMA.PLSR.dataset$PLSR_LMA_gDW_m2,LeafLMA.PLSR.dataset[,inVar], 
379 |                 li=LeafLMA.PLSR.dataset$LCI, ui=LeafLMA.PLSR.dataset$UCI, gap=0.009,sfrac=0.004, 
380 |                 lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), 
381 |                 err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="black",
382 |                 cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"),
383 |                 ylab=paste0("Observed ", paste(inVar), " (units)"),
384 |                 cex.axis=1.5,cex.lab=1.8, add=T)
385 | legend("topleft", legend=expr, bty="n", cex=1.5)
386 | legend("bottomright", legend=c("Prediction Interval","Confidence Interval"), 
387 |        lty=c(1,1), col = c("grey80","black"), lwd=3, bty="n", cex=1.5)
388 | box(lwd=2.2)
389 | ```
390 | 
391 | ![](sserbin2019_plsr_ex9_files/figure-gfm/unnamed-chunk-11-1.png)<!-- -->
392 | 
393 | ``` r
394 | dev.copy(png,file.path(outdir,paste0(inVar,"_PLSR_Validation_Scatterplot.png")), 
395 |          height=2800, width=3200,  res=340)
396 | ```
397 | 
398 |     ## quartz_off_screen 
399 |     ##                 3
400 | 
401 | ``` r
402 | dev.off();
403 | ```
404 | 
405 |     ## quartz_off_screen 
406 |     ##                 2
407 | 
408 | ``` r
409 | print(paste("Output directory: ", outdir))
410 | ```
411 | 
412 |     ## [1] "Output directory:  /var/folders/tq/tydmhlwn1bdf_0pmpcq70r2c0000gn/T//RtmpyEgmZw"
413 | 
414 | ``` r
415 | # Observed versus predicted
416 | write.csv(LeafLMA.PLSR.dataset,file=file.path(outdir,
417 |                                          paste0(inVar,'_PLSR_Estimates.csv')),
418 |           row.names=FALSE)
419 | ```
420 | 
421 | ### Confirm files were written to temp space
422 | 
423 | ``` r
424 | print("**** PLSR output files: ")
425 | ```
426 | 
427 |     ## [1] "**** PLSR output files: "
428 | 
429 | ``` r
430 | print(list.files(outdir)[grep(pattern = inVar, list.files(outdir))])
431 | ```
432 | 
433 |     ## [1] "LMA_gDW_m2_PLSR_Estimates.csv"             
434 |     ## [2] "LMA_gDW_m2_PLSR_Validation_Scatterplot.png"
435 | 


--------------------------------------------------------------------------------
/vignettes/ely_etal_ex2.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: Spectra-trait PLSR example using leaf-level spectra and leaf nitrogen content (Narea, g/m2) data from eight different crop species growing in a glasshouse at Brookhaven National Laboratory. This example illustrates running the PLSR permutation by group
  3 | author: "Shawn P. Serbin, Julien Lamour, & Jeremiah Anderson"
  4 | date: "`r Sys.Date()`"
  5 | output:
  6 |   pdf_document: default
  7 |   html_notebook: default
  8 |   github_document: default
  9 |   html_document:
 10 |     df_print: paged
 11 |     keep_md: true
 12 |   rmarkdown: html_vignette
 13 | vignette: >
 14 |   %\VignetteIndexEntry{Spectra-trait PLSR example using leaf-level spectra and leaf nitrogen content (Narea, g/m2) data from eight different crop species growing in a glasshouse at Brookhaven National Laboratory. This example illustrates running the PLSR permutation by group}
 15 |   %\usepackage[utf8]{inputenc}
 16 |   %\VignetteEngine{knitr::knitr}
 17 | ---
 18 | 
 19 | ```{r setup, include=FALSE, echo=FALSE}
 20 | knitr::opts_chunk$set(echo = TRUE)
 21 | ```
 22 | 
 23 | ### Overview
 24 | This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook to illustrate how to load an
 25 | internal dataset ("ely_plsr_data"), choose the "optimal" number of plsr components, 
 26 | and fit a plsr model for leaf nitrogen content (Narea, g/m2)
 27 | 
 28 | ### Getting Started
 29 | ### Load libraries
 30 | ```{r, eval=TRUE, echo=TRUE}
 31 | list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait")
 32 | invisible(lapply(list.of.packages, library, character.only = TRUE))
 33 | ```
 34 | 
 35 | ### Setup other functions and options
 36 | ```{r, echo=TRUE}
 37 | ### Setup options
 38 | 
 39 | # Script options
 40 | pls::pls.options(plsralg = "oscorespls")
 41 | pls::pls.options("plsralg")
 42 | 
 43 | # Default par options
 44 | opar <- par(no.readonly = T)
 45 | 
 46 | # Specify output directory, output_dir 
 47 | # Options: 
 48 | # tempdir - use a OS-specified temporary directory 
 49 | # user defined PATH - e.g. "~/scratch/PLSR"
 50 | output_dir <- "tempdir"
 51 | ```
 52 | 
 53 | ### Load internal Ely et al 2019 dataset
 54 | ```{r, echo=TRUE}
 55 | data("ely_plsr_data")
 56 | head(ely_plsr_data)[,1:8]
 57 | 
 58 | # What is the target variable?
 59 | inVar <- "N_g_m2"
 60 | ```
 61 | 
 62 | ### Set working directory (scratch space)
 63 | ```{r, echo=FALSE}
 64 | if (output_dir=="tempdir") {
 65 |   outdir <- tempdir()
 66 | } else {
 67 |   if (! file.exists(output_dir)) dir.create(output_dir,recursive=TRUE)
 68 |   outdir <- file.path(path.expand(output_dir))
 69 | }
 70 | setwd(outdir) # set working directory
 71 | getwd()  # check wd
 72 | ```
 73 | 
 74 | ### Full PLSR dataset
 75 | ```{r, echo=TRUE}
 76 | Start.wave <- 500
 77 | End.wave <- 2400
 78 | wv <- seq(Start.wave,End.wave,1)
 79 | plsr_data <- ely_plsr_data
 80 | head(plsr_data)[,1:6]
 81 | ```
 82 | ### Create cal/val datasets
 83 | ```{r, fig.height = 5, fig.width = 12, echo=TRUE}
 84 | ### Create cal/val datasets
 85 | ## Make a stratified random sampling in the strata USDA_Species_Code and Domain
 86 | 
 87 | method <- "base" #base/dplyr
 88 | # base R - a bit slow
 89 | # dplyr - much faster
 90 | split_data <- spectratrait::create_data_split(dataset=plsr_data, approach=method, 
 91 |                                               split_seed=23452135, prop=0.7, 
 92 |                                               group_variables="Species_Code")
 93 | names(split_data)
 94 | cal.plsr.data <- split_data$cal_data
 95 | head(cal.plsr.data)[1:8]
 96 | val.plsr.data <- split_data$val_data
 97 | head(val.plsr.data)[1:8]
 98 | rm(split_data)
 99 | 
100 | # Datasets:
101 | print(paste("Cal observations: ",dim(cal.plsr.data)[1],sep=""))
102 | print(paste("Val observations: ",dim(val.plsr.data)[1],sep=""))
103 | 
104 | cal_hist_plot <- ggplot(data = cal.plsr.data, 
105 |                         aes(x = cal.plsr.data[,paste0(inVar)])) + 
106 |   geom_histogram(fill=I("grey50"),col=I("black"),alpha=I(.7)) + 
107 |   labs(title=paste0("Calibration Histogram for ",inVar), x = paste0(inVar), 
108 |        y = "Count")
109 | val_hist_plot <- ggplot(data = val.plsr.data, 
110 |                         aes(x = val.plsr.data[,paste0(inVar)])) +
111 |   geom_histogram(fill=I("grey50"),col=I("black"),alpha=I(.7)) + 
112 |   labs(title=paste0("Validation Histogram for ",inVar), x = paste0(inVar), 
113 |        y = "Count")
114 | histograms <- grid.arrange(cal_hist_plot, val_hist_plot, ncol=2)
115 | ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Histograms.png")), 
116 |        plot = histograms, 
117 |        device="png", width = 30, 
118 |        height = 12, units = "cm",
119 |        dpi = 300)
120 | # output cal/val data
121 | write.csv(cal.plsr.data,file=file.path(outdir,paste0(inVar,'_Cal_PLSR_Dataset.csv')),
122 |           row.names=FALSE)
123 | write.csv(val.plsr.data,file=file.path(outdir,paste0(inVar,'_Val_PLSR_Dataset.csv')),
124 |           row.names=FALSE)
125 | ```
126 | 
127 | ### Create calibration and validation PLSR datasets
128 | ```{r, echo=TRUE}
129 | ### Format PLSR data for model fitting 
130 | cal_spec <- as.matrix(cal.plsr.data[, which(names(cal.plsr.data) %in% paste0("Wave_",wv))])
131 | cal.plsr.data <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% paste0("Wave_",wv))],
132 |                             Spectra=I(cal_spec))
133 | head(cal.plsr.data)[1:5]
134 | 
135 | val_spec <- as.matrix(val.plsr.data[, which(names(val.plsr.data) %in% paste0("Wave_",wv))])
136 | val.plsr.data <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% paste0("Wave_",wv))],
137 |                             Spectra=I(val_spec))
138 | head(val.plsr.data)[1:5]
139 | ```
140 | 
141 | ### plot cal and val spectra
142 | ```{r, fig.height = 5, fig.width = 12, echo=TRUE}
143 | par(mfrow=c(1,2)) # B, L, T, R
144 | spectratrait::f.plot.spec(Z=cal.plsr.data$Spectra,wv=wv,plot_label="Calibration")
145 | spectratrait::f.plot.spec(Z=val.plsr.data$Spectra,wv=wv,plot_label="Validation")
146 | 
147 | dev.copy(png,file.path(outdir,paste0(inVar,'_Cal_Val_Spectra.png')), 
148 |          height=2500,width=4900, res=340)
149 | dev.off();
150 | par(mfrow=c(1,1))
151 | ```
152 | 
153 | ### Use permutation to determine optimal number of components
154 | ```{r, fig.height = 6, fig.width = 10, echo=TRUE}
155 | ### Use permutation to determine the optimal number of components
156 | if(grepl("Windows", sessionInfo()$running)){
157 |   pls.options(parallel = NULL)
158 | } else {
159 |   pls.options(parallel = parallel::detectCores()-1)
160 | }
161 | 
162 | method <- "firstMin" #firstPlateau, firstMin
163 | random_seed <- 1245565
164 | seg <- 50
165 | maxComps <- 16
166 | iterations <- 80
167 | prop <- 0.70
168 | nComps <- spectratrait::find_optimal_comp_by_groups(dataset=cal.plsr.data, targetVariable=inVar, 
169 |                                                     method=method, maxComps=maxComps, 
170 |                                                     iterations=iterations, prop=prop, 
171 |                                                     random_seed=random_seed,
172 |                                                     group_variables="Species_Code")
173 | dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_PLSR_Component_Selection.png"))), 
174 |          height=2800, width=3400, res=340)
175 | dev.off();
176 | ```
177 | 
178 | ### Fit final model
179 | ```{r, fig.height = 5, fig.width = 12, echo=TRUE}
180 | plsr.out <- plsr(as.formula(paste(inVar,"~","Spectra")),scale=FALSE,ncomp=nComps,validation="LOO",
181 |                  trace=FALSE,data=cal.plsr.data)
182 | fit <- plsr.out$fitted.values[,1,nComps]
183 | pls.options(parallel = NULL)
184 | 
185 | # External validation fit stats
186 | par(mfrow=c(1,2)) # B, L, T, R
187 | pls::RMSEP(plsr.out, newdata = val.plsr.data)
188 | plot(pls::RMSEP(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL RMSEP",
189 |      xlab="Number of Components",ylab="Model Validation RMSEP",lty=1,col="black",cex=1.5,lwd=2)
190 | box(lwd=2.2)
191 | 
192 | pls::R2(plsr.out, newdata = val.plsr.data)
193 | plot(pls::R2(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL R2",
194 |      xlab="Number of Components",ylab="Model Validation R2",lty=1,col="black",cex=1.5,lwd=2)
195 | box(lwd=2.2)
196 | dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_Validation_RMSEP_R2_by_Component.png"))), 
197 |          height=2800, width=4800,  res=340)
198 | dev.off();
199 | par(opar)
200 | ```
201 | 
202 | ### PLSR fit observed vs. predicted plot data
203 | ```{r, fig.height = 15, fig.width = 15, echo=TRUE}  
204 | #calibration
205 | cal.plsr.output <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% "Spectra")],
206 |                               PLSR_Predicted=fit,
207 |                               PLSR_CV_Predicted=as.vector(plsr.out$validation$pred[,,nComps]))
208 | cal.plsr.output <- cal.plsr.output %>%
209 |   mutate(PLSR_CV_Residuals = PLSR_CV_Predicted-get(inVar))
210 | head(cal.plsr.output)
211 | cal.R2 <- round(pls::R2(plsr.out,intercept=F)[[1]][nComps],2)
212 | cal.RMSEP <- round(sqrt(mean(cal.plsr.output$PLSR_CV_Residuals^2)),2)
213 | 
214 | val.plsr.output <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% "Spectra")],
215 |                               PLSR_Predicted=as.vector(predict(plsr.out, 
216 |                                                                newdata = val.plsr.data, 
217 |                                                                ncomp=nComps, type="response")[,,1]))
218 | val.plsr.output <- val.plsr.output %>%
219 |   mutate(PLSR_Residuals = PLSR_Predicted-get(inVar))
220 | head(val.plsr.output)
221 | val.R2 <- round(pls::R2(plsr.out,newdata=val.plsr.data,intercept=F)[[1]][nComps],2)
222 | val.RMSEP <- round(sqrt(mean(val.plsr.output$PLSR_Residuals^2)),2)
223 | 
224 | rng_quant <- quantile(cal.plsr.output[,inVar], probs = c(0.001, 0.999))
225 | cal_scatter_plot <- ggplot(cal.plsr.output, aes(x=PLSR_CV_Predicted, y=get(inVar))) + 
226 |   theme_bw() + geom_point() + geom_abline(intercept = 0, slope = 1, color="dark grey", 
227 |                                           linetype="dashed", linewidth=1.5) + 
228 |   xlim(rng_quant[1], rng_quant[2]) + 
229 |   ylim(rng_quant[1], rng_quant[2]) +
230 |   labs(x=paste0("Predicted ", paste(inVar), " (units)"),
231 |        y=paste0("Observed ", paste(inVar), " (units)"),
232 |        title=paste0("Calibration: ", paste0("Rsq = ", cal.R2), "; ", paste0("RMSEP = ", 
233 |                                                                             cal.RMSEP))) +
234 |   theme(axis.text=element_text(size=18), legend.position="none",
235 |         axis.title=element_text(size=20, face="bold"), 
236 |         axis.text.x = element_text(angle = 0,vjust = 0.5),
237 |         panel.border = element_rect(linetype = "solid", fill = NA, linewidth=1.5))
238 | 
239 | cal_resid_histogram <- ggplot(cal.plsr.output, aes(x=PLSR_CV_Residuals)) +
240 |   geom_histogram(alpha=.5, position="identity") + 
241 |   geom_vline(xintercept = 0, color="black", 
242 |              linetype="dashed", linewidth=1) + theme_bw() + 
243 |   theme(axis.text=element_text(size=18), legend.position="none",
244 |         axis.title=element_text(size=20, face="bold"), 
245 |         axis.text.x = element_text(angle = 0,vjust = 0.5),
246 |         panel.border = element_rect(linetype = "solid", fill = NA, linewidth=1.5))
247 | 
248 | rng_quant <- quantile(val.plsr.output[,inVar], probs = c(0.001, 0.999))
249 | val_scatter_plot <- ggplot(val.plsr.output, aes(x=PLSR_Predicted, y=get(inVar))) + 
250 |   theme_bw() + geom_point() + geom_abline(intercept = 0, slope = 1, color="dark grey", 
251 |                                           linetype="dashed", linewidth=1.5) + 
252 |   xlim(rng_quant[1], rng_quant[2]) + 
253 |   ylim(rng_quant[1], rng_quant[2]) +
254 |   labs(x=paste0("Predicted ", paste(inVar), " (units)"),
255 |        y=paste0("Observed ", paste(inVar), " (units)"),
256 |        title=paste0("Validation: ", paste0("Rsq = ", val.R2), "; ", paste0("RMSEP = ", 
257 |                                                                            val.RMSEP))) +
258 |   theme(axis.text=element_text(size=18), legend.position="none",
259 |         axis.title=element_text(size=20, face="bold"), 
260 |         axis.text.x = element_text(angle = 0,vjust = 0.5),
261 |         panel.border = element_rect(linetype = "solid", fill = NA, linewidth=1.5))
262 | 
263 | val_resid_histogram <- ggplot(val.plsr.output, aes(x=PLSR_Residuals)) +
264 |   geom_histogram(alpha=.5, position="identity") + 
265 |   geom_vline(xintercept = 0, color="black", 
266 |              linetype="dashed", linewidth=1) + theme_bw() + 
267 |   theme(axis.text=element_text(size=18), legend.position="none",
268 |         axis.title=element_text(size=20, face="bold"), 
269 |         axis.text.x = element_text(angle = 0,vjust = 0.5),
270 |         panel.border = element_rect(linetype = "solid", fill = NA, linewidth=1.5))
271 | 
272 | # plot cal/val side-by-side
273 | scatterplots <- grid.arrange(cal_scatter_plot, val_scatter_plot, cal_resid_histogram, 
274 |                              val_resid_histogram, nrow=2,ncol=2)
275 | ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Scatterplots.png")), 
276 |        plot = scatterplots, device="png", 
277 |        width = 32, 
278 |        height = 30, units = "cm",
279 |        dpi = 300)
280 | ```
281 | 
282 | ### Generate Coefficient and VIP plots
283 | ```{r, fig.height = 9, fig.width = 10, echo=TRUE}
284 | vips <- spectratrait::VIP(plsr.out)[nComps,]
285 | par(mfrow=c(2,1))
286 | plot(plsr.out, plottype = "coef",xlab="Wavelength (nm)",
287 |      ylab="Regression coefficients",legendpos = "bottomright",
288 |      ncomp=nComps,lwd=2)
289 | box(lwd=2.2)
290 | plot(seq(Start.wave,End.wave,1),vips,xlab="Wavelength (nm)",ylab="VIP",cex=0.01)
291 | lines(seq(Start.wave,End.wave,1),vips,lwd=3)
292 | abline(h=0.8,lty=2,col="dark grey")
293 | box(lwd=2.2)
294 | dev.copy(png,file.path(outdir,paste0(inVar,'_Coefficient_VIP_plot.png')), 
295 |          height=3100, width=4100, res=340)
296 | dev.off();
297 | ```
298 | 
299 | ### Bootstrap validation
300 | ```{r, echo=TRUE}
301 | if(grepl("Windows", sessionInfo()$running)){
302 |   pls.options(parallel =NULL)
303 | } else {
304 |   pls.options(parallel = parallel::detectCores()-1)
305 | }
306 | 
307 | ### PLSR bootstrap permutation uncertainty analysis
308 | iterations <- 500    # how many permutation iterations to run
309 | prop <- 0.70          # fraction of training data to keep for each iteration
310 | plsr_permutation <- spectratrait::pls_permutation_by_groups(dataset=cal.plsr.data, 
311 |                                                             targetVariable=inVar,
312 |                                                             maxComps=nComps, 
313 |                                                             iterations=iterations, 
314 |                                                             prop=prop, group_variables="Species_Code", 
315 |                                                             verbose=FALSE)
316 | bootstrap_intercept <- plsr_permutation$coef_array[1,,nComps]
317 | bootstrap_coef <- plsr_permutation$coef_array[2:length(plsr_permutation$coef_array[,1,nComps]),
318 |                                               ,nComps]
319 | rm(plsr_permutation)
320 | 
321 | # apply coefficients to left-out validation data
322 | interval <- c(0.025,0.975)
323 | Bootstrap_Pred <- val.plsr.data$Spectra %*% bootstrap_coef + 
324 |   matrix(rep(bootstrap_intercept, length(val.plsr.data[,inVar])), byrow=TRUE, 
325 |          ncol=length(bootstrap_intercept))
326 | Interval_Conf <- apply(X = Bootstrap_Pred, MARGIN = 1, FUN = quantile, 
327 |                        probs=c(interval[1], interval[2]))
328 | sd_mean <- apply(X = Bootstrap_Pred, MARGIN = 1, FUN = sd)
329 | sd_res <- sd(val.plsr.output$PLSR_Residuals)
330 | sd_tot <- sqrt(sd_mean^2+sd_res^2)
331 | val.plsr.output$LCI <- Interval_Conf[1,]
332 | val.plsr.output$UCI <- Interval_Conf[2,]
333 | val.plsr.output$LPI <- val.plsr.output$PLSR_Predicted-1.96*sd_tot
334 | val.plsr.output$UPI <- val.plsr.output$PLSR_Predicted+1.96*sd_tot
335 | head(val.plsr.output)
336 | ```
337 | 
338 | ### Jackknife coefficient plot
339 | ```{r, fig.height = 6, fig.width = 10, echo=TRUE}
340 | # Bootstrap regression coefficient plot
341 | spectratrait::f.plot.coef(Z = t(bootstrap_coef), wv = wv, 
342 |             plot_label="Bootstrap regression coefficients",position = 'bottomleft')
343 | abline(h=0,lty=2,col="grey50")
344 | box(lwd=2.2)
345 | dev.copy(png,file.path(outdir,paste0(inVar,'_Bootstrap_Regression_Coefficients.png')), 
346 |          height=2100, width=3800, res=340)
347 | dev.off();
348 | ```
349 | 
350 | ### Bootstrap validation plot
351 | ```{r, fig.height = 7, fig.width = 8, echo=TRUE}
352 | rmsep_percrmsep <- spectratrait::percent_rmse(plsr_dataset = val.plsr.output, 
353 |                                               inVar = inVar, 
354 |                                               residuals = val.plsr.output$PLSR_Residuals, 
355 |                                               range="full")
356 | RMSEP <- rmsep_percrmsep$rmse
357 | perc_RMSEP <- rmsep_percrmsep$perc_rmse
358 | r2 <- round(pls::R2(plsr.out, newdata = val.plsr.data, intercept=F)$val[nComps],2)
359 | expr <- vector("expression", 3)
360 | expr[[1]] <- bquote(R^2==.(r2))
361 | expr[[2]] <- bquote(RMSEP==.(round(RMSEP,2)))
362 | expr[[3]] <- bquote("%RMSEP"==.(round(perc_RMSEP,2)))
363 | rng_vals <- c(min(val.plsr.output$LPI), max(val.plsr.output$UPI))
364 | par(mfrow=c(1,1), mar=c(4.2,5.3,1,0.4), oma=c(0, 0.1, 0, 0.2))
365 | plotrix::plotCI(val.plsr.output$PLSR_Predicted,val.plsr.output[,inVar], 
366 |        li=val.plsr.output$LPI, ui=val.plsr.output$UPI, gap=0.009,sfrac=0.000, 
367 |        lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), 
368 |        err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="grey80",
369 |        cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"),
370 |        ylab=paste0("Observed ", paste(inVar), " (units)"),
371 |        cex.axis=1.5,cex.lab=1.8)
372 | abline(0,1,lty=2,lw=2)
373 | plotrix::plotCI(val.plsr.output$PLSR_Predicted,val.plsr.output[,inVar], 
374 |        li=val.plsr.output$LCI, ui=val.plsr.output$UCI, gap=0.009,sfrac=0.004, 
375 |        lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), 
376 |        err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="black",
377 |        cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"),
378 |        ylab=paste0("Observed ", paste(inVar), " (units)"),
379 |        cex.axis=1.5,cex.lab=1.8, add=T)
380 | legend("topleft", legend=expr, bty="n", cex=1.5)
381 | legend("bottomright", legend=c("Prediction Interval","Confidence Interval"), 
382 |        lty=c(1,1), col = c("grey80","black"), lwd=3, bty="n", cex=1.5)
383 | box(lwd=2.2)
384 | dev.copy(png,file.path(outdir,paste0(inVar,"_PLSR_Validation_Scatterplot.png")), 
385 |          height=2800, width=3200,  res=340)
386 | dev.off();
387 | ```
388 | 
389 | ### Output bootstrap results
390 | ```{r, echo=TRUE}
391 | # Bootstrap Coefficients
392 | out.jk.coefs <- data.frame(Iteration=seq(1,length(bootstrap_intercept),1),
393 |                            Intercept=bootstrap_intercept,t(bootstrap_coef))
394 | names(out.jk.coefs) <- c("Iteration","Intercept",paste0("Wave_",wv))
395 | head(out.jk.coefs)[1:6]
396 | write.csv(out.jk.coefs,file=file.path(outdir,paste0(inVar,
397 |                                                     '_Bootstrap_PLSR_Coefficients.csv')),
398 |           row.names=FALSE)
399 | ```
400 | 
401 | ### Create core PLSR outputs
402 | ```{r, echo=TRUE}
403 | print(paste("Output directory: ", outdir))
404 | 
405 | # Observed versus predicted
406 | write.csv(cal.plsr.output,file=file.path(outdir,
407 |                                          paste0(inVar,'_Observed_PLSR_CV_Pred_',
408 |                                                 nComps,'comp.csv')),
409 |           row.names=FALSE)
410 | 
411 | # Validation data
412 | write.csv(val.plsr.output,file=file.path(outdir,
413 |                                          paste0(inVar,'_Validation_PLSR_Pred_',
414 |                                                 nComps,'comp.csv')),
415 |           row.names=FALSE)
416 | 
417 | # Model coefficients
418 | coefs <- coef(plsr.out,ncomp=nComps,intercept=TRUE)
419 | write.csv(coefs,file=file.path(outdir,
420 |                                paste0(inVar,'_PLSR_Coefficients_',
421 |                                       nComps,'comp.csv')),
422 |           row.names=TRUE)
423 | 
424 | # PLSR VIP
425 | write.csv(vips,file=file.path(outdir,
426 |                               paste0(inVar,'_PLSR_VIPs_',
427 |                                      nComps,'comp.csv')))
428 | ```
429 | 
430 | ### Confirm files were written to temp space
431 | ```{r, echo=TRUE}
432 | print("**** PLSR output files: ")
433 | print(list.files(outdir)[grep(pattern = inVar, list.files(outdir))])
434 | ```
435 | 


--------------------------------------------------------------------------------
/vignettes/ely_etal_ex1.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: Spectra-trait PLSR example using leaf-level spectra and leaf nitrogen content (Narea, g/m2) data from eight different crop species growing in a glasshouse at Brookhaven National Laboratory
  3 | author: "Shawn P. Serbin, Julien Lamour, & Jeremiah Anderson"
  4 | date: "`r Sys.Date()`"
  5 | output:
  6 |   pdf_document: default
  7 |   html_document:
  8 |     df_print: paged
  9 |     keep_md: true
 10 |   github_document: default
 11 |   html_notebook: default
 12 |   rmarkdown: html_vignette
 13 | vignette: >
 14 |   %\VignetteIndexEntry{Spectra-trait PLSR example using leaf-level spectra and leaf nitrogen content (Narea, g/m2) data from eight different crop species growing in a glasshouse at Brookhaven National Laboratory}
 15 |   %\usepackage[utf8]{inputenc}
 16 |   %\VignetteEngine{knitr::knitr}
 17 | ---
 18 | 
 19 | ```{r setup, include=FALSE, echo=FALSE}
 20 | knitr::opts_chunk$set(echo = TRUE)
 21 | ```
 22 | 
 23 | ### Overview
 24 | This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook to illustrate how to load an
 25 | internal dataset ("ely_plsr_data"), choose the "optimal" number of plsr components, 
 26 | and fit a plsr model for leaf nitrogen content (Narea, g/m2)
 27 | 
 28 | ### Getting Started
 29 | ### Load libraries
 30 | ```{r, eval=TRUE, echo=TRUE}
 31 | list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait")
 32 | invisible(lapply(list.of.packages, library, character.only = TRUE))
 33 | ```
 34 | 
 35 | ### Setup other functions and options
 36 | ```{r, echo=TRUE}
 37 | ### Setup options
 38 | 
 39 | # Script options
 40 | pls::pls.options(plsralg = "oscorespls")
 41 | pls::pls.options("plsralg")
 42 | 
 43 | # Default par options
 44 | opar <- par(no.readonly = T)
 45 | 
 46 | # Specify output directory, output_dir 
 47 | # Options: 
 48 | # tempdir - use a OS-specified temporary directory 
 49 | # user defined PATH - e.g. "~/scratch/PLSR"
 50 | output_dir <- "tempdir"
 51 | ```
 52 | 
 53 | ### Load internal Ely et al 2019 dataset
 54 | ```{r, echo=TRUE}
 55 | data("ely_plsr_data")
 56 | head(ely_plsr_data)[,1:8]
 57 | 
 58 | # What is the target variable?
 59 | inVar <- "N_g_m2"
 60 | ```
 61 | 
 62 | ### Set working directory (scratch space)
 63 | ```{r, echo=FALSE}
 64 | if (output_dir=="tempdir") {
 65 |   outdir <- tempdir()
 66 | } else {
 67 |   if (! file.exists(output_dir)) dir.create(output_dir,recursive=TRUE)
 68 |   outdir <- file.path(path.expand(output_dir))
 69 | }
 70 | setwd(outdir) # set working directory
 71 | getwd()  # check wd
 72 | ```
 73 | 
 74 | ### Full PLSR dataset
 75 | ```{r, echo=TRUE}
 76 | Start.wave <- 500
 77 | End.wave <- 2400
 78 | wv <- seq(Start.wave,End.wave,1)
 79 | plsr_data <- ely_plsr_data
 80 | head(plsr_data)[,1:6]
 81 | ```
 82 | ### Create cal/val datasets
 83 | ```{r, fig.height = 5, fig.width = 12, echo=TRUE}
 84 | ### Create cal/val datasets
 85 | ## Make a stratified random sampling in the strata USDA_Species_Code and Domain
 86 | 
 87 | method <- "base" #base/dplyr
 88 | # base R - a bit slow
 89 | # dplyr - much faster
 90 | split_data <- spectratrait::create_data_split(dataset=plsr_data, approach=method, 
 91 |                                               split_seed=23452135, prop=0.7, 
 92 |                                               group_variables="Species_Code")
 93 | names(split_data)
 94 | cal.plsr.data <- split_data$cal_data
 95 | head(cal.plsr.data)[1:8]
 96 | val.plsr.data <- split_data$val_data
 97 | head(val.plsr.data)[1:8]
 98 | rm(split_data)
 99 | 
100 | # Datasets:
101 | print(paste("Cal observations: ",dim(cal.plsr.data)[1],sep=""))
102 | print(paste("Val observations: ",dim(val.plsr.data)[1],sep=""))
103 | 
104 | cal_hist_plot <- ggplot(data = cal.plsr.data, 
105 |                         aes(x = cal.plsr.data[,paste0(inVar)])) + 
106 |   geom_histogram(fill=I("grey50"),col=I("black"),alpha=I(.7)) + 
107 |   labs(title=paste0("Calibration Histogram for ",inVar), x = paste0(inVar), 
108 |        y = "Count")
109 | val_hist_plot <- ggplot(data = val.plsr.data, 
110 |                         aes(x = val.plsr.data[,paste0(inVar)])) +
111 |   geom_histogram(fill=I("grey50"),col=I("black"),alpha=I(.7)) + 
112 |   labs(title=paste0("Validation Histogram for ",inVar), x = paste0(inVar), 
113 |        y = "Count")
114 | histograms <- grid.arrange(cal_hist_plot, val_hist_plot, ncol=2)
115 | ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Histograms.png")), 
116 |        plot = histograms, 
117 |        device="png", width = 30, 
118 |        height = 12, units = "cm",
119 |        dpi = 300)
120 | # output cal/val data
121 | write.csv(cal.plsr.data,file=file.path(outdir,paste0(inVar,'_Cal_PLSR_Dataset.csv')),
122 |           row.names=FALSE)
123 | write.csv(val.plsr.data,file=file.path(outdir,paste0(inVar,'_Val_PLSR_Dataset.csv')),
124 |           row.names=FALSE)
125 | ```
126 | 
127 | ### Create calibration and validation PLSR datasets
128 | ```{r, echo=TRUE}
129 | ### Format PLSR data for model fitting 
130 | cal_spec <- as.matrix(cal.plsr.data[, which(names(cal.plsr.data) %in% paste0("Wave_",wv))])
131 | cal.plsr.data <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% paste0("Wave_",wv))],
132 |                             Spectra=I(cal_spec))
133 | head(cal.plsr.data)[1:5]
134 | 
135 | val_spec <- as.matrix(val.plsr.data[, which(names(val.plsr.data) %in% paste0("Wave_",wv))])
136 | val.plsr.data <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% paste0("Wave_",wv))],
137 |                             Spectra=I(val_spec))
138 | head(val.plsr.data)[1:5]
139 | ```
140 | 
141 | ### plot cal and val spectra
142 | ```{r, fig.height = 5, fig.width = 12, echo=TRUE}
143 | par(mfrow=c(1,2)) # B, L, T, R
144 | spectratrait::f.plot.spec(Z=cal.plsr.data$Spectra,wv=wv,plot_label="Calibration")
145 | spectratrait::f.plot.spec(Z=val.plsr.data$Spectra,wv=wv,plot_label="Validation")
146 | 
147 | dev.copy(png,file.path(outdir,paste0(inVar,'_Cal_Val_Spectra.png')), 
148 |          height=2500,width=4900, res=340)
149 | dev.off();
150 | par(mfrow=c(1,1))
151 | ```
152 | 
153 | ### Use permutation to determine optimal number of components
154 | ```{r, fig.height = 6, fig.width = 10, echo=TRUE}
155 | ### Use permutation to determine the optimal number of components
156 | if(grepl("Windows", sessionInfo()$running)){
157 |   pls.options(parallel = NULL)
158 | } else {
159 |   pls.options(parallel = parallel::detectCores()-1)
160 | }
161 | 
162 | method <- "pls" #pls, firstPlateau, firstMin
163 | random_seed <- 1245565
164 | seg <- 50
165 | maxComps <- 16
166 | iterations <- 80
167 | prop <- 0.70
168 | if (method=="pls") {
169 |   nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar,
170 |                                                   method=method, 
171 |                                                   maxComps=maxComps, seg=seg, 
172 |                                                   random_seed=random_seed)
173 |   print(paste0("*** Optimal number of components: ", nComps))
174 | } else {
175 |   nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar,
176 |                                                   method=method,
177 |                                                   maxComps=maxComps, iterations=iterations, 
178 |                                                   seg=seg, prop=prop, 
179 |                                                   random_seed=random_seed)
180 | }
181 | dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_PLSR_Component_Selection.png"))), 
182 |          height=2800, width=3400,  res=340)
183 | dev.off();
184 | ```
185 | 
186 | ### Fit final model
187 | ```{r, fig.height = 5, fig.width = 12, echo=TRUE}
188 | plsr.out <- plsr(as.formula(paste(inVar,"~","Spectra")),scale=FALSE,ncomp=nComps,validation="LOO",
189 |                  trace=FALSE,data=cal.plsr.data)
190 | fit <- plsr.out$fitted.values[,1,nComps]
191 | pls.options(parallel = NULL)
192 | 
193 | # External validation fit stats
194 | par(mfrow=c(1,2)) # B, L, T, R
195 | pls::RMSEP(plsr.out, newdata = val.plsr.data)
196 | plot(pls::RMSEP(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL RMSEP",
197 |      xlab="Number of Components",ylab="Model Validation RMSEP",lty=1,col="black",cex=1.5,lwd=2)
198 | box(lwd=2.2)
199 | 
200 | pls::R2(plsr.out, newdata = val.plsr.data)
201 | plot(pls::R2(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL R2",
202 |      xlab="Number of Components",ylab="Model Validation R2",lty=1,col="black",cex=1.5,lwd=2)
203 | box(lwd=2.2)
204 | dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_Validation_RMSEP_R2_by_Component.png"))), 
205 |          height=2800, width=4800,  res=340)
206 | dev.off();
207 | par(opar)
208 | ```
209 | 
210 | ### PLSR fit observed vs. predicted plot data
211 | ```{r, fig.height = 15, fig.width = 15, echo=TRUE}  
212 | #calibration
213 | cal.plsr.output <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% "Spectra")],
214 |                               PLSR_Predicted=fit,
215 |                               PLSR_CV_Predicted=as.vector(plsr.out$validation$pred[,,nComps]))
216 | cal.plsr.output <- cal.plsr.output %>%
217 |   mutate(PLSR_CV_Residuals = PLSR_CV_Predicted-get(inVar))
218 | head(cal.plsr.output)
219 | cal.R2 <- round(pls::R2(plsr.out,intercept=F)[[1]][nComps],2)
220 | cal.RMSEP <- round(sqrt(mean(cal.plsr.output$PLSR_CV_Residuals^2)),2)
221 | 
222 | val.plsr.output <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% "Spectra")],
223 |                               PLSR_Predicted=as.vector(predict(plsr.out, 
224 |                                                                newdata = val.plsr.data, 
225 |                                                                ncomp=nComps, type="response")[,,1]))
226 | val.plsr.output <- val.plsr.output %>%
227 |   mutate(PLSR_Residuals = PLSR_Predicted-get(inVar))
228 | head(val.plsr.output)
229 | val.R2 <- round(pls::R2(plsr.out,newdata=val.plsr.data,intercept=F)[[1]][nComps],2)
230 | val.RMSEP <- round(sqrt(mean(val.plsr.output$PLSR_Residuals^2)),2)
231 | 
232 | rng_quant <- quantile(cal.plsr.output[,inVar], probs = c(0.001, 0.999))
233 | cal_scatter_plot <- ggplot(cal.plsr.output, aes(x=PLSR_CV_Predicted, y=get(inVar))) + 
234 |   theme_bw() + geom_point() + geom_abline(intercept = 0, slope = 1, color="dark grey", 
235 |                                           linetype="dashed", linewidth=1.5) + 
236 |   xlim(rng_quant[1], rng_quant[2]) + 
237 |   ylim(rng_quant[1], rng_quant[2]) +
238 |   labs(x=paste0("Predicted ", paste(inVar), " (units)"),
239 |        y=paste0("Observed ", paste(inVar), " (units)"),
240 |        title=paste0("Calibration: ", paste0("Rsq = ", cal.R2), "; ", paste0("RMSEP = ", 
241 |                                                                             cal.RMSEP))) +
242 |   theme(axis.text=element_text(size=18), legend.position="none",
243 |         axis.title=element_text(size=20, face="bold"), 
244 |         axis.text.x = element_text(angle = 0,vjust = 0.5),
245 |         panel.border = element_rect(linetype = "solid", fill = NA, linewidth=1.5))
246 | 
247 | cal_resid_histogram <- ggplot(cal.plsr.output, aes(x=PLSR_CV_Residuals)) +
248 |   geom_histogram(alpha=.5, position="identity") + 
249 |   geom_vline(xintercept = 0, color="black", 
250 |              linetype="dashed", linewidth=1) + theme_bw() + 
251 |   theme(axis.text=element_text(size=18), legend.position="none",
252 |         axis.title=element_text(size=20, face="bold"), 
253 |         axis.text.x = element_text(angle = 0,vjust = 0.5),
254 |         panel.border = element_rect(linetype = "solid", fill = NA, linewidth=1.5))
255 | 
256 | rng_quant <- quantile(val.plsr.output[,inVar], probs = c(0.001, 0.999))
257 | val_scatter_plot <- ggplot(val.plsr.output, aes(x=PLSR_Predicted, y=get(inVar))) + 
258 |   theme_bw() + geom_point() + geom_abline(intercept = 0, slope = 1, color="dark grey", 
259 |                                           linetype="dashed", linewidth=1.5) + 
260 |   xlim(rng_quant[1], rng_quant[2]) + 
261 |   ylim(rng_quant[1], rng_quant[2]) +
262 |   labs(x=paste0("Predicted ", paste(inVar), " (units)"),
263 |        y=paste0("Observed ", paste(inVar), " (units)"),
264 |        title=paste0("Validation: ", paste0("Rsq = ", val.R2), "; ", paste0("RMSEP = ", 
265 |                                                                            val.RMSEP))) +
266 |   theme(axis.text=element_text(size=18), legend.position="none",
267 |         axis.title=element_text(size=20, face="bold"), 
268 |         axis.text.x = element_text(angle = 0,vjust = 0.5),
269 |         panel.border = element_rect(linetype = "solid", fill = NA, linewidth=1.5))
270 | 
271 | val_resid_histogram <- ggplot(val.plsr.output, aes(x=PLSR_Residuals)) +
272 |   geom_histogram(alpha=.5, position="identity") + 
273 |   geom_vline(xintercept = 0, color="black", 
274 |              linetype="dashed", linewidth=1) + theme_bw() + 
275 |   theme(axis.text=element_text(size=18), legend.position="none",
276 |         axis.title=element_text(size=20, face="bold"), 
277 |         axis.text.x = element_text(angle = 0,vjust = 0.5),
278 |         panel.border = element_rect(linetype = "solid", fill = NA, linewidth=1.5))
279 | 
280 | # plot cal/val side-by-side
281 | scatterplots <- grid.arrange(cal_scatter_plot, val_scatter_plot, cal_resid_histogram, 
282 |                              val_resid_histogram, nrow=2,ncol=2)
283 | ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Scatterplots.png")), 
284 |        plot = scatterplots, device="png", 
285 |        width = 32, 
286 |        height = 30, units = "cm",
287 |        dpi = 300)
288 | ```
289 | 
290 | ### Generate Coefficient and VIP plots
291 | ```{r, fig.height = 9, fig.width = 10, echo=TRUE}
292 | vips <- spectratrait::VIP(plsr.out)[nComps,]
293 | par(mfrow=c(2,1))
294 | plot(plsr.out, plottype = "coef",xlab="Wavelength (nm)",
295 |      ylab="Regression coefficients",legendpos = "bottomright",
296 |      ncomp=nComps,lwd=2)
297 | box(lwd=2.2)
298 | plot(seq(Start.wave,End.wave,1),vips,xlab="Wavelength (nm)",ylab="VIP",cex=0.01)
299 | lines(seq(Start.wave,End.wave,1),vips,lwd=3)
300 | abline(h=0.8,lty=2,col="dark grey")
301 | box(lwd=2.2)
302 | dev.copy(png,file.path(outdir,paste0(inVar,'_Coefficient_VIP_plot.png')), 
303 |          height=3100, width=4100, res=340)
304 | dev.off();
305 | ```
306 | 
307 | ### Bootstrap validation
308 | ```{r, echo=TRUE}
309 | if(grepl("Windows", sessionInfo()$running)){
310 |   pls.options(parallel =NULL)
311 | } else {
312 |   pls.options(parallel = parallel::detectCores()-1)
313 | }
314 | 
315 | ### PLSR bootstrap permutation uncertainty analysis
316 | iterations <- 500    # how many permutation iterations to run
317 | prop <- 0.70          # fraction of training data to keep for each iteration
318 | plsr_permutation <- spectratrait::pls_permutation(dataset=cal.plsr.data, targetVariable=inVar,
319 |                                                   maxComps=nComps, 
320 |                                                   iterations=iterations, prop=prop,
321 |                                                   verbose = FALSE)
322 | bootstrap_intercept <- plsr_permutation$coef_array[1,,nComps]
323 | bootstrap_coef <- plsr_permutation$coef_array[2:length(plsr_permutation$coef_array[,1,nComps]),
324 |                                               ,nComps]
325 | rm(plsr_permutation)
326 | 
327 | # apply coefficients to left-out validation data
328 | interval <- c(0.025,0.975)
329 | Bootstrap_Pred <- val.plsr.data$Spectra %*% bootstrap_coef + 
330 |   matrix(rep(bootstrap_intercept, length(val.plsr.data[,inVar])), byrow=TRUE, 
331 |          ncol=length(bootstrap_intercept))
332 | Interval_Conf <- apply(X = Bootstrap_Pred, MARGIN = 1, FUN = quantile, 
333 |                        probs=c(interval[1], interval[2]))
334 | sd_mean <- apply(X = Bootstrap_Pred, MARGIN = 1, FUN = sd)
335 | sd_res <- sd(val.plsr.output$PLSR_Residuals)
336 | sd_tot <- sqrt(sd_mean^2+sd_res^2)
337 | val.plsr.output$LCI <- Interval_Conf[1,]
338 | val.plsr.output$UCI <- Interval_Conf[2,]
339 | val.plsr.output$LPI <- val.plsr.output$PLSR_Predicted-1.96*sd_tot
340 | val.plsr.output$UPI <- val.plsr.output$PLSR_Predicted+1.96*sd_tot
341 | head(val.plsr.output)
342 | ```
343 | 
344 | ### Jackknife coefficient plot
345 | ```{r, fig.height = 6, fig.width = 10, echo=TRUE}
346 | # Bootstrap regression coefficient plot
347 | spectratrait::f.plot.coef(Z = t(bootstrap_coef), wv = wv, 
348 |             plot_label="Bootstrap regression coefficients",position = 'bottomleft')
349 | abline(h=0,lty=2,col="grey50")
350 | box(lwd=2.2)
351 | dev.copy(png,file.path(outdir,paste0(inVar,'_Bootstrap_Regression_Coefficients.png')), 
352 |          height=2100, width=3800, res=340)
353 | dev.off();
354 | ```
355 | 
356 | ### Bootstrap validation plot
357 | ```{r, fig.height = 7, fig.width = 8, echo=TRUE}
358 | rmsep_percrmsep <- spectratrait::percent_rmse(plsr_dataset = val.plsr.output, 
359 |                                               inVar = inVar, 
360 |                                               residuals = val.plsr.output$PLSR_Residuals, 
361 |                                               range="full")
362 | RMSEP <- rmsep_percrmsep$rmse
363 | perc_RMSEP <- rmsep_percrmsep$perc_rmse
364 | r2 <- round(pls::R2(plsr.out, newdata = val.plsr.data, intercept=F)$val[nComps],2)
365 | expr <- vector("expression", 3)
366 | expr[[1]] <- bquote(R^2==.(r2))
367 | expr[[2]] <- bquote(RMSEP==.(round(RMSEP,2)))
368 | expr[[3]] <- bquote("%RMSEP"==.(round(perc_RMSEP,2)))
369 | rng_vals <- c(min(val.plsr.output$LPI), max(val.plsr.output$UPI))
370 | par(mfrow=c(1,1), mar=c(4.2,5.3,1,0.4), oma=c(0, 0.1, 0, 0.2))
371 | plotrix::plotCI(val.plsr.output$PLSR_Predicted,val.plsr.output[,inVar], 
372 |        li=val.plsr.output$LPI, ui=val.plsr.output$UPI, gap=0.009,sfrac=0.000, 
373 |        lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), 
374 |        err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="grey80",
375 |        cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"),
376 |        ylab=paste0("Observed ", paste(inVar), " (units)"),
377 |        cex.axis=1.5,cex.lab=1.8)
378 | abline(0,1,lty=2,lw=2)
379 | plotrix::plotCI(val.plsr.output$PLSR_Predicted,val.plsr.output[,inVar], 
380 |        li=val.plsr.output$LCI, ui=val.plsr.output$UCI, gap=0.009,sfrac=0.004, 
381 |        lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), 
382 |        err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="black",
383 |        cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"),
384 |        ylab=paste0("Observed ", paste(inVar), " (units)"),
385 |        cex.axis=1.5,cex.lab=1.8, add=T)
386 | legend("topleft", legend=expr, bty="n", cex=1.5)
387 | legend("bottomright", legend=c("Prediction Interval","Confidence Interval"), 
388 |        lty=c(1,1), col = c("grey80","black"), lwd=3, bty="n", cex=1.5)
389 | box(lwd=2.2)
390 | dev.copy(png,file.path(outdir,paste0(inVar,"_PLSR_Validation_Scatterplot.png")), 
391 |          height=2800, width=3200,  res=340)
392 | dev.off();
393 | ```
394 | 
395 | ### Output bootstrap results
396 | ```{r, echo=TRUE}
397 | # Bootstrap Coefficients
398 | out.jk.coefs <- data.frame(Iteration=seq(1,length(bootstrap_intercept),1),
399 |                            Intercept=bootstrap_intercept,t(bootstrap_coef))
400 | names(out.jk.coefs) <- c("Iteration","Intercept",paste0("Wave_",wv))
401 | head(out.jk.coefs)[1:6]
402 | write.csv(out.jk.coefs,file=file.path(outdir,paste0(inVar,
403 |                                                     '_Bootstrap_PLSR_Coefficients.csv')),
404 |           row.names=FALSE)
405 | ```
406 | 
407 | ### Create core PLSR outputs
408 | ```{r, echo=TRUE}
409 | print(paste("Output directory: ", outdir))
410 | 
411 | # Observed versus predicted
412 | write.csv(cal.plsr.output,file=file.path(outdir,
413 |                                          paste0(inVar,'_Observed_PLSR_CV_Pred_',
414 |                                                 nComps,'comp.csv')),
415 |           row.names=FALSE)
416 | 
417 | # Validation data
418 | write.csv(val.plsr.output,file=file.path(outdir,
419 |                                          paste0(inVar,'_Validation_PLSR_Pred_',
420 |                                                 nComps,'comp.csv')),
421 |           row.names=FALSE)
422 | 
423 | # Model coefficients
424 | coefs <- coef(plsr.out,ncomp=nComps,intercept=TRUE)
425 | write.csv(coefs,file=file.path(outdir,
426 |                                paste0(inVar,'_PLSR_Coefficients_',
427 |                                       nComps,'comp.csv')),
428 |           row.names=TRUE)
429 | 
430 | # PLSR VIP
431 | write.csv(vips,file=file.path(outdir,
432 |                               paste0(inVar,'_PLSR_VIPs_',
433 |                                      nComps,'comp.csv')))
434 | ```
435 | 
436 | ### Confirm files were written to temp space
437 | ```{r, echo=TRUE}
438 | print("**** PLSR output files: ")
439 | print(list.files(outdir)[grep(pattern = inVar, list.files(outdir))])
440 | ```
441 | 


--------------------------------------------------------------------------------