├── .Rbuildignore ├── tests ├── testthat.R └── testthat │ ├── test.get_ecosis_data.R │ ├── test.simple_plsr.R │ ├── test.create_data_split.R │ └── test.optimal_components.R ├── data └── ely_plsr_data.rda ├── spectratrait_1.2.6.pdf ├── vignettes ├── kit_sla_ex3.pdf ├── ely_etal_ex1.pdf ├── ely_etal_ex2.pdf ├── neon_lma_ex4.pdf ├── reseco_leafN_ex6.pdf ├── reseco_leafN_ex7.pdf ├── reseco_lma_ex8.pdf ├── neon_canopy_leafN_ex5.pdf ├── neon_canopy_lma_ex10.pdf ├── sserbin2019_plsr_ex9.pdf ├── kit_sla_ex3_files │ └── figure-gfm │ │ ├── unnamed-chunk-7-1.png │ │ ├── unnamed-chunk-9-1.png │ │ ├── unnamed-chunk-10-1.png │ │ ├── unnamed-chunk-11-1.png │ │ ├── unnamed-chunk-12-1.png │ │ ├── unnamed-chunk-13-1.png │ │ ├── unnamed-chunk-15-1.png │ │ └── unnamed-chunk-16-1.png ├── ely_etal_ex1_files │ └── figure-gfm │ │ ├── unnamed-chunk-10-1.png │ │ ├── unnamed-chunk-11-1.png │ │ ├── unnamed-chunk-12-1.png │ │ ├── unnamed-chunk-14-1.png │ │ ├── unnamed-chunk-15-1.png │ │ ├── unnamed-chunk-6-1.png │ │ ├── unnamed-chunk-8-1.png │ │ └── unnamed-chunk-9-1.png ├── ely_etal_ex2_files │ └── figure-gfm │ │ ├── unnamed-chunk-10-1.png │ │ ├── unnamed-chunk-11-1.png │ │ ├── unnamed-chunk-12-1.png │ │ ├── unnamed-chunk-14-1.png │ │ ├── unnamed-chunk-15-1.png │ │ ├── unnamed-chunk-6-1.png │ │ ├── unnamed-chunk-8-1.png │ │ └── unnamed-chunk-9-1.png ├── neon_lma_ex4_files │ └── figure-gfm │ │ ├── unnamed-chunk-10-1.png │ │ ├── unnamed-chunk-11-1.png │ │ ├── unnamed-chunk-12-1.png │ │ ├── unnamed-chunk-14-1.png │ │ ├── unnamed-chunk-15-1.png │ │ ├── unnamed-chunk-6-1.png │ │ ├── unnamed-chunk-8-1.png │ │ └── unnamed-chunk-9-1.png ├── reseco_leafN_ex6_files │ └── figure-gfm │ │ ├── unnamed-chunk-7-1.png │ │ ├── unnamed-chunk-9-1.png │ │ ├── unnamed-chunk-10-1.png │ │ ├── unnamed-chunk-11-1.png │ │ ├── unnamed-chunk-12-1.png │ │ ├── unnamed-chunk-13-1.png │ │ ├── unnamed-chunk-15-1.png │ │ └── unnamed-chunk-16-1.png ├── reseco_leafN_ex7_files │ └── figure-gfm │ │ ├── unnamed-chunk-7-1.png │ │ ├── unnamed-chunk-9-1.png │ │ ├── unnamed-chunk-10-1.png │ │ ├── unnamed-chunk-11-1.png │ │ ├── unnamed-chunk-12-1.png │ │ ├── unnamed-chunk-13-1.png │ │ ├── unnamed-chunk-15-1.png │ │ └── unnamed-chunk-16-1.png ├── reseco_lma_ex8_files │ └── figure-gfm │ │ ├── unnamed-chunk-10-1.png │ │ ├── unnamed-chunk-11-1.png │ │ ├── unnamed-chunk-12-1.png │ │ ├── unnamed-chunk-13-1.png │ │ ├── unnamed-chunk-15-1.png │ │ ├── unnamed-chunk-17-1.png │ │ ├── unnamed-chunk-18-1.png │ │ └── unnamed-chunk-7-1.png ├── neon_canopy_leafN_ex5_files │ └── figure-gfm │ │ ├── unnamed-chunk-7-1.png │ │ ├── unnamed-chunk-9-1.png │ │ ├── unnamed-chunk-10-1.png │ │ ├── unnamed-chunk-11-1.png │ │ ├── unnamed-chunk-12-1.png │ │ ├── unnamed-chunk-13-1.png │ │ ├── unnamed-chunk-15-1.png │ │ └── unnamed-chunk-16-1.png ├── neon_canopy_lma_ex10_files │ └── figure-gfm │ │ ├── unnamed-chunk-10-1.png │ │ ├── unnamed-chunk-11-1.png │ │ ├── unnamed-chunk-12-1.png │ │ ├── unnamed-chunk-13-1.png │ │ ├── unnamed-chunk-15-1.png │ │ ├── unnamed-chunk-16-1.png │ │ ├── unnamed-chunk-7-1.png │ │ └── unnamed-chunk-9-1.png ├── sserbin2019_plsr_ex9_files │ └── figure-gfm │ │ └── unnamed-chunk-11-1.png ├── sserbin2019_plsr_ex9.Rmd ├── sserbin2019_plsr_ex9.md ├── ely_etal_ex2.Rmd └── ely_etal_ex1.Rmd ├── R ├── plsr_data.R ├── get_ecosis_data.R ├── f.coef.valid.R ├── vip.R ├── f.plot.coef.R ├── f.plot.spec.R ├── utils.R ├── create_data_split.R ├── pls_permutation.R └── find_optimal_components.R ├── man ├── grapes-notin-grapes.Rd ├── testForPackage.Rd ├── VIP.Rd ├── VIPjh.Rd ├── ely_plsr_data.Rd ├── source_GitHubData.Rd ├── f.coef.valid.Rd ├── get_ecosis_data.Rd ├── f.plot.coef.Rd ├── percent_rmse.Rd ├── f.plot.spec.Rd ├── create_data_split.Rd ├── pls_permutation.Rd ├── find_optimal_components.Rd ├── find_optimal_comp_by_groups.Rd └── pls_permutation_by_groups.Rd ├── inst └── scripts │ ├── install_dependencies.R │ ├── pull_data_from_ecosis.R │ ├── apply_sserbin2019_lma_plsr_to_ely_example.R │ ├── apply_sserbin2019_lma_plsr_to_neon_example.R │ ├── apply_sserbin2014_leafN_plsr_to_ext_data_example.R │ └── simple_spectra-trait_plsr_example.R ├── .gitignore ├── NAMESPACE ├── data-raw └── plsr_data.R ├── .github └── workflows │ ├── run_ecosis_pull_example.yaml │ ├── check-r-release.yaml │ ├── run_plsr_example_auto.yaml │ ├── ci-run_plsr_example.yaml │ ├── ci-weekly.yaml │ └── check-os.yaml ├── DESCRIPTION └── README.md /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^data-raw$ 2 | ^doc$ 3 | ^Meta$ 4 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | test_check("spectratrait") -------------------------------------------------------------------------------- /data/ely_plsr_data.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/data/ely_plsr_data.rda -------------------------------------------------------------------------------- /spectratrait_1.2.6.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/spectratrait_1.2.6.pdf -------------------------------------------------------------------------------- /vignettes/kit_sla_ex3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/kit_sla_ex3.pdf -------------------------------------------------------------------------------- /vignettes/ely_etal_ex1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/ely_etal_ex1.pdf -------------------------------------------------------------------------------- /vignettes/ely_etal_ex2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/ely_etal_ex2.pdf -------------------------------------------------------------------------------- /vignettes/neon_lma_ex4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_lma_ex4.pdf -------------------------------------------------------------------------------- /vignettes/reseco_leafN_ex6.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_leafN_ex6.pdf -------------------------------------------------------------------------------- /vignettes/reseco_leafN_ex7.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_leafN_ex7.pdf -------------------------------------------------------------------------------- /vignettes/reseco_lma_ex8.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_lma_ex8.pdf -------------------------------------------------------------------------------- /R/plsr_data.R: -------------------------------------------------------------------------------- 1 | #' Ely et al (2019) example leaf-level PLSR dataset. DOI: https://doi.org/10.1093/jxb/erz061 2 | "ely_plsr_data" -------------------------------------------------------------------------------- /vignettes/neon_canopy_leafN_ex5.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_canopy_leafN_ex5.pdf -------------------------------------------------------------------------------- /vignettes/neon_canopy_lma_ex10.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_canopy_lma_ex10.pdf -------------------------------------------------------------------------------- /vignettes/sserbin2019_plsr_ex9.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/sserbin2019_plsr_ex9.pdf -------------------------------------------------------------------------------- /vignettes/kit_sla_ex3_files/figure-gfm/unnamed-chunk-7-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/kit_sla_ex3_files/figure-gfm/unnamed-chunk-7-1.png -------------------------------------------------------------------------------- /vignettes/kit_sla_ex3_files/figure-gfm/unnamed-chunk-9-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/kit_sla_ex3_files/figure-gfm/unnamed-chunk-9-1.png -------------------------------------------------------------------------------- /vignettes/ely_etal_ex1_files/figure-gfm/unnamed-chunk-10-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/ely_etal_ex1_files/figure-gfm/unnamed-chunk-10-1.png -------------------------------------------------------------------------------- /vignettes/ely_etal_ex1_files/figure-gfm/unnamed-chunk-11-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/ely_etal_ex1_files/figure-gfm/unnamed-chunk-11-1.png -------------------------------------------------------------------------------- /vignettes/ely_etal_ex1_files/figure-gfm/unnamed-chunk-12-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/ely_etal_ex1_files/figure-gfm/unnamed-chunk-12-1.png -------------------------------------------------------------------------------- /vignettes/ely_etal_ex1_files/figure-gfm/unnamed-chunk-14-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/ely_etal_ex1_files/figure-gfm/unnamed-chunk-14-1.png -------------------------------------------------------------------------------- /vignettes/ely_etal_ex1_files/figure-gfm/unnamed-chunk-15-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/ely_etal_ex1_files/figure-gfm/unnamed-chunk-15-1.png -------------------------------------------------------------------------------- /vignettes/ely_etal_ex1_files/figure-gfm/unnamed-chunk-6-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/ely_etal_ex1_files/figure-gfm/unnamed-chunk-6-1.png -------------------------------------------------------------------------------- /vignettes/ely_etal_ex1_files/figure-gfm/unnamed-chunk-8-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/ely_etal_ex1_files/figure-gfm/unnamed-chunk-8-1.png -------------------------------------------------------------------------------- /vignettes/ely_etal_ex1_files/figure-gfm/unnamed-chunk-9-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/ely_etal_ex1_files/figure-gfm/unnamed-chunk-9-1.png -------------------------------------------------------------------------------- /vignettes/ely_etal_ex2_files/figure-gfm/unnamed-chunk-10-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/ely_etal_ex2_files/figure-gfm/unnamed-chunk-10-1.png -------------------------------------------------------------------------------- /vignettes/ely_etal_ex2_files/figure-gfm/unnamed-chunk-11-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/ely_etal_ex2_files/figure-gfm/unnamed-chunk-11-1.png -------------------------------------------------------------------------------- /vignettes/ely_etal_ex2_files/figure-gfm/unnamed-chunk-12-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/ely_etal_ex2_files/figure-gfm/unnamed-chunk-12-1.png -------------------------------------------------------------------------------- /vignettes/ely_etal_ex2_files/figure-gfm/unnamed-chunk-14-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/ely_etal_ex2_files/figure-gfm/unnamed-chunk-14-1.png -------------------------------------------------------------------------------- /vignettes/ely_etal_ex2_files/figure-gfm/unnamed-chunk-15-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/ely_etal_ex2_files/figure-gfm/unnamed-chunk-15-1.png -------------------------------------------------------------------------------- /vignettes/ely_etal_ex2_files/figure-gfm/unnamed-chunk-6-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/ely_etal_ex2_files/figure-gfm/unnamed-chunk-6-1.png -------------------------------------------------------------------------------- /vignettes/ely_etal_ex2_files/figure-gfm/unnamed-chunk-8-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/ely_etal_ex2_files/figure-gfm/unnamed-chunk-8-1.png -------------------------------------------------------------------------------- /vignettes/ely_etal_ex2_files/figure-gfm/unnamed-chunk-9-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/ely_etal_ex2_files/figure-gfm/unnamed-chunk-9-1.png -------------------------------------------------------------------------------- /vignettes/kit_sla_ex3_files/figure-gfm/unnamed-chunk-10-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/kit_sla_ex3_files/figure-gfm/unnamed-chunk-10-1.png -------------------------------------------------------------------------------- /vignettes/kit_sla_ex3_files/figure-gfm/unnamed-chunk-11-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/kit_sla_ex3_files/figure-gfm/unnamed-chunk-11-1.png -------------------------------------------------------------------------------- /vignettes/kit_sla_ex3_files/figure-gfm/unnamed-chunk-12-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/kit_sla_ex3_files/figure-gfm/unnamed-chunk-12-1.png -------------------------------------------------------------------------------- /vignettes/kit_sla_ex3_files/figure-gfm/unnamed-chunk-13-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/kit_sla_ex3_files/figure-gfm/unnamed-chunk-13-1.png -------------------------------------------------------------------------------- /vignettes/kit_sla_ex3_files/figure-gfm/unnamed-chunk-15-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/kit_sla_ex3_files/figure-gfm/unnamed-chunk-15-1.png -------------------------------------------------------------------------------- /vignettes/kit_sla_ex3_files/figure-gfm/unnamed-chunk-16-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/kit_sla_ex3_files/figure-gfm/unnamed-chunk-16-1.png -------------------------------------------------------------------------------- /vignettes/neon_lma_ex4_files/figure-gfm/unnamed-chunk-10-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_lma_ex4_files/figure-gfm/unnamed-chunk-10-1.png -------------------------------------------------------------------------------- /vignettes/neon_lma_ex4_files/figure-gfm/unnamed-chunk-11-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_lma_ex4_files/figure-gfm/unnamed-chunk-11-1.png -------------------------------------------------------------------------------- /vignettes/neon_lma_ex4_files/figure-gfm/unnamed-chunk-12-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_lma_ex4_files/figure-gfm/unnamed-chunk-12-1.png -------------------------------------------------------------------------------- /vignettes/neon_lma_ex4_files/figure-gfm/unnamed-chunk-14-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_lma_ex4_files/figure-gfm/unnamed-chunk-14-1.png -------------------------------------------------------------------------------- /vignettes/neon_lma_ex4_files/figure-gfm/unnamed-chunk-15-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_lma_ex4_files/figure-gfm/unnamed-chunk-15-1.png -------------------------------------------------------------------------------- /vignettes/neon_lma_ex4_files/figure-gfm/unnamed-chunk-6-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_lma_ex4_files/figure-gfm/unnamed-chunk-6-1.png -------------------------------------------------------------------------------- /vignettes/neon_lma_ex4_files/figure-gfm/unnamed-chunk-8-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_lma_ex4_files/figure-gfm/unnamed-chunk-8-1.png -------------------------------------------------------------------------------- /vignettes/neon_lma_ex4_files/figure-gfm/unnamed-chunk-9-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_lma_ex4_files/figure-gfm/unnamed-chunk-9-1.png -------------------------------------------------------------------------------- /vignettes/reseco_leafN_ex6_files/figure-gfm/unnamed-chunk-7-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_leafN_ex6_files/figure-gfm/unnamed-chunk-7-1.png -------------------------------------------------------------------------------- /vignettes/reseco_leafN_ex6_files/figure-gfm/unnamed-chunk-9-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_leafN_ex6_files/figure-gfm/unnamed-chunk-9-1.png -------------------------------------------------------------------------------- /vignettes/reseco_leafN_ex7_files/figure-gfm/unnamed-chunk-7-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_leafN_ex7_files/figure-gfm/unnamed-chunk-7-1.png -------------------------------------------------------------------------------- /vignettes/reseco_leafN_ex7_files/figure-gfm/unnamed-chunk-9-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_leafN_ex7_files/figure-gfm/unnamed-chunk-9-1.png -------------------------------------------------------------------------------- /vignettes/reseco_lma_ex8_files/figure-gfm/unnamed-chunk-10-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_lma_ex8_files/figure-gfm/unnamed-chunk-10-1.png -------------------------------------------------------------------------------- /vignettes/reseco_lma_ex8_files/figure-gfm/unnamed-chunk-11-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_lma_ex8_files/figure-gfm/unnamed-chunk-11-1.png -------------------------------------------------------------------------------- /vignettes/reseco_lma_ex8_files/figure-gfm/unnamed-chunk-12-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_lma_ex8_files/figure-gfm/unnamed-chunk-12-1.png -------------------------------------------------------------------------------- /vignettes/reseco_lma_ex8_files/figure-gfm/unnamed-chunk-13-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_lma_ex8_files/figure-gfm/unnamed-chunk-13-1.png -------------------------------------------------------------------------------- /vignettes/reseco_lma_ex8_files/figure-gfm/unnamed-chunk-15-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_lma_ex8_files/figure-gfm/unnamed-chunk-15-1.png -------------------------------------------------------------------------------- /vignettes/reseco_lma_ex8_files/figure-gfm/unnamed-chunk-17-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_lma_ex8_files/figure-gfm/unnamed-chunk-17-1.png -------------------------------------------------------------------------------- /vignettes/reseco_lma_ex8_files/figure-gfm/unnamed-chunk-18-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_lma_ex8_files/figure-gfm/unnamed-chunk-18-1.png -------------------------------------------------------------------------------- /vignettes/reseco_lma_ex8_files/figure-gfm/unnamed-chunk-7-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_lma_ex8_files/figure-gfm/unnamed-chunk-7-1.png -------------------------------------------------------------------------------- /vignettes/reseco_leafN_ex6_files/figure-gfm/unnamed-chunk-10-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_leafN_ex6_files/figure-gfm/unnamed-chunk-10-1.png -------------------------------------------------------------------------------- /vignettes/reseco_leafN_ex6_files/figure-gfm/unnamed-chunk-11-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_leafN_ex6_files/figure-gfm/unnamed-chunk-11-1.png -------------------------------------------------------------------------------- /vignettes/reseco_leafN_ex6_files/figure-gfm/unnamed-chunk-12-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_leafN_ex6_files/figure-gfm/unnamed-chunk-12-1.png -------------------------------------------------------------------------------- /vignettes/reseco_leafN_ex6_files/figure-gfm/unnamed-chunk-13-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_leafN_ex6_files/figure-gfm/unnamed-chunk-13-1.png -------------------------------------------------------------------------------- /vignettes/reseco_leafN_ex6_files/figure-gfm/unnamed-chunk-15-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_leafN_ex6_files/figure-gfm/unnamed-chunk-15-1.png -------------------------------------------------------------------------------- /vignettes/reseco_leafN_ex6_files/figure-gfm/unnamed-chunk-16-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_leafN_ex6_files/figure-gfm/unnamed-chunk-16-1.png -------------------------------------------------------------------------------- /vignettes/reseco_leafN_ex7_files/figure-gfm/unnamed-chunk-10-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_leafN_ex7_files/figure-gfm/unnamed-chunk-10-1.png -------------------------------------------------------------------------------- /vignettes/reseco_leafN_ex7_files/figure-gfm/unnamed-chunk-11-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_leafN_ex7_files/figure-gfm/unnamed-chunk-11-1.png -------------------------------------------------------------------------------- /vignettes/reseco_leafN_ex7_files/figure-gfm/unnamed-chunk-12-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_leafN_ex7_files/figure-gfm/unnamed-chunk-12-1.png -------------------------------------------------------------------------------- /vignettes/reseco_leafN_ex7_files/figure-gfm/unnamed-chunk-13-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_leafN_ex7_files/figure-gfm/unnamed-chunk-13-1.png -------------------------------------------------------------------------------- /vignettes/reseco_leafN_ex7_files/figure-gfm/unnamed-chunk-15-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_leafN_ex7_files/figure-gfm/unnamed-chunk-15-1.png -------------------------------------------------------------------------------- /vignettes/reseco_leafN_ex7_files/figure-gfm/unnamed-chunk-16-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/reseco_leafN_ex7_files/figure-gfm/unnamed-chunk-16-1.png -------------------------------------------------------------------------------- /vignettes/neon_canopy_leafN_ex5_files/figure-gfm/unnamed-chunk-7-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_canopy_leafN_ex5_files/figure-gfm/unnamed-chunk-7-1.png -------------------------------------------------------------------------------- /vignettes/neon_canopy_leafN_ex5_files/figure-gfm/unnamed-chunk-9-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_canopy_leafN_ex5_files/figure-gfm/unnamed-chunk-9-1.png -------------------------------------------------------------------------------- /vignettes/neon_canopy_lma_ex10_files/figure-gfm/unnamed-chunk-10-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_canopy_lma_ex10_files/figure-gfm/unnamed-chunk-10-1.png -------------------------------------------------------------------------------- /vignettes/neon_canopy_lma_ex10_files/figure-gfm/unnamed-chunk-11-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_canopy_lma_ex10_files/figure-gfm/unnamed-chunk-11-1.png -------------------------------------------------------------------------------- /vignettes/neon_canopy_lma_ex10_files/figure-gfm/unnamed-chunk-12-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_canopy_lma_ex10_files/figure-gfm/unnamed-chunk-12-1.png -------------------------------------------------------------------------------- /vignettes/neon_canopy_lma_ex10_files/figure-gfm/unnamed-chunk-13-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_canopy_lma_ex10_files/figure-gfm/unnamed-chunk-13-1.png -------------------------------------------------------------------------------- /vignettes/neon_canopy_lma_ex10_files/figure-gfm/unnamed-chunk-15-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_canopy_lma_ex10_files/figure-gfm/unnamed-chunk-15-1.png -------------------------------------------------------------------------------- /vignettes/neon_canopy_lma_ex10_files/figure-gfm/unnamed-chunk-16-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_canopy_lma_ex10_files/figure-gfm/unnamed-chunk-16-1.png -------------------------------------------------------------------------------- /vignettes/neon_canopy_lma_ex10_files/figure-gfm/unnamed-chunk-7-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_canopy_lma_ex10_files/figure-gfm/unnamed-chunk-7-1.png -------------------------------------------------------------------------------- /vignettes/neon_canopy_lma_ex10_files/figure-gfm/unnamed-chunk-9-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_canopy_lma_ex10_files/figure-gfm/unnamed-chunk-9-1.png -------------------------------------------------------------------------------- /vignettes/sserbin2019_plsr_ex9_files/figure-gfm/unnamed-chunk-11-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/sserbin2019_plsr_ex9_files/figure-gfm/unnamed-chunk-11-1.png -------------------------------------------------------------------------------- /vignettes/neon_canopy_leafN_ex5_files/figure-gfm/unnamed-chunk-10-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_canopy_leafN_ex5_files/figure-gfm/unnamed-chunk-10-1.png -------------------------------------------------------------------------------- /vignettes/neon_canopy_leafN_ex5_files/figure-gfm/unnamed-chunk-11-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_canopy_leafN_ex5_files/figure-gfm/unnamed-chunk-11-1.png -------------------------------------------------------------------------------- /vignettes/neon_canopy_leafN_ex5_files/figure-gfm/unnamed-chunk-12-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_canopy_leafN_ex5_files/figure-gfm/unnamed-chunk-12-1.png -------------------------------------------------------------------------------- /vignettes/neon_canopy_leafN_ex5_files/figure-gfm/unnamed-chunk-13-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_canopy_leafN_ex5_files/figure-gfm/unnamed-chunk-13-1.png -------------------------------------------------------------------------------- /vignettes/neon_canopy_leafN_ex5_files/figure-gfm/unnamed-chunk-15-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_canopy_leafN_ex5_files/figure-gfm/unnamed-chunk-15-1.png -------------------------------------------------------------------------------- /vignettes/neon_canopy_leafN_ex5_files/figure-gfm/unnamed-chunk-16-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plantphys/spectratrait/HEAD/vignettes/neon_canopy_leafN_ex5_files/figure-gfm/unnamed-chunk-16-1.png -------------------------------------------------------------------------------- /tests/testthat/test.get_ecosis_data.R: -------------------------------------------------------------------------------- 1 | context("Grabbing data via the EcoSIS API works") 2 | 3 | test_that("Downloading data from EcoSIS doesnt throw an error", { 4 | ecosis_id <- "960dbb0c-144e-4563-8117-9e23d14f4aa9" 5 | dat_raw <- get_ecosis_data(ecosis_id = ecosis_id) 6 | expect_true(!is.null(dat_raw)) 7 | }) -------------------------------------------------------------------------------- /man/grapes-notin-grapes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{\%notin\%} 4 | \alias{\%notin\%} 5 | \title{Not \%in\% function} 6 | \usage{ 7 | x \%notin\% table 8 | } 9 | \arguments{ 10 | \item{x}{initial list} 11 | 12 | \item{table}{list to check against} 13 | } 14 | \description{ 15 | Not \%in\% function 16 | } 17 | -------------------------------------------------------------------------------- /man/testForPackage.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{testForPackage} 4 | \alias{testForPackage} 5 | \title{Function to check for installed package} 6 | \usage{ 7 | testForPackage(pkg) 8 | } 9 | \arguments{ 10 | \item{pkg}{name of package to check if installed 11 | not presently used} 12 | } 13 | \description{ 14 | Function to check for installed package 15 | } 16 | -------------------------------------------------------------------------------- /man/VIP.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/vip.R 3 | \name{VIP} 4 | \alias{VIP} 5 | \title{VIP returns all VIP values for all variables and all number of components, as a ncomp x nvars matrix.} 6 | \usage{ 7 | VIP(object) 8 | } 9 | \arguments{ 10 | \item{object}{fitted pls::plsr object} 11 | } 12 | \description{ 13 | VIP returns all VIP values for all variables and all number of components, as a ncomp x nvars matrix. 14 | } 15 | -------------------------------------------------------------------------------- /man/VIPjh.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/vip.R 3 | \name{VIPjh} 4 | \alias{VIPjh} 5 | \title{VIPjh returns the VIP of variable j with h components} 6 | \usage{ 7 | VIPjh(object, j, h) 8 | } 9 | \arguments{ 10 | \item{object}{fitted pls::plsr object} 11 | 12 | \item{j}{which variable in the fitted pls::plsr object} 13 | 14 | \item{h}{the number of components in the fitted pls::plsr object to calculate the VIP} 15 | } 16 | \description{ 17 | VIPjh returns the VIP of variable j with h components 18 | } 19 | -------------------------------------------------------------------------------- /man/ely_plsr_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plsr_data.R 3 | \docType{data} 4 | \name{ely_plsr_data} 5 | \alias{ely_plsr_data} 6 | \title{Ely et al (2019) example leaf-level PLSR dataset. DOI: https://doi.org/10.1093/jxb/erz061} 7 | \format{ 8 | An object of class \code{data.frame} with 178 rows and 1908 columns. 9 | } 10 | \usage{ 11 | ely_plsr_data 12 | } 13 | \description{ 14 | Ely et al (2019) example leaf-level PLSR dataset. DOI: https://doi.org/10.1093/jxb/erz061 15 | } 16 | \keyword{datasets} 17 | -------------------------------------------------------------------------------- /man/source_GitHubData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{source_GitHubData} 4 | \alias{source_GitHubData} 5 | \title{Function to source text data from GitHub} 6 | \usage{ 7 | source_GitHubData(url, sep = ",", header = TRUE) 8 | } 9 | \arguments{ 10 | \item{url}{http/https URL to the github dataset} 11 | 12 | \item{sep}{dataset file delimiter} 13 | 14 | \item{header}{TRUE/FALSE does the file have a column header?} 15 | } 16 | \description{ 17 | Function to source text data from GitHub 18 | } 19 | \author{ 20 | gist.github.com/christophergandrud/4466237 21 | } 22 | -------------------------------------------------------------------------------- /man/f.coef.valid.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/f.coef.valid.R 3 | \name{f.coef.valid} 4 | \alias{f.coef.valid} 5 | \title{f.coef.valid} 6 | \usage{ 7 | f.coef.valid(plsr.out, data_plsr, ncomp, inVar) 8 | } 9 | \arguments{ 10 | \item{plsr.out}{plsr model obtained with jaccknife = TRUE} 11 | 12 | \item{data_plsr}{data used for the plsr model with Spectra the matrix of spectra} 13 | 14 | \item{ncomp}{number of selection components} 15 | 16 | \item{inVar}{Name of the PLSR model response variable} 17 | } 18 | \value{ 19 | B returns the intercept and the coefficients of the jackknife or bootstrap validation 20 | } 21 | \description{ 22 | f.coef.valid 23 | } 24 | \author{ 25 | Julien Lamour 26 | } 27 | -------------------------------------------------------------------------------- /man/get_ecosis_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_ecosis_data.R 3 | \name{get_ecosis_data} 4 | \alias{get_ecosis_data} 5 | \title{Function to pull data from EcoSIS using the EcoSIS API} 6 | \usage{ 7 | get_ecosis_data(ecosis_id = NULL) 8 | } 9 | \arguments{ 10 | \item{ecosis_id}{the alphanumeric EcoSIS API dataset ID} 11 | } 12 | \value{ 13 | EcoSIS spectral dataset object 14 | } 15 | \description{ 16 | Function to pull data from EcoSIS using the EcoSIS API 17 | } 18 | \examples{ 19 | \dontrun{ 20 | ecosis_id <- "960dbb0c-144e-4563-8117-9e23d14f4aa9" 21 | dat_raw <- get_ecosis_data(ecosis_id = ecosis_id) 22 | head(dat_raw) 23 | names(dat_raw)[1:40] 24 | } 25 | 26 | } 27 | \author{ 28 | Shawn P. Serbin, Alexey Shiklomanov 29 | } 30 | -------------------------------------------------------------------------------- /inst/scripts/install_dependencies.R: -------------------------------------------------------------------------------- 1 | #################################################################################################### 2 | # Install dependencies 3 | # 4 | #################################################################################################### 5 | 6 | 7 | #--------------------------------------------------------------------------------------------------# 8 | req.packages <- c("devtools","remotes","readr","RCurl","httr","pls","dplyr","reshape2","here", 9 | "plotrix","scales","ggplot2","gridExtra") 10 | new.packages <- req.packages[!(req.packages %in% installed.packages()[,"Package"])] 11 | if(length(new.packages)) install.packages(new.packages, dependencies=TRUE) 12 | #--------------------------------------------------------------------------------------------------# -------------------------------------------------------------------------------- /man/f.plot.coef.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/f.plot.coef.R 3 | \name{f.plot.coef} 4 | \alias{f.plot.coef} 5 | \title{f.plot.coef} 6 | \usage{ 7 | f.plot.coef( 8 | Z, 9 | wv, 10 | xlim = NULL, 11 | position = "topright", 12 | type = "Coefficient", 13 | plot_label = NULL 14 | ) 15 | } 16 | \arguments{ 17 | \item{Z}{Coefficient matrix with each row corresponding to the coefficients and wavelength in columns} 18 | 19 | \item{wv}{vector of wavelengths} 20 | 21 | \item{xlim}{vector to change the default xlim of the plots (ex xlim = c(500, 2400))} 22 | 23 | \item{position}{Position of the legend (see base function legend for help)} 24 | 25 | \item{type}{Name of the y axis and of the legend} 26 | 27 | \item{plot_label}{optional plot label to include with the figure} 28 | } 29 | \description{ 30 | f.plot.coef 31 | } 32 | \author{ 33 | Julien Lamour 34 | } 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # R test files 2 | Rplots.pdf 3 | 4 | # History files 5 | .Rhistory 6 | .Rapp.history 7 | 8 | # Session Data files 9 | .RData 10 | 11 | # Example code in package build process 12 | *-Ex.R 13 | 14 | # Output files from R CMD build 15 | /*.tar.gz 16 | 17 | # Output files from R CMD check 18 | /*.Rcheck/ 19 | 20 | # RStudio files 21 | .Rproj.user/ 22 | 23 | # produced vignettes 24 | vignettes/*.html 25 | vignettes/*.nb.html 26 | ## commented out to preserve pdf for display in GitHub 27 | #vignettes/*.pdf 28 | #vignettes/*_files/ 29 | 30 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 31 | .httr-oauth 32 | 33 | # knitr and R markdown default cache directories 34 | /*_cache/ 35 | /cache/ 36 | 37 | # Temporary files created by R markdown 38 | *.utf8.md 39 | *.knit.md 40 | 41 | # Shiny token, see https://shiny.rstudio.com/articles/shinyapps.html 42 | rsconnect/ 43 | 44 | # MacOSX 45 | *.DS* 46 | *._* 47 | /doc/ 48 | /Meta/ 49 | -------------------------------------------------------------------------------- /man/percent_rmse.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{percent_rmse} 4 | \alias{percent_rmse} 5 | \title{Calculate RMSE and percent RMSE with PLSR model results} 6 | \usage{ 7 | percent_rmse( 8 | plsr_dataset = NULL, 9 | inVar = NULL, 10 | residuals = NULL, 11 | range = "full" 12 | ) 13 | } 14 | \arguments{ 15 | \item{plsr_dataset}{input plsr dataset} 16 | 17 | \item{inVar}{the trait variable used in the calculation of RMSE} 18 | 19 | \item{residuals}{predicted minus observed residual vector from either 20 | a cross-validation CV or independent validation} 21 | 22 | \item{range}{calculate over the full data range or the 95\% of data range. 23 | options full or 95perc} 24 | } 25 | \value{ 26 | output a list containing the rmse and perc_rmse. 27 | output <- list(rmse = rmse, perc_rmse = perc_rmse) 28 | } 29 | \description{ 30 | Calculate RMSE and percent RMSE with PLSR model results 31 | } 32 | \author{ 33 | Shawn P. Serbin 34 | } 35 | -------------------------------------------------------------------------------- /R/get_ecosis_data.R: -------------------------------------------------------------------------------- 1 | ##' Function to pull data from EcoSIS using the EcoSIS API 2 | ##' 3 | ##' @param ecosis_id the alphanumeric EcoSIS API dataset ID 4 | ##' 5 | ##' @examples 6 | ##' \dontrun{ 7 | ##' ecosis_id <- "960dbb0c-144e-4563-8117-9e23d14f4aa9" 8 | ##' dat_raw <- get_ecosis_data(ecosis_id = ecosis_id) 9 | ##' head(dat_raw) 10 | ##' names(dat_raw)[1:40] 11 | ##' } 12 | ##' 13 | ##' @importFrom readr read_csv 14 | ##' 15 | ##' @return EcoSIS spectral dataset object 16 | ##' 17 | ##' @author Shawn P. Serbin, Alexey Shiklomanov 18 | ##' @export 19 | get_ecosis_data <- function(ecosis_id = NULL) { 20 | if(!is.null(ecosis_id)) { 21 | print("**** Downloading Ecosis data ****") 22 | ecosis_id <- ecosis_id 23 | ecosis_file <- sprintf( 24 | "https://ecosis.org/api/package/%s/export?metadata=true", 25 | ecosis_id) 26 | message("Downloading data...") 27 | dat_raw <- readr::read_csv(ecosis_file) 28 | message("Download complete!") 29 | return(dat_raw) 30 | } else { 31 | stop("**** No EcoSIS ID provided. Please provide a valid ID before proceeding ****") 32 | } 33 | } -------------------------------------------------------------------------------- /man/f.plot.spec.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/f.plot.spec.R 3 | \name{f.plot.spec} 4 | \alias{f.plot.spec} 5 | \title{f.plot.spec} 6 | \usage{ 7 | f.plot.spec( 8 | Z, 9 | wv, 10 | xlim = NULL, 11 | position = "topright", 12 | type = "Reflectance", 13 | plot_label = NULL, 14 | CI = 95 15 | ) 16 | } 17 | \arguments{ 18 | \item{Z}{Spectra matrix with each row corresponding to a spectra and wavelength in columns} 19 | 20 | \item{wv}{vector of wavelengths corresponding to the column of the spectra matrix Z} 21 | 22 | \item{xlim}{vector to change the default xlim of the plots (ex xlim = c(500, 2400))} 23 | 24 | \item{position}{Position of the legend (see base function legend for help)} 25 | 26 | \item{type}{Name of the y axis and of the legend. E.g. Reflectance, Transmittance} 27 | 28 | \item{plot_label}{optional plot label to include with the figure} 29 | 30 | \item{CI}{Desired confidence interval for the spectra plot. Options are: 95 31 | or 90. Default is: 95} 32 | } 33 | \description{ 34 | f.plot.spec 35 | } 36 | \author{ 37 | Julien Lamour, Shawn P. Serbin, Andrés Baresch 38 | } 39 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export("%notin%") 4 | export(VIP) 5 | export(VIPjh) 6 | export(create_data_split) 7 | export(f.coef.valid) 8 | export(f.plot.coef) 9 | export(f.plot.spec) 10 | export(find_optimal_comp_by_groups) 11 | export(find_optimal_components) 12 | export(get_ecosis_data) 13 | export(percent_rmse) 14 | export(pls_permutation) 15 | export(pls_permutation_by_groups) 16 | export(source_GitHubData) 17 | import(ggplot2) 18 | import(httr) 19 | importFrom(dplyr,all_of) 20 | importFrom(dplyr,group_by_at) 21 | importFrom(dplyr,mutate) 22 | importFrom(dplyr,n) 23 | importFrom(dplyr,row_number) 24 | importFrom(dplyr,slice) 25 | importFrom(dplyr,vars) 26 | importFrom(graphics,box) 27 | importFrom(graphics,legend) 28 | importFrom(graphics,lines) 29 | importFrom(graphics,polygon) 30 | importFrom(magrittr,"%>%") 31 | importFrom(pls,plsr) 32 | importFrom(pls,selectNcomp) 33 | importFrom(readr,read_csv) 34 | importFrom(reshape2,melt) 35 | importFrom(stats,as.formula) 36 | importFrom(stats,coef) 37 | importFrom(stats,predict) 38 | importFrom(stats,quantile) 39 | importFrom(stats,t.test) 40 | importFrom(utils,flush.console) 41 | importFrom(utils,read.table) 42 | importFrom(utils,setTxtProgressBar) 43 | importFrom(utils,txtProgressBar) 44 | -------------------------------------------------------------------------------- /man/create_data_split.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/create_data_split.R 3 | \name{create_data_split} 4 | \alias{create_data_split} 5 | \title{Create a calibration (training) / validation data split for PLSR model fitting and testing} 6 | \usage{ 7 | create_data_split( 8 | dataset = NULL, 9 | approach = NULL, 10 | split_seed = 123456789, 11 | prop = 0.8, 12 | group_variables = NULL 13 | ) 14 | } 15 | \arguments{ 16 | \item{dataset}{input full PLSR dataset to split into cal/val datasets} 17 | 18 | \item{approach}{approach to splitting the dataset. Options: base or dplyr} 19 | 20 | \item{split_seed}{random seed to use for splitting data} 21 | 22 | \item{prop}{the proportion of data to preserve for calibration (e.g. 0.8) and validation (0.2). 23 | This sets the calibration proportion} 24 | 25 | \item{group_variables}{Use factor variables to conduct a stratified sampling for cal/val} 26 | } 27 | \value{ 28 | output_list A list containing the calibration dataset (cal_data) 29 | and validation dataset (val_data) 30 | } 31 | \description{ 32 | Create a calibration (training) / validation data split for PLSR model fitting and testing 33 | } 34 | \author{ 35 | Julien Lamour, Jeremiah Anderson, Shawn P. Serbin 36 | } 37 | -------------------------------------------------------------------------------- /data-raw/plsr_data.R: -------------------------------------------------------------------------------- 1 | getwd() 2 | devtools::load_all() 3 | library(dplyr) 4 | `%notin%` <- Negate(`%in%`) 5 | 6 | # get data 7 | ecosis_id <- "25770ad9-d47c-428b-bf99-d1543a4b0ec9" 8 | dat_raw <- spectratrait::get_ecosis_data(ecosis_id = ecosis_id) 9 | head(dat_raw) 10 | names(dat_raw)[1:60] 11 | 12 | # setup data 13 | Start.wave <- 500 14 | End.wave <- 2400 15 | wv <- seq(Start.wave,End.wave,1) 16 | Spectra <- as.matrix(dat_raw[,names(dat_raw) %in% wv]) 17 | colnames(Spectra) <- c(paste0("Wave_",wv)) 18 | head(Spectra)[1:6,1:10] 19 | sample_info <- dat_raw[,names(dat_raw) %notin% seq(350,2500,1)] 20 | head(sample_info) 21 | 22 | sample_info2 <- sample_info %>% 23 | select(Species_Code=`USDA Symbol`, Common_Name=`Common Name`, C_N_mass, C_g_m2, 24 | H20_g_m2, LMA_g_m2, N_g_m2, QC) 25 | 26 | plsr_data <- data.frame(sample_info2,Spectra) 27 | plsr_data <- plsr_data %>% 28 | filter(is.na(QC)) %>% 29 | select(-QC) 30 | plsr_data <- plsr_data[complete.cases(plsr_data),] 31 | 32 | write.csv(x = plsr_data, file = "ely_plsr_data.csv", row.names = F) 33 | ely_plsr_data <- read.csv("ely_plsr_data.csv", header = T) 34 | 35 | usethis::use_data(ely_plsr_data, overwrite = TRUE, internal = FALSE) 36 | 37 | 38 | -------------------------------------------------------------------------------- /R/f.coef.valid.R: -------------------------------------------------------------------------------- 1 | ## Return the intercept and the coefficients of the jackknife validation 2 | ##' @title f.coef.valid 3 | ##' 4 | ##' @param plsr.out plsr model obtained with jaccknife = TRUE 5 | ##' @param data_plsr data used for the plsr model with Spectra the matrix of spectra 6 | ##' @param ncomp number of selection components 7 | ##' @param inVar Name of the PLSR model response variable 8 | ##' 9 | ##' @return B returns the intercept and the coefficients of the jackknife or bootstrap validation 10 | ##' 11 | ##' @author Julien Lamour 12 | ##' @export 13 | f.coef.valid <- function(plsr.out, data_plsr, ncomp, inVar) { 14 | ## Only work in the case where center=TRUE in the plsr model 15 | B <- plsr.out$validation$coefficients[, , ncomp,, drop = FALSE] 16 | dB <- dim(B) 17 | dB[1] <- dB[1] + 1 18 | dnB <- dimnames(B) 19 | dnB[[1]] <- c("(Intercept)", dnB[[1]]) 20 | BInt <- array(dim = dB, dimnames = dnB) 21 | BInt[-1, , ,] <- B 22 | nseg=dB[[4]] 23 | for (i in 1:nseg){ 24 | Y<-data_plsr[,inVar] 25 | Y<-Y[-plsr.out$validation$segments[[i]]] 26 | Ymeans<-mean(Y) 27 | X<-data_plsr$Spectra 28 | X<-X[-plsr.out$validation$segments[[i]],] 29 | Xmeans<-colMeans(X) 30 | BInt[1, , ,i] <- Ymeans - Xmeans %*% B[, , , i] 31 | } 32 | B <- BInt 33 | return(B) 34 | } -------------------------------------------------------------------------------- /tests/testthat/test.simple_plsr.R: -------------------------------------------------------------------------------- 1 | context("*** Test simple PLSR model fit using Ely et al. (2019) data ***") 2 | 3 | ### Setup data for tests 4 | #Load Ely et al 2019 dataset 5 | data("ely_plsr_data") 6 | inVar <- "N_g_m2" 7 | Start.wave <- 500 8 | End.wave <- 2400 9 | wv <- seq(Start.wave,End.wave,1) 10 | plsr_data <- ely_plsr_data 11 | spec <- as.matrix(plsr_data[, which(names(plsr_data) %in% paste0("Wave_",wv))]) 12 | plsr_data <- data.frame(plsr_data[, which(names(plsr_data) %notin% paste0("Wave_",wv))], 13 | Spectra=I(spec)) 14 | 15 | test_that("Test fitting simple PLSR model", { 16 | method <- "firstPlateau" 17 | random_seed <- 1245565 18 | seg <- 50 19 | maxComps <- 20 20 | iterations <- 80 21 | prop <- 0.70 22 | 23 | nComps <- spectratrait::find_optimal_components(dataset=plsr_data, targetVariable=inVar, 24 | method=method, maxComps=maxComps, 25 | iterations=iterations, seg=seg, prop=prop, 26 | random_seed=random_seed) 27 | 28 | segs <- 100 29 | plsr.out <- plsr(as.formula(paste(inVar,"~","Spectra")),scale=FALSE,ncomp=nComps,validation="CV", 30 | segments=segs, segment.type="interleaved",trace=FALSE,data=plsr_data) 31 | fit <- plsr.out$fitted.values[,1,nComps] 32 | expect_type(fit, "double") 33 | }) -------------------------------------------------------------------------------- /.github/workflows/run_ecosis_pull_example.yaml: -------------------------------------------------------------------------------- 1 | name: run_ecosis_pull_example 2 | 3 | on: 4 | schedule: 5 | - cron: '15 1 */5 * *' 6 | 7 | env: 8 | R_LIBS_USER: /usr/local/lib/R/site-library 9 | LC_ALL: en_US.UTF-8 10 | NCPUS: 2 11 | 12 | jobs: 13 | run-plsr: 14 | # The type of runner that the job will run on 15 | runs-on: ubuntu-latest 16 | 17 | env: 18 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 19 | R_KEEP_PKG_SOURCE: yes 20 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true 21 | 22 | steps: 23 | - uses: actions/checkout@v4 24 | 25 | - uses: r-lib/actions/setup-r@v2 26 | with: 27 | use-public-rspm: true 28 | 29 | - uses: r-lib/actions/setup-r-dependencies@v2 30 | 31 | - name: Query dependencies 32 | run: | 33 | install.packages('remotes') 34 | saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) 35 | writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") 36 | shell: Rscript {0} 37 | 38 | - name: Install dependencies 39 | run: | 40 | Rscript -e 'remotes::install_github(repo="TESTgroup-BNL/spectratrait", dependencies=TRUE)' 41 | 42 | # Run R script 43 | - name: Run EcoSIS API Pull Example 44 | run: | 45 | source("inst/scripts/pull_data_from_ecosis.R") 46 | shell: Rscript {0} -------------------------------------------------------------------------------- /man/pls_permutation.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pls_permutation.R 3 | \name{pls_permutation} 4 | \alias{pls_permutation} 5 | \title{Run a PLSR model permutation analysis. Can be used to determine the optimal number of components 6 | or conduct a boostrap uncertainty analysis} 7 | \usage{ 8 | pls_permutation( 9 | dataset = NULL, 10 | targetVariable = NULL, 11 | maxComps = 20, 12 | iterations = 20, 13 | prop = 0.7, 14 | verbose = FALSE 15 | ) 16 | } 17 | \arguments{ 18 | \item{dataset}{input full PLSR dataset. Usually just the calibration dataset} 19 | 20 | \item{targetVariable}{What object or variable to use as the Y (predictand) in the PLSR model? 21 | Usually the "inVar" variable set at the beginning of a PLS script} 22 | 23 | \item{maxComps}{maximum number of components to use for each PLSR fit} 24 | 25 | \item{iterations}{how many different permutations to run} 26 | 27 | \item{prop}{proportion of data to preserve for each permutation} 28 | 29 | \item{verbose}{Should the function report the current iteration status/progress to the terminal 30 | or run silently? TRUE/FALSE. Default FALSE} 31 | } 32 | \value{ 33 | output a list containing the PRESS and coef_array. 34 | output <- list(PRESS=press.out, coef_array=coefs) 35 | } 36 | \description{ 37 | See Serbin et al. (2019). DOI: https://doi.org/10.1111/nph.16123 38 | } 39 | \author{ 40 | Julien Lamour, Shawn P. Serbin 41 | } 42 | -------------------------------------------------------------------------------- /man/find_optimal_components.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/find_optimal_components.R 3 | \name{find_optimal_components} 4 | \alias{find_optimal_components} 5 | \title{Applies different methods for the determination of the optimal number of PLSR model components} 6 | \usage{ 7 | find_optimal_components( 8 | dataset = NULL, 9 | targetVariable = NULL, 10 | method = "pls", 11 | maxComps = 20, 12 | iterations = 20, 13 | seg = 100, 14 | prop = 0.7, 15 | random_seed = 123456789 16 | ) 17 | } 18 | \arguments{ 19 | \item{dataset}{input full PLSR dataset. Usually just the calibration dataset} 20 | 21 | \item{targetVariable}{What object or variable to use as the Y (predictand) in the PLSR model? 22 | Usually the "inVar" variable set at the beginning of a PLS script} 23 | 24 | \item{method}{Which approach to use to find optimal components. Options: pls, firstPlateau, firstMin} 25 | 26 | \item{maxComps}{maximum number of components to consider} 27 | 28 | \item{iterations}{how many different permutations to run} 29 | 30 | \item{seg}{For the built-in pls method, how many different data segments to select from the input dataset} 31 | 32 | \item{prop}{proportion of data to preserve for each permutation} 33 | 34 | \item{random_seed}{random seed to use for splitting data} 35 | } 36 | \value{ 37 | nComps the optimal number of PLSR components 38 | } 39 | \description{ 40 | Applies different methods for the determination of the optimal number of PLSR model components 41 | } 42 | \author{ 43 | Julien Lamour, Jeremiah Anderson, Shawn P. Serbin 44 | } 45 | -------------------------------------------------------------------------------- /.github/workflows/check-r-release.yaml: -------------------------------------------------------------------------------- 1 | name: R-CMD-check-R-release 2 | 3 | # CI to test package on different versions of R, rel-1, rel, rel+1 4 | on: 5 | push: 6 | branches: [main, master] 7 | pull_request: 8 | branches: [main, master] 9 | 10 | env: 11 | R_LIBS_USER: /usr/local/lib/R/site-library 12 | LC_ALL: en_US.UTF-8 13 | NCPUS: 2 14 | 15 | jobs: 16 | R-CMD-check: 17 | runs-on: ubuntu-latest 18 | env: 19 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 20 | R_KEEP_PKG_SOURCE: yes 21 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true 22 | 23 | strategy: 24 | fail-fast: false 25 | matrix: 26 | R: 27 | - "4.0" 28 | - "4.1" 29 | - "4.2" 30 | - "4.3" 31 | - "4.4" 32 | - "4.5" 33 | 34 | steps: 35 | #check out source code 36 | - uses: actions/checkout@v4 37 | 38 | - uses: r-lib/actions/setup-r@v2 39 | with: 40 | use-public-rspm: true 41 | 42 | - uses: r-lib/actions/setup-r-dependencies@v2 43 | with: 44 | extra-packages: any::rcmdcheck 45 | needs: check 46 | 47 | # rcmdcheck but do not build vignettes 48 | - name: Run Fast Build Check Across R Releases 49 | run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--no-build-vignettes"), build_args = c("--no-manual", "--no-build-vignettes"), error_on = "error") 50 | shell: Rscript {0} 51 | 52 | - name: Upload check results 53 | if: failure() 54 | uses: actions/upload-artifact@v4 55 | with: 56 | name: ${{ runner.os }}-r${{ matrix.config.r }}-results 57 | path: check -------------------------------------------------------------------------------- /.github/workflows/run_plsr_example_auto.yaml: -------------------------------------------------------------------------------- 1 | name: run_PLSR_example-auto 2 | 3 | on: 4 | schedule: 5 | - cron: '0 0 */2 * *' 6 | 7 | env: 8 | R_LIBS_USER: /usr/local/lib/R/site-library 9 | LC_ALL: en_US.UTF-8 10 | NCPUS: 2 11 | 12 | jobs: 13 | run-plsr: 14 | # The type of runner that the job will run on 15 | runs-on: ubuntu-latest 16 | 17 | env: 18 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 19 | R_KEEP_PKG_SOURCE: yes 20 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true 21 | 22 | strategy: 23 | fail-fast: false 24 | matrix: 25 | R: 26 | - "4.0" 27 | - "4.1" 28 | - "4.2" 29 | - "4.3" 30 | - "4.4" 31 | - "4.5" 32 | 33 | steps: 34 | #check out source code 35 | - uses: actions/checkout@v4 36 | 37 | - uses: r-lib/actions/setup-r@v2 38 | with: 39 | use-public-rspm: true 40 | 41 | - uses: r-lib/actions/setup-r-dependencies@v2 42 | 43 | - name: Query dependencies 44 | run: | 45 | install.packages('remotes') 46 | saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) 47 | writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") 48 | shell: Rscript {0} 49 | 50 | - name: Install dependencies 51 | run: | 52 | Rscript -e 'remotes::install_github(repo="TESTgroup-BNL/spectratrait", dependencies=TRUE)' 53 | 54 | # Run R script 55 | - name: Run Ely et al. (2019) PLSR Example 56 | run: | 57 | source("inst/scripts/spectra-trait_ely_leafN_plsr_bootstrap_example.R") 58 | shell: Rscript {0} -------------------------------------------------------------------------------- /R/vip.R: -------------------------------------------------------------------------------- 1 | ##' @title VIP returns all VIP values for all variables and all number of components, as a ncomp x nvars matrix. 2 | ##' @param object fitted pls::plsr object 3 | ##' @export 4 | VIP <- function(object) { 5 | ## VIP returns all VIP values for all variables and all number of components, 6 | ## as a ncomp x nvars matrix. 7 | if (object$method != "oscorespls") 8 | stop("Only implemented for orthogonal scores algorithm. Refit with 'method = \"oscorespls\"'") 9 | if (nrow(object$Yloadings) > 1) 10 | stop("Only implemented for single-response models") 11 | 12 | SS <- c(object$Yloadings)^2 * colSums(object$scores^2) 13 | Wnorm2 <- colSums(object$loading.weights^2) 14 | SSW <- sweep(object$loading.weights^2, 2, SS / Wnorm2, "*") # Replace with matrix mult. 15 | sqrt(nrow(SSW) * apply(SSW, 1, cumsum) / cumsum(SS)) 16 | } 17 | 18 | ##' @title VIPjh returns the VIP of variable j with h components 19 | ##' @param object fitted pls::plsr object 20 | ##' @param j which variable in the fitted pls::plsr object 21 | ##' @param h the number of components in the fitted pls::plsr object to calculate the VIP 22 | ##' @export 23 | VIPjh <- function(object, j, h) { 24 | ## VIPjh returns the VIP of variable j with h components 25 | if (object$method != "oscorespls") 26 | stop("Only implemented for orthogonal scores algorithm. Refit with 'method = \"oscorespls\"'") 27 | if (nrow(object$Yloadings) > 1) 28 | stop("Only implemented for single-response models") 29 | 30 | b <- c(object$Yloadings)[1:h] 31 | T <- object$scores[,1:h, drop = FALSE] 32 | SS <- b^2 * colSums(T^2) 33 | W <- object$loading.weights[,1:h, drop = FALSE] 34 | Wnorm2 <- colSums(W^2) 35 | sqrt(nrow(W) * sum(SS * W[j,]^2 / Wnorm2) / sum(SS)) 36 | } -------------------------------------------------------------------------------- /.github/workflows/ci-run_plsr_example.yaml: -------------------------------------------------------------------------------- 1 | name: ci-run_PLSR_example 2 | 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | env: 10 | R_LIBS_USER: /usr/local/lib/R/site-library 11 | LC_ALL: en_US.UTF-8 12 | NCPUS: 2 13 | 14 | jobs: 15 | run-plsr: 16 | # The type of runner that the job will run on 17 | runs-on: ubuntu-latest 18 | 19 | env: 20 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 21 | R_KEEP_PKG_SOURCE: yes 22 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true 23 | 24 | strategy: 25 | fail-fast: false 26 | matrix: 27 | R: 28 | - "4.0" 29 | - "4.1" 30 | - "4.2" 31 | - "4.3" 32 | - "4.4" 33 | - "4.5" 34 | 35 | steps: 36 | #check out source code 37 | - uses: actions/checkout@v4 38 | 39 | - uses: r-lib/actions/setup-r@v2 40 | with: 41 | use-public-rspm: true 42 | 43 | - uses: r-lib/actions/setup-r-dependencies@v2 44 | 45 | - name: Query dependencies 46 | run: | 47 | install.packages('remotes') 48 | saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) 49 | writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") 50 | shell: Rscript {0} 51 | 52 | - name: Install dependencies 53 | run: | 54 | Rscript -e 'remotes::install_github(repo="plantphys/spectratrait", dependencies=TRUE)' 55 | 56 | # Run R script 57 | - name: Run Ely et al. (2019) PLSR Example 58 | run: | 59 | source("inst/scripts/spectra-trait_ely_leafN_plsr_bootstrap_example.R") 60 | shell: Rscript {0} -------------------------------------------------------------------------------- /man/find_optimal_comp_by_groups.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/find_optimal_components.R 3 | \name{find_optimal_comp_by_groups} 4 | \alias{find_optimal_comp_by_groups} 5 | \title{Uses the firstMin and firstPlateau methods for the determination of the optimal number of PLSR model components, 6 | by group (i.e. optimal selection by stratification)} 7 | \usage{ 8 | find_optimal_comp_by_groups( 9 | dataset = NULL, 10 | targetVariable = NULL, 11 | method = "firstPlateau", 12 | maxComps = 20, 13 | iterations = 20, 14 | prop = 0.7, 15 | random_seed = 123456789, 16 | group_variables = NULL 17 | ) 18 | } 19 | \arguments{ 20 | \item{dataset}{input full PLSR dataset. Usually just the calibration dataset} 21 | 22 | \item{targetVariable}{What object or variable to use as the Y (predictand) in the PLSR model? 23 | Usually the "inVar" variable set at the beginning of a PLS script} 24 | 25 | \item{method}{Which approach to use to find optimal components. Options: firstPlateau, firstMin} 26 | 27 | \item{maxComps}{maximum number of components to consider} 28 | 29 | \item{iterations}{how many different permutations to run} 30 | 31 | \item{prop}{proportion of data to preserve for each permutation} 32 | 33 | \item{random_seed}{random seed to use for splitting data} 34 | 35 | \item{group_variables}{group_variables character vector of the form c("var1", "var2"..."varn") 36 | providing the factors used for stratified sampling.} 37 | } 38 | \value{ 39 | nComps the optimal number of PLSR components 40 | } 41 | \description{ 42 | Uses the firstMin and firstPlateau methods for the determination of the optimal number of PLSR model components, 43 | by group (i.e. optimal selection by stratification) 44 | } 45 | \author{ 46 | asierrl, Shawn P. Serbin 47 | } 48 | -------------------------------------------------------------------------------- /man/pls_permutation_by_groups.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pls_permutation.R 3 | \name{pls_permutation_by_groups} 4 | \alias{pls_permutation_by_groups} 5 | \title{Run a PLSR model permutation analysis stratified by selected "groups". Can be used to 6 | determine the optimal number of components or conduct a boostrap uncertainty analysis} 7 | \usage{ 8 | pls_permutation_by_groups( 9 | dataset = NULL, 10 | targetVariable = NULL, 11 | maxComps = 20, 12 | iterations = 20, 13 | prop = 0.7, 14 | group_variables = NULL, 15 | verbose = FALSE 16 | ) 17 | } 18 | \arguments{ 19 | \item{dataset}{input full PLSR dataset. Usually just the calibration dataset} 20 | 21 | \item{targetVariable}{What object or variable to use as the Y (predictand) in the PLSR model? 22 | Usually the "inVar" variable set at the beginning of a PLS script} 23 | 24 | \item{maxComps}{maximum number of components to use for each PLSR fit} 25 | 26 | \item{iterations}{how many different permutations to run} 27 | 28 | \item{prop}{proportion of data to preserve for each permutation} 29 | 30 | \item{group_variables}{Character vector of the form c("var1", "var2"..."varn") 31 | providing the factors used for stratified sampling in the PLSR permutation analysis} 32 | 33 | \item{verbose}{Should the function report the current iteration status/progress to the terminal 34 | or run silently? TRUE/FALSE. Default FALSE} 35 | } 36 | \value{ 37 | output a list containing the PRESS and coef_array. 38 | output <- list(PRESS=press.out, coef_array=coefs) 39 | } 40 | \description{ 41 | Run a PLSR model permutation analysis stratified by selected "groups". Can be used to 42 | determine the optimal number of components or conduct a boostrap uncertainty analysis 43 | } 44 | \author{ 45 | asierrl, Shawn P. Serbin, Julien Lamour 46 | } 47 | -------------------------------------------------------------------------------- /tests/testthat/test.create_data_split.R: -------------------------------------------------------------------------------- 1 | context("*** Test that the create data split function has the expected behavior *** ") 2 | 3 | test_that("Generating a data split using the dplyr approach doesn't throw an error or generate duplicates between cal. and val. data", { 4 | plot<- rep(c("plot1", "plot2", "plot3"),each=42) 5 | season<- rep(1:6, 21) 6 | disease<- c(rep(0,84), rep(1,42)) 7 | d<- seq(1:126) 8 | df <- data.frame(plot,season,disease,d) 9 | df <- df %>% mutate(id=row_number()) 10 | 11 | split_data <- spectratrait::create_data_split(dataset=df, approach="dplyr", 12 | split_seed=7529075, prop=0.8, 13 | group_variables=c("plot", 14 | "season", 15 | "disease")) 16 | expect_false(sum(split_data$cal_data$id %in% split_data$val_data$id)>0) 17 | }) 18 | 19 | test_that("Generating a data split using the base approach doesn't throw an error or generate duplicates between cal. and val. data", { 20 | plot<- rep(c("plot1", "plot2", "plot3"), each=42) 21 | season<- rep(1:6, 21) 22 | disease<- c(rep(0,84), rep(1,42)) 23 | d<- seq(1:126) 24 | df <- data.frame(plot, season, disease, d) 25 | df <- df %>% mutate(id=row_number()) 26 | 27 | split_data <- spectratrait::create_data_split(dataset=df, approach="base", 28 | split_seed=7529075, prop=0.8, 29 | group_variables=c("plot", 30 | "season", 31 | "disease")) 32 | expect_false(sum(split_data$cal_data$id %in% split_data$val_data$id)>0) 33 | }) -------------------------------------------------------------------------------- /R/f.plot.coef.R: -------------------------------------------------------------------------------- 1 | ##' @title f.plot.coef 2 | ##' 3 | ##' @param Z Coefficient matrix with each row corresponding to the coefficients and wavelength in columns 4 | ##' @param wv vector of wavelengths 5 | ##' @param xlim vector to change the default xlim of the plots (ex xlim = c(500, 2400)) 6 | ##' @param position Position of the legend (see base function legend for help) 7 | ##' @param type Name of the y axis and of the legend 8 | ##' @param plot_label optional plot label to include with the figure 9 | ##' 10 | ##' @importFrom stats quantile 11 | ##' @importFrom graphics polygon lines legend box 12 | ##' 13 | ##' @author Julien Lamour 14 | ##' @export 15 | f.plot.coef <- function( 16 | Z, ## Coefficient matrix with each row corresponding to the coefficients and wavelength in columns 17 | wv, ## vector of wavelengths 18 | xlim=NULL, ## vector to change the default xlim of the plots (ex xlim = c(500, 2400)) 19 | position='topright',## Position of the legend (see base function legend for help) 20 | type='Coefficient', ## Name of the y axis and of the legend 21 | plot_label=NULL ## optional label for plot 22 | ){ 23 | 24 | if(is.null(xlim)){xlim=c(min(wv),max(wv))} 25 | mean_spec <- colMeans(Z) 26 | spectra_quantiles <- apply(Z,2,quantile,na.rm=T,probs=c(0,0.01,0.025,0.05,0.5,0.95,0.975,0.99,1)) 27 | 28 | plot(x=NULL,y=NULL,xlim=xlim,ylim=c(min(Z),max(Z)),xlab="Wavelength (nm)", 29 | ylab=type,main=plot_label) 30 | polygon(c(wv ,rev(wv)),c(spectra_quantiles[9,], rev(spectra_quantiles[1,])), 31 | col="grey60",border=NA) 32 | polygon(c(wv ,rev(wv)),c(spectra_quantiles[6,], rev(spectra_quantiles[4,])), 33 | col="#99CC99",border=NA) 34 | lines(wv,mean_spec,lwd=2, lty=1, col="black") 35 | lines(wv,spectra_quantiles[1,], lty=3, col="grey60") 36 | lines(wv,spectra_quantiles[9,], lty=3, col="grey60") 37 | legend(position,legend=c(paste("Mean",type),"Min/Max (range)", "95% CI"),lty=c(1,1,1), 38 | lwd=c(2,10,10),col=c("black","grey50","#99CC99"),bty="n") 39 | box(lwd=2.2) 40 | } -------------------------------------------------------------------------------- /.github/workflows/ci-weekly.yaml: -------------------------------------------------------------------------------- 1 | name: R-CMD-check-Weekly 2 | 3 | permissions: read-all 4 | 5 | on: 6 | # every Monday at 4:30 AM 7 | schedule: 8 | - cron: '30 4 * * 1' 9 | 10 | env: 11 | NCPUS: 2 12 | 13 | jobs: 14 | R-CMD-check: 15 | runs-on: ${{ matrix.config.os }} 16 | 17 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 18 | 19 | strategy: 20 | fail-fast: false 21 | matrix: 22 | config: 23 | - {os: macOS-latest, r: 'oldrel-1'} 24 | - {os: macOS-latest, r: 'release'} 25 | - {os: windows-latest, r: 'release'} 26 | # use 4.1 to check with rtools40's older compiler 27 | - {os: windows-latest, r: '4.1'} 28 | - {os: ubuntu-latest, r: 'oldrel-3'} 29 | - {os: ubuntu-latest, r: 'oldrel-2'} 30 | - {os: ubuntu-latest, r: 'oldrel-1'} 31 | - {os: ubuntu-latest, r: 'release'} 32 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 33 | 34 | env: 35 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 36 | R_KEEP_PKG_SOURCE: yes 37 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true 38 | 39 | steps: 40 | - uses: actions/checkout@v4 41 | 42 | - uses: r-lib/actions/setup-pandoc@v2 43 | 44 | - uses: r-lib/actions/setup-r@v2 45 | with: 46 | r-version: ${{ matrix.config.r }} 47 | http-user-agent: ${{ matrix.config.http-user-agent }} 48 | use-public-rspm: true 49 | 50 | - uses: r-lib/actions/setup-r-dependencies@v2 51 | with: 52 | extra-packages: any::rcmdcheck 53 | needs: check 54 | 55 | # rcmdcheck but do not build vignettes 56 | - name: Build Check Across OS & R Releases 57 | run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--no-build-vignettes"), build_args = c("--no-manual", "--no-build-vignettes"), error_on = "error") 58 | shell: Rscript {0} 59 | 60 | - name: Upload check results 61 | if: failure() 62 | uses: actions/upload-artifact@v4 63 | with: 64 | name: ${{ runner.os }}-r${{ matrix.config.r }}-results 65 | path: check -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: spectratrait 2 | Title: A simple add-on package to aid in the fitting of leaf or canopy scale spectra-trait PLSR models 3 | Version: 1.2.6 4 | Authors@R: 5 | c(person(given = "Julien", 6 | family = "Lamour", 7 | role = c("aut", "ctb"), 8 | email = "jlamour.sci@gmail.com", 9 | comment = c(ORCID = "0000-0002-4410-507X")), 10 | person(given = "Jeremiah", 11 | family = "Anderson", 12 | role = "ctb", 13 | email = "jeremiah.a.anderson@nasa.gov", 14 | comment = c(ORCID = "0000-0001-8925-5226")), 15 | person(given = "Kenneth", 16 | family = "Davidson", 17 | role = "ctb", 18 | email = "kdavidson@americanforests.org", 19 | comment = c(ORCID = "0000-0001-5745-9689")), 20 | person(given = "Kim", 21 | family = "Ely", 22 | role = "ctb", 23 | email = "kely@bnl.gov", 24 | comment = c(ORCID = "0000-0002-3915-001X")), 25 | person(given = "Shawn", 26 | family = "Serbin", 27 | role = c("aut", "cre", "ctb"), 28 | email = "shawn.p.serbin@nasa.gov", 29 | comment = c(ORCID = "0000-0003-4136-8971"))) 30 | Maintainer: Shawn P. Serbin 31 | Description: This package provides functions to conduct standardized spectra-trait PLSR model fitting, including uncertainty analysis that follows DOI: https://doi.org/10.1111/nph.16123 32 | License: MIT + file LICENSE 33 | Encoding: UTF-8 34 | LazyData: true 35 | Roxygen: list(markdown = TRUE) 36 | RoxygenNote: 7.3.1 37 | Imports: 38 | httr (>= 1.4.2), 39 | readr (>= 1.3.1), 40 | pls (>= 2.7-2), 41 | dplyr (>= 1.0.1), 42 | magrittr (>= 2.0.1), 43 | reshape2 (>= 1.4.4), 44 | here (>= 0.1), 45 | plotrix (>= 3.7-8), 46 | ggplot2 (>= 3.3.2), 47 | gridExtra (>= 2.3), 48 | scales (>= 1.1.1), 49 | testthat (>= 3.1.2) 50 | Suggests: 51 | devtools (>= 2.3.1), 52 | remotes (>= 2.2.0), 53 | RCurl (>= 1.98-1.2), 54 | knitr (>= 1.37), 55 | markdown, 56 | rmarkdown 57 | Depends: 58 | R (>= 4.0) 59 | VignetteBuilder: knitr 60 | NeedsCompilation: no 61 | -------------------------------------------------------------------------------- /tests/testthat/test.optimal_components.R: -------------------------------------------------------------------------------- 1 | context("*** Test methods for finding optimal number of PLSR components ***") 2 | 3 | ### Setup data for tests 4 | #Load Ely et al 2019 dataset 5 | data("ely_plsr_data") 6 | inVar <- "N_g_m2" 7 | Start.wave <- 500 8 | End.wave <- 2400 9 | wv <- seq(Start.wave,End.wave,1) 10 | plsr_data <- ely_plsr_data 11 | spec <- as.matrix(plsr_data[, which(names(plsr_data) %in% paste0("Wave_",wv))]) 12 | plsr_data <- data.frame(plsr_data[, which(names(plsr_data) %notin% paste0("Wave_",wv))], 13 | Spectra=I(spec)) 14 | ### 15 | 16 | test_that("Finding optimal components using the built-in PLS package approach", { 17 | method <- "pls" 18 | random_seed <- 1245565 19 | seg <- 50 20 | maxComps <- 20 21 | iterations <- 80 22 | prop <- 0.70 23 | 24 | nComps <- spectratrait::find_optimal_components(dataset=plsr_data, targetVariable=inVar, 25 | method=method, maxComps=maxComps, seg=seg, 26 | random_seed=random_seed) 27 | expect_gte(nComps, 12) 28 | }) 29 | 30 | test_that("Finding optimal components using the firstMin approach", { 31 | method <- "firstMin" 32 | random_seed <- 1245565 33 | seg <- 50 34 | maxComps <- 20 35 | iterations <- 80 36 | prop <- 0.70 37 | 38 | nComps <- spectratrait::find_optimal_components(dataset=plsr_data, targetVariable=inVar, 39 | method=method, maxComps=maxComps, 40 | iterations=iterations, seg=seg, prop=prop, 41 | random_seed=random_seed) 42 | expect_gte(nComps, 12) 43 | 44 | }) 45 | 46 | test_that("Finding optimal components using the firstPlateau approach", { 47 | method <- "firstPlateau" 48 | random_seed <- 1245565 49 | seg <- 50 50 | maxComps <- 20 51 | iterations <- 80 52 | prop <- 0.70 53 | 54 | nComps <- spectratrait::find_optimal_components(dataset=plsr_data, targetVariable=inVar, 55 | method=method, maxComps=maxComps, 56 | iterations=iterations, seg=seg, prop=prop, 57 | random_seed=random_seed) 58 | expect_gte(nComps, 12) 59 | 60 | }) -------------------------------------------------------------------------------- /.github/workflows/check-os.yaml: -------------------------------------------------------------------------------- 1 | name: R-CMD-check-OS-R 2 | 3 | # Check OS and R releases 4 | on: 5 | push: 6 | branches: [main, master] 7 | pull_request: 8 | branches: [main, master] 9 | 10 | permissions: read-all 11 | 12 | env: 13 | NCPUS: 2 14 | 15 | jobs: 16 | R-CMD-check: 17 | runs-on: ${{ matrix.config.os }} 18 | 19 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 20 | 21 | strategy: 22 | fail-fast: false 23 | matrix: 24 | config: 25 | - {os: macOS-latest, r: 'oldrel-1'} 26 | - {os: macos-latest, r: 'release'} 27 | - {os: windows-latest, r: 'release'} 28 | # use 4.1 to check with rtools40's older compiler 29 | - {os: windows-latest, r: '4.1'} 30 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 31 | - {os: ubuntu-latest, r: 'release'} 32 | - {os: ubuntu-latest, r: 'oldrel-1'} 33 | 34 | env: 35 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 36 | R_KEEP_PKG_SOURCE: yes 37 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true 38 | RSPM: ${{ matrix.config.rspm }} 39 | 40 | steps: 41 | - uses: actions/checkout@v4 42 | 43 | - uses: r-lib/actions/setup-pandoc@v2 44 | 45 | - uses: r-lib/actions/setup-r@v2 46 | with: 47 | r-version: ${{ matrix.config.r }} 48 | http-user-agent: ${{ matrix.config.http-user-agent }} 49 | use-public-rspm: true 50 | 51 | - name: Query dependencies 52 | run: | 53 | install.packages('remotes') 54 | saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) 55 | writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") 56 | shell: Rscript {0} 57 | 58 | - uses: r-lib/actions/setup-r-dependencies@v2 59 | with: 60 | extra-packages: any::rcmdcheck 61 | needs: check 62 | 63 | # rcmdcheck but do not build vignettes 64 | - name: Build Check Across OS & R Releases 65 | run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--no-build-vignettes"), build_args = c("--no-manual", "--no-build-vignettes"), error_on = "error") 66 | shell: Rscript {0} 67 | 68 | - name: Upload check results 69 | if: failure() 70 | uses: actions/upload-artifact@v4 71 | with: 72 | name: ${{ runner.os }}-r${{ matrix.config.r }}-results 73 | path: check -------------------------------------------------------------------------------- /R/f.plot.spec.R: -------------------------------------------------------------------------------- 1 | ##' @title f.plot.spec 2 | ##' 3 | ##' @param Z Spectra matrix with each row corresponding to a spectra and wavelength in columns 4 | ##' @param wv vector of wavelengths corresponding to the column of the spectra matrix Z 5 | ##' @param xlim vector to change the default xlim of the plots (ex xlim = c(500, 2400)) 6 | ##' @param position Position of the legend (see base function legend for help) 7 | ##' @param type Name of the y axis and of the legend. E.g. Reflectance, Transmittance 8 | ##' @param plot_label optional plot label to include with the figure 9 | ##' @param CI Desired confidence interval for the spectra plot. Default is: 95; can be changed dynamically 10 | ##' 11 | ##' @importFrom stats quantile 12 | ##' @importFrom graphics polygon lines legend box 13 | ##' 14 | ##' @author Julien Lamour, Shawn P. Serbin, Andrés Baresch 15 | ##' @export 16 | f.plot.spec <- function( 17 | Z, ## Spectra matrix with each row corresponding to a spectra and wavelength in columns 18 | wv, ## vector of wavelengths corresponding to the column of the spectra matrix Z 19 | xlim=NULL, ## vector to change the default xlim of the plots (ex xlim = c(500, 2400)) 20 | position='topright',## Position of the legend (see base function legend for help) 21 | type='Reflectance', ## Name of the y axis and of the legend 22 | plot_label=NULL , ## optional label for plot 23 | CI = 95 ## confidence interval as percentage, default 95% ; can be prescribed 24 | ){ 25 | if(mean(as.matrix(Z),na.rm=TRUE)>1){Z=Z/100} ## Check if the spectra are in pc [0,100] or in [0,1] 26 | if(is.null(xlim)){xlim=c(min(wv),max(wv))} 27 | mean_spec <- colMeans(Z,na.rm=TRUE) 28 | 29 | CI_l=0+((100-95)/200) # lower bound for the confidence interval 30 | CI_u=1-((100-95)/200) # upper bound for the confidence interval 31 | 32 | spectra_quantiles <- apply(Z,2,quantile,na.rm=T,probs=c(0,CI_l,CI_u,1)) 33 | 34 | plot(x=NULL,y=NULL,ylim=c(0,100),xlim=xlim,xlab="Wavelength (nm)", 35 | ylab=paste0(type," (%)"),main=plot_label) 36 | 37 | polygon(c(wv ,rev(wv)),c(spectra_quantiles[3,]*100, rev(spectra_quantiles[2,]*100)), 38 | col="#99CC99",border=NA) 39 | 40 | 41 | ci_text=paste0(CI,"% CI") 42 | lines(wv,mean_spec*100,lwd=2, lty=1, col="black") 43 | lines(wv,spectra_quantiles[1,]*100, lty=3, col="grey40") 44 | lines(wv,spectra_quantiles[4,]*100, lty=3, col="grey40") 45 | legend(position,legend=c(paste("Mean",type),"Min/Max", ci_text),lty=c(1,3,1), 46 | lwd=c(2,1,10),col=c("black","grey40","#99CC99"),bty="n") 47 | box(lwd=2.2) 48 | } 49 | -------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | ##' Function to source text data from GitHub 2 | ##' 3 | ##' @param url http/https URL to the github dataset 4 | ##' @param sep dataset file delimiter 5 | ##' @param header TRUE/FALSE does the file have a column header? 6 | ##' 7 | ##' @import httr 8 | ##' 9 | ##' @author gist.github.com/christophergandrud/4466237 10 | ##' @export 11 | source_GitHubData <- function(url, sep = ",", header = TRUE) { 12 | # define function to grab PLSR model from GitHub 13 | #devtools::source_gist("gist.github.com/christophergandrud/4466237") 14 | request <- httr::GET(url) 15 | httr::stop_for_status(request) 16 | handle <- textConnection(httr::content(request, as = 'text')) 17 | on.exit(close(handle)) 18 | read.table(handle, sep = sep, header = header) 19 | } 20 | 21 | ##' Calculate RMSE and percent RMSE with PLSR model results 22 | ##' 23 | ##' @param plsr_dataset input plsr dataset 24 | ##' @param inVar the trait variable used in the calculation of RMSE 25 | ##' @param residuals predicted minus observed residual vector from either 26 | ##' a cross-validation CV or independent validation 27 | ##' @param range calculate over the full data range or the 95% of data range. 28 | ##' options full or 95perc 29 | ##' @return output a list containing the rmse and perc_rmse. 30 | ##' output <- list(rmse = rmse, perc_rmse = perc_rmse) 31 | ##' 32 | ##' @author Shawn P. Serbin 33 | ##' 34 | ##' @export 35 | percent_rmse <- function(plsr_dataset = NULL, inVar = NULL, 36 | residuals = NULL, range = "full") { 37 | rmse <- sqrt(mean(residuals^2, na.rm = TRUE)) 38 | val_data_range <- range(plsr_dataset[,inVar], na.rm = TRUE) 39 | val_95perc_data_range <- quantile(plsr_dataset[,inVar], 40 | probs = c(0.025,0.975), 41 | na.rm = TRUE) 42 | if (range=="full") { 43 | perc_rmse <- (rmse/(val_data_range[2]-val_data_range[1]))*100 44 | } else if (range=="95perc") { 45 | perc_rmse <- (rmse/(val_95perc_data_range[[2]]-val_95perc_data_range[[1]]))*100 46 | } else { 47 | print("Not a valid option, defaulting to full") 48 | perc_rmse <- (rmse/(val_data_range[2]-val_data_range[1]))*100 49 | } 50 | 51 | # output rmse list 52 | output <- list(rmse = rmse, perc_rmse = perc_rmse) 53 | return(output) 54 | } 55 | 56 | 57 | ##' Not %in% function 58 | ##' @param x initial list 59 | ##' @param table list to check against 60 | ##' 61 | ##' @export 62 | #`%notin%` <- Negate(`%in%`) 63 | `%notin%` <- function(x, table) { 64 | !(x %in% table) 65 | } 66 | 67 | ##' Function to check for installed package 68 | ##' @param pkg name of package to check if installed 69 | ##' not presently used 70 | testForPackage <- function(pkg) { 71 | if (!requireNamespace(pkg)) { 72 | stop("Package", pkg, "required but not installed") 73 | } 74 | } -------------------------------------------------------------------------------- /R/create_data_split.R: -------------------------------------------------------------------------------- 1 | ##' Create a calibration (training) / validation data split for PLSR model fitting and testing 2 | ##' 3 | ##' @param dataset input full PLSR dataset to split into cal/val datasets 4 | ##' @param approach approach to splitting the dataset. Options: base or dplyr 5 | ##' @param split_seed random seed to use for splitting data 6 | ##' @param prop the proportion of data to preserve for calibration (e.g. 0.8) and validation (0.2). 7 | ##' This sets the calibration proportion 8 | ##' @param group_variables Use factor variables to conduct a stratified sampling for cal/val 9 | ##' 10 | ##' @return output_list A list containing the calibration dataset (cal_data) 11 | ##' and validation dataset (val_data) 12 | ##' 13 | ##' @importFrom magrittr %>% 14 | ##' @importFrom dplyr mutate group_by_at slice n vars all_of row_number 15 | ##' 16 | ##' @author Julien Lamour, Jeremiah Anderson, Shawn P. Serbin 17 | ##' @export 18 | create_data_split <- function(dataset=NULL, approach=NULL, split_seed=123456789, prop=0.8, 19 | group_variables=NULL) { 20 | # TODO: import only required functions from dplyr 21 | set.seed(split_seed) 22 | 23 | # outer if/else to stop if approach set to NULL 24 | if(!is.null(approach)) { 25 | 26 | ## base R data split function 27 | if (approach=="base") { 28 | dataset$CalVal <- NA 29 | split_var <- group_variables 30 | if(length(group_variables) > 1) { 31 | dataset$ID <- apply(dataset[, group_variables], MARGIN = 1, FUN = function(x) paste(x, collapse = " ")) 32 | } else { 33 | dataset$ID <- dataset[, group_variables] 34 | } 35 | split_var_list <- unique(dataset$ID) 36 | for(i in 1:length(split_var_list)) { 37 | temp <- row.names(dataset[dataset$ID == split_var_list[i], ]) 38 | ## there should probably be more than 4 obs I'm guessing, so this may need adjusting 39 | if(length(temp) > 3) { 40 | Cal <- sample(temp,round(prop*length(temp))) 41 | Val <- temp[temp %notin% Cal] 42 | dataset$CalVal[ row.names(dataset) %in% Cal ] <- "Cal" 43 | dataset$CalVal[ row.names(dataset) %in% Val ] <- "Val" 44 | p_cal <- length(Cal)/length(temp) * 100 45 | message(paste0(split_var_list[i], " ", "Cal", ": ", round(p_cal,3), "%")) 46 | } else { 47 | message(paste(split_var_list[i], "Not enough observations")) 48 | } 49 | } 50 | dataset$ID <- NULL 51 | # drop NA's in CalVal 52 | dataset <- dataset[!is.na(dataset$CalVal), ] 53 | cal.plsr.data <- dataset[dataset$CalVal== "Cal",] 54 | val.plsr.data <- dataset[dataset$CalVal== "Val",] 55 | 56 | # Remove temporary CalVal column 57 | cal.plsr.data <- cal.plsr.data[,-which(names(cal.plsr.data)=="CalVal")] 58 | val.plsr.data <- val.plsr.data[,-which(names(val.plsr.data)=="CalVal")] 59 | 60 | # dplyr based data split function 61 | } else if (approach=="dplyr") { 62 | dataset <- dataset %>% mutate(ids=row_number()) 63 | cal.plsr.data <- dataset %>% 64 | group_by_at(vars(all_of(group_variables))) %>% 65 | slice(sample(1:n(), prop*n())) %>% 66 | data.frame() 67 | val.plsr.data <- dataset[dataset$ids %notin% cal.plsr.data$ids,] 68 | cal.plsr.data <- cal.plsr.data[,-which(colnames(cal.plsr.data)=="ids")] 69 | val.plsr.data <- val.plsr.data[,-which(colnames(val.plsr.data)=="ids")] 70 | } else { 71 | stop("**** Please set approach to either base R or dplyr data split ****") 72 | } 73 | output_list <- list(cal_data=cal.plsr.data, val_data=val.plsr.data) 74 | return(output_list) 75 | } 76 | 77 | # if approach is set to NULL (i.e. not set) return error message 78 | stop("**** Please set approach to either base R or dplyr data split ****") 79 | 80 | } -------------------------------------------------------------------------------- /inst/scripts/pull_data_from_ecosis.R: -------------------------------------------------------------------------------- 1 | #################################################################################################### 2 | # 3 | # 4 | # A simple example showing how to pull spectra and trait data from EcoSIS and plot it up 5 | # 6 | # Spectra and trait data source: 7 | # https://ecosis.org/package/ngee-arctic-2016-leaf-spectral-reflectance-kougarok-road-seward-peninsula-alaska-2016 8 | # 9 | # Notes: 10 | # * Provided as a basic example of how to apply the model to new spectra observations 11 | # * The author notes the code is not the most elegant or clean, but is functional 12 | # * Questions, comments, or concerns can be sent to sserbin@bnl.gov 13 | # * Code is provided under GNU General Public License v3.0 14 | # 15 | # 16 | #################################################################################################### 17 | 18 | 19 | #--------------------------------------------------------------------------------------------------# 20 | ### Load libraries 21 | list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra", 22 | "spectratrait") 23 | invisible(lapply(list.of.packages, library, character.only = TRUE)) 24 | #--------------------------------------------------------------------------------------------------# 25 | 26 | 27 | #--------------------------------------------------------------------------------------------------# 28 | ### Setup options 29 | 30 | # What is the source dataset from EcoSIS? 31 | ecosis_id <- "960dbb0c-144e-4563-8117-9e23d14f4aa9" 32 | 33 | # Specify output directory, output_dir 34 | # Options: 35 | # tempdir - use a OS-specified temporary directory 36 | # user defined PATH - e.g. "~/scratch/PLSR" 37 | output_dir <- "tempdir" 38 | #--------------------------------------------------------------------------------------------------# 39 | 40 | 41 | #--------------------------------------------------------------------------------------------------# 42 | ### Set working directory 43 | if (output_dir=="tempdir") { 44 | outdir <- tempdir() 45 | } else { 46 | if (! file.exists(output_dir)) dir.create(output_dir,recursive=TRUE) 47 | outdir <- file.path(path.expand(output_dir)) 48 | } 49 | setwd(outdir) # set working directory 50 | getwd() # check wd 51 | #--------------------------------------------------------------------------------------------------# 52 | 53 | 54 | #--------------------------------------------------------------------------------------------------# 55 | ### Example dataset 56 | # 57 | # URL: 58 | # https://ecosis.org/package/ngee-arctic-2016-leaf-spectral-reflectance-kougarok-road-watershed-seward-peninsula-alaska 59 | # 60 | #--------------------------------------------------------------------------------------------------# 61 | 62 | 63 | #--------------------------------------------------------------------------------------------------# 64 | ### Get source dataset from EcoSIS 65 | dat_raw <- spectratrait::get_ecosis_data(ecosis_id = ecosis_id) 66 | head(dat_raw) 67 | names(dat_raw)[1:40] 68 | #--------------------------------------------------------------------------------------------------# 69 | 70 | 71 | #--------------------------------------------------------------------------------------------------# 72 | ### Prepare data 73 | Start.wave <- 500 74 | End.wave <- 2400 75 | wv <- seq(Start.wave,End.wave,1) 76 | spectra <- data.frame(dat_raw[,names(dat_raw) %in% wv]) 77 | names(spectra) <- c(paste0("Wave_",wv)) 78 | head(spectra)[,1:5] 79 | 80 | sample_info <- dat_raw[,names(dat_raw) %notin% seq(350,2500,1)] 81 | head(sample_info) 82 | names(sample_info) 83 | #--------------------------------------------------------------------------------------------------# 84 | 85 | 86 | #--------------------------------------------------------------------------------------------------# 87 | ### Plot spectra 88 | spectratrait::f.plot.spec(Z=spectra,wv=wv,plot_label="NGEE-Arctic Leaf Spectra") 89 | dev.copy(png,file.path(outdir,'Leaf_Spectra.png'), 90 | height=2500,width=4900, res=340) 91 | dev.off(); 92 | par(mfrow=c(1,1)) 93 | #--------------------------------------------------------------------------------------------------# 94 | 95 | 96 | #--------------------------------------------------------------------------------------------------# 97 | ### Plot trait data 98 | print("**** Plotting Ecosis trait data. Writing to scratch space ****") 99 | 100 | # Organize leaf trait data 101 | trait_data <- sample_info %>% 102 | select(Site,Sample_ID,USDA_Species_Code=`USDA Symbol`, 103 | Measurement_Date=`Sample Collection Date`,Cmass_g_g,Nmass_g_g,CN_Ratio, 104 | LMA_g_m2) 105 | head(trait_data) 106 | 107 | # Prepare data for ggplot 108 | trait_data <- reshape2::melt(data = trait_data, id.vars = "USDA_Species_Code", measure.vars = c("LMA_g_m2", 109 | "Cmass_g_g", 110 | "Nmass_g_g", 111 | "CN_Ratio")) 112 | head(trait_data) 113 | 114 | # Graph the trait data and save a file to the scratch space 115 | p2 <- ggplot(trait_data, aes(x=USDA_Species_Code, y=value)) + 116 | geom_boxplot() + 117 | facet_wrap(~variable, scale="free") 118 | print(p2) 119 | ggsave(filename = file.path(outdir,"NGEE-Arctic_2016_Kougarok_Trait_data.png"), plot = p2, 120 | width = 40, height = 20, units = "cm") 121 | #--------------------------------------------------------------------------------------------------# 122 | 123 | ### EOF -------------------------------------------------------------------------------- /R/pls_permutation.R: -------------------------------------------------------------------------------- 1 | ##' Run a PLSR model permutation analysis. Can be used to determine the optimal number of components 2 | ##' or conduct a boostrap uncertainty analysis 3 | ##' 4 | ##' See Serbin et al. (2019). DOI: https://doi.org/10.1111/nph.16123 5 | ##' 6 | ##' @param dataset input full PLSR dataset. Usually just the calibration dataset 7 | ##' @param targetVariable What object or variable to use as the Y (predictand) in the PLSR model? 8 | ##' Usually the "inVar" variable set at the beginning of a PLS script 9 | ##' @param maxComps maximum number of components to use for each PLSR fit 10 | ##' @param iterations how many different permutations to run 11 | ##' @param prop proportion of data to preserve for each permutation 12 | ##' @param verbose Should the function report the current iteration status/progress to the terminal 13 | ##' or run silently? TRUE/FALSE. Default FALSE 14 | ##' @return output a list containing the PRESS and coef_array. 15 | ##' output <- list(PRESS=press.out, coef_array=coefs) 16 | ##' 17 | ##' @importFrom pls plsr 18 | ##' @importFrom utils flush.console read.table setTxtProgressBar txtProgressBar 19 | ##' 20 | ##' @author Julien Lamour, Shawn P. Serbin 21 | ##' @export 22 | pls_permutation <- function(dataset=NULL, targetVariable=NULL, maxComps=20, iterations=20, 23 | prop=0.70, verbose=FALSE) { 24 | inVar <- targetVariable 25 | coefs <- array(0,dim=c((ncol(dataset$Spectra)+1),iterations,maxComps)) 26 | press.out <- array(data=NA, dim=c(iterations,maxComps)) 27 | print("*** Running permutation test. Please hang tight, this can take awhile ***") 28 | print("Options:") 29 | print(paste("Max Components:",maxComps, "Iterations:", iterations, 30 | "Data Proportion (percent):", prop*100, sep=" ")) 31 | 32 | if (verbose) { 33 | j <- 1 34 | pb <- txtProgressBar(min = 0, max = iterations, 35 | char="*",width=70,style = 3) 36 | } 37 | 38 | for (i in seq_along(1:iterations)) { 39 | rows <- sample(1:nrow(dataset),floor(prop*nrow(dataset))) 40 | sub.data <- dataset[rows,] 41 | val.sub.data <- dataset[-rows,] 42 | plsr.out <- plsr(as.formula(paste(inVar,"~","Spectra")), scale=FALSE, center=TRUE, 43 | ncomp=maxComps, validation="none", data=sub.data) 44 | pred_val <- predict(plsr.out,newdata=val.sub.data) 45 | sq_resid <- (pred_val[,,]-val.sub.data[,inVar])^2 46 | press <- apply(X = sq_resid, MARGIN = 2, FUN = sum) 47 | press.out[i,] <- press 48 | coefs[,i,] <- coef(plsr.out, intercept = TRUE, ncomp = 1:maxComps) 49 | rm(rows,sub.data,val.sub.data,plsr.out,pred_val,sq_resid,press) 50 | 51 | ### Display progress to console 52 | if (verbose) { 53 | setTxtProgressBar(pb, j) 54 | j <- j+1 55 | flush.console() 56 | } 57 | } 58 | if (verbose) { 59 | close(pb) 60 | } 61 | 62 | # create a new list with PRESS and permuted coefficients x wavelength x component number 63 | print("*** Providing PRESS and coefficient array output ***") 64 | output <- list(PRESS=press.out, coef_array=coefs) 65 | return(output) 66 | } 67 | 68 | 69 | ##' Run a PLSR model permutation analysis stratified by selected "groups". Can be used to 70 | ##' determine the optimal number of components or conduct a boostrap uncertainty analysis 71 | ##' 72 | ##' @param dataset input full PLSR dataset. Usually just the calibration dataset 73 | ##' @param targetVariable What object or variable to use as the Y (predictand) in the PLSR model? 74 | ##' Usually the "inVar" variable set at the beginning of a PLS script 75 | ##' @param maxComps maximum number of components to use for each PLSR fit 76 | ##' @param iterations how many different permutations to run 77 | ##' @param prop proportion of data to preserve for each permutation 78 | ##' @param verbose Should the function report the current iteration status/progress to the terminal 79 | ##' or run silently? TRUE/FALSE. Default FALSE 80 | ##' @param group_variables Character vector of the form c("var1", "var2"..."varn") 81 | ##' providing the factors used for stratified sampling in the PLSR permutation analysis 82 | ##' 83 | ##' @return output a list containing the PRESS and coef_array. 84 | ##' output <- list(PRESS=press.out, coef_array=coefs) 85 | ##' 86 | ##' @importFrom magrittr %>% 87 | ##' @importFrom dplyr mutate group_by_at slice n row_number 88 | ##' @importFrom pls plsr 89 | ##' @importFrom utils flush.console read.table setTxtProgressBar txtProgressBar 90 | ##' 91 | ##' @author asierrl, Shawn P. Serbin, Julien Lamour 92 | ##' @export 93 | ##' 94 | pls_permutation_by_groups <- function (dataset = NULL, targetVariable=NULL, maxComps = 20, 95 | iterations = 20, prop = 0.7, group_variables=NULL, 96 | verbose = FALSE) { 97 | inVar <- targetVariable 98 | coefs <- array(0, dim = c((ncol(dataset$Spectra) + 1), iterations, maxComps)) 99 | press.out <- array(data = NA, dim = c(iterations, maxComps)) 100 | print("*** Running permutation test. Please hang tight, this can take awhile ***") 101 | print("Options:") 102 | print(paste("Max Components:", maxComps, "Iterations:", iterations, 103 | "Data Proportion (percent):", prop * 100, sep = " ")) 104 | if (verbose) { 105 | j <- 1 106 | pb <- utils::txtProgressBar(min = 0, max = iterations, 107 | char = "*", width = 70, style = 3) 108 | } 109 | for (i in seq_along(1:iterations)) { 110 | if (!is.null(group_variables)) { 111 | trainset <- dataset %>% 112 | mutate(int_id=row_number()) %>% 113 | group_by_at(group_variables) %>% 114 | slice(sample(1:n(), prop * n())) 115 | rows <- trainset$int_id 116 | } else { 117 | rows <- sample(1:nrow(dataset), floor(prop * nrow(dataset))) 118 | } 119 | sub.data <- dataset[rows, ] 120 | val.sub.data <- dataset[-rows, ] 121 | plsr.out <- plsr(as.formula(paste(inVar, "~", "Spectra")), 122 | scale = FALSE, center = TRUE, ncomp = maxComps, 123 | validation = "none", 124 | data = sub.data) 125 | pred_val <- predict(plsr.out, newdata = val.sub.data) 126 | sq_resid <- (pred_val[, , ] - val.sub.data[, inVar])^2 127 | press <- apply(X = sq_resid, MARGIN = 2, FUN = sum) 128 | press.out[i, ] <- press 129 | coefs[, i, ] <- coef(plsr.out, intercept = TRUE, ncomp = 1:maxComps) 130 | rm(rows, sub.data, val.sub.data, plsr.out, pred_val, sq_resid, press) 131 | if (verbose) { 132 | setTxtProgressBar(pb, j) 133 | j <- j + 1 134 | flush.console() 135 | } 136 | } 137 | if (verbose) { 138 | close(pb) 139 | } 140 | # create a new list with PRESS and permuted coefficients x wavelength x component number 141 | print("*** Providing PRESS and coefficient array output ***") 142 | output <- list(PRESS = press.out, coef_array = coefs) 143 | return(output) 144 | } -------------------------------------------------------------------------------- /inst/scripts/apply_sserbin2019_lma_plsr_to_ely_example.R: -------------------------------------------------------------------------------- 1 | #################################################################################################### 2 | # 3 | # 4 | # Notes: 5 | # * The author notes the code is not the most elegant or clean, but is functional 6 | # * Questions, comments, or concerns can be sent to sserbin@bnl.gov 7 | # * Code is provided under GNU General Public License v3.0 8 | # 9 | #################################################################################################### 10 | 11 | 12 | #--------------------------------------------------------------------------------------------------# 13 | ### Load libraries 14 | list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra", 15 | "spectratrait") 16 | invisible(lapply(list.of.packages, library, character.only = TRUE)) 17 | #--------------------------------------------------------------------------------------------------# 18 | 19 | 20 | #--------------------------------------------------------------------------------------------------# 21 | ### Setup options 22 | 23 | # Default par options 24 | opar <- par(no.readonly = T) 25 | 26 | # Specify output directory, output_dir 27 | # Options: 28 | # tempdir - use a OS-specified temporary directory 29 | # user defined PATH - e.g. "~/scratch/PLSR" 30 | output_dir <- "tempdir" 31 | #--------------------------------------------------------------------------------------------------# 32 | 33 | 34 | #--------------------------------------------------------------------------------------------------# 35 | ### Load Ely et al 2019 dataset 36 | data("ely_plsr_data") 37 | head(ely_plsr_data)[,1:8] 38 | 39 | # What is the target variable? 40 | inVar <- "LMA_g_m2" 41 | #--------------------------------------------------------------------------------------------------# 42 | 43 | 44 | #--------------------------------------------------------------------------------------------------# 45 | ### Set working directory 46 | if (output_dir=="tempdir") { 47 | outdir <- tempdir() 48 | } else { 49 | if (! file.exists(output_dir)) dir.create(output_dir,recursive=TRUE) 50 | outdir <- file.path(path.expand(output_dir)) 51 | } 52 | setwd(outdir) # set working directory 53 | getwd() # check wd 54 | #--------------------------------------------------------------------------------------------------# 55 | 56 | 57 | #--------------------------------------------------------------------------------------------------# 58 | ### PLSR Coefficients - Grab from GitHub 59 | git_repo <- "https://raw.githubusercontent.com/serbinsh/SSerbin_etal_2019_NewPhytologist/master/" 60 | print("**** Downloading PLSR coefficients ****") 61 | githubURL <- paste0(git_repo,"SSerbin_multibiome_lma_plsr_model/sqrt_LMA_gDW_m2_PLSR_Coefficients_10comp.csv") 62 | LeafLMA.plsr.coeffs <- spectratrait::source_GitHubData(githubURL) 63 | rm(githubURL) 64 | githubURL <- paste0(git_repo,"SSerbin_multibiome_lma_plsr_model/sqrt_LMA_gDW_m2_Jackkife_PLSR_Coefficients.csv") 65 | LeafLMA.plsr.jk.coeffs <- spectratrait::source_GitHubData(githubURL) 66 | rm(githubURL) 67 | #--------------------------------------------------------------------------------------------------# 68 | 69 | 70 | #--------------------------------------------------------------------------------------------------# 71 | ### Ely et al spectral and trait data 72 | Start.wave <- 500 73 | End.wave <- 2400 74 | wv <- seq(Start.wave,End.wave,1) 75 | plsr_data <- ely_plsr_data 76 | #--------------------------------------------------------------------------------------------------# 77 | 78 | 79 | #--------------------------------------------------------------------------------------------------# 80 | #### Example data cleaning. End user needs to do what's appropriate for their 81 | #### data. This may be an iterative process. 82 | # Keep only complete rows of inVar and spec data before fitting 83 | plsr_data <- plsr_data[complete.cases(plsr_data[,names(plsr_data) %in% 84 | c(inVar,paste0("Wave_",wv))]),] 85 | #--------------------------------------------------------------------------------------------------# 86 | 87 | 88 | #--------------------------------------------------------------------------------------------------# 89 | print("**** Applying PLSR model to estimate LMA from spectral observations ****") 90 | # setup model 91 | dims <- dim(LeafLMA.plsr.coeffs) 92 | LeafLMA.plsr.intercept <- LeafLMA.plsr.coeffs[1,] 93 | LeafLMA.plsr.coeffs <- data.frame(LeafLMA.plsr.coeffs[2:dims[1],]) 94 | names(LeafLMA.plsr.coeffs) <- c("wavelength","coefs") 95 | LeafLMA.plsr.coeffs.vec <- as.vector(LeafLMA.plsr.coeffs[,2]) 96 | sub_spec <- droplevels(plsr_data[,which(names(plsr_data) %in% 97 | paste0("Wave_",seq(Start.wave,End.wave,1)))]) 98 | sub_spec <- sub_spec*0.01 # convert to 0-1 99 | plsr_pred <- as.matrix(sub_spec) %*% LeafLMA.plsr.coeffs.vec + LeafLMA.plsr.intercept[,2] 100 | leafLMA <- plsr_pred[,1]^2 # convert to standard LMA units from sqrt(LMA) 101 | names(leafLMA) <- "PLSR_LMA_gDW_m2" 102 | 103 | # organize output 104 | LeafLMA.PLSR.dataset <- data.frame(plsr_data[,which(names(plsr_data) %notin% 105 | paste0("Wave_",seq(Start.wave,End.wave,1)))], 106 | PLSR_LMA_gDW_m2=leafLMA, PLSR_Residuals=leafLMA-plsr_data[,inVar]) 107 | head(LeafLMA.PLSR.dataset) 108 | #--------------------------------------------------------------------------------------------------# 109 | 110 | 111 | #--------------------------------------------------------------------------------------------------# 112 | print("**** Generate PLSR uncertainty estimates ****") 113 | jk_coef <- data.frame(LeafLMA.plsr.jk.coeffs[,3:dim(LeafLMA.plsr.jk.coeffs)[2]]) 114 | jk_coef <- t(jk_coef) 115 | head(jk_coef)[,1:6] 116 | jk_int <- t(LeafLMA.plsr.jk.coeffs[,2]) 117 | head(jk_int)[,1:6] 118 | 119 | jk_pred <- as.matrix(sub_spec) %*% jk_coef + matrix(rep(jk_int, length(plsr_data[,inVar])), 120 | byrow=TRUE, ncol=length(jk_int)) 121 | jk_pred <- jk_pred^2 122 | head(jk_pred)[,1:6] 123 | dim(jk_pred) 124 | interval <- c(0.025,0.975) 125 | Interval_Conf <- apply(X = jk_pred, MARGIN = 1, FUN = quantile, 126 | probs=c(interval[1], interval[2])) 127 | sd_mean <- apply(X = jk_pred, MARGIN = 1, FUN =sd) 128 | sd_res <- sd(LeafLMA.PLSR.dataset$PLSR_Residuals) 129 | sd_tot <- sqrt(sd_mean^2+sd_res^2) 130 | LeafLMA.PLSR.dataset$LCI <- Interval_Conf[1,] 131 | LeafLMA.PLSR.dataset$UCI <- Interval_Conf[2,] 132 | LeafLMA.PLSR.dataset$LPI <- LeafLMA.PLSR.dataset$PLSR_LMA_gDW_m2-1.96*sd_tot 133 | LeafLMA.PLSR.dataset$UPI <- LeafLMA.PLSR.dataset$PLSR_LMA_gDW_m2+1.96*sd_tot 134 | head(LeafLMA.PLSR.dataset) 135 | #--------------------------------------------------------------------------------------------------# 136 | 137 | 138 | #--------------------------------------------------------------------------------------------------# 139 | rmsep_percrmsep <- spectratrait::percent_rmse(plsr_dataset = LeafLMA.PLSR.dataset, 140 | inVar = inVar, 141 | residuals = LeafLMA.PLSR.dataset$PLSR_Residuals, 142 | range="full") 143 | RMSEP <- rmsep_percrmsep$rmse 144 | perc_RMSEP <- rmsep_percrmsep$perc_rmse 145 | r2 <- round(summary(lm(LeafLMA.PLSR.dataset$PLSR_LMA_gDW_m2~ 146 | LeafLMA.PLSR.dataset[,inVar]))$adj.r.squared,2) 147 | expr <- vector("expression", 3) 148 | expr[[1]] <- bquote(R^2==.(r2)) 149 | expr[[2]] <- bquote(RMSEP==.(round(RMSEP,2))) 150 | expr[[3]] <- bquote("%RMSEP"==.(round(perc_RMSEP,2))) 151 | rng_vals <- c(min(LeafLMA.PLSR.dataset$LPI), max(LeafLMA.PLSR.dataset$UPI)) 152 | par(mfrow=c(1,1), mar=c(4.2,5.3,1,0.4), oma=c(0, 0.1, 0, 0.2)) 153 | plotrix::plotCI(LeafLMA.PLSR.dataset$PLSR_LMA_gDW_m2,LeafLMA.PLSR.dataset[,inVar], 154 | li=LeafLMA.PLSR.dataset$LPI, ui=LeafLMA.PLSR.dataset$UPI, gap=0.009,sfrac=0.000, 155 | lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), 156 | err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="grey80", 157 | cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"), 158 | ylab=paste0("Observed ", paste(inVar), " (units)"), 159 | cex.axis=1.5,cex.lab=1.8) 160 | abline(0,1,lty=2,lw=2) 161 | plotrix::plotCI(LeafLMA.PLSR.dataset$PLSR_LMA_gDW_m2,LeafLMA.PLSR.dataset[,inVar], 162 | li=LeafLMA.PLSR.dataset$LCI, ui=LeafLMA.PLSR.dataset$UCI, gap=0.009,sfrac=0.004, 163 | lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), 164 | err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="black", 165 | cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"), 166 | ylab=paste0("Observed ", paste(inVar), " (units)"), 167 | cex.axis=1.5,cex.lab=1.8, add=T) 168 | legend("topleft", legend=expr, bty="n", cex=1.5) 169 | legend("bottomright", legend=c("Prediction Interval","Confidence Interval"), 170 | lty=c(1,1), col = c("grey80","black"), lwd=3, bty="n", cex=1.5) 171 | box(lwd=2.2) 172 | dev.copy(png,file.path(outdir,paste0(inVar,"_PLSR_Validation_Scatterplot.png")), 173 | height=2800, width=3200, res=340) 174 | dev.off(); 175 | #--------------------------------------------------------------------------------------------------# 176 | 177 | #--------------------------------------------------------------------------------------------------# 178 | ### EOF -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PLSR modeling for the estimation of plant functional traits 2 | This repository contains source code and example scripts to illustrate best-practices for fitting, evaluating, and reporting spectra-trait PLSR models. This code and example scripts encompass several possibilities that you may encounter when carrying out PLSR model fitting. Start by reading *Burnett et al. (2021)*, then work through the scripts or vignettes we have provided here to get experience in developing models for estimating plant functional traits using spectral measurements! You can also explore examples available in the [Wiki](https://github.com/plantphys/spectratrait/wiki) pages 3 | 4 | ### Article citation: 5 | Burnett AC, Anderson J, Davidson KD, Ely KS, Lamour J, Li Q, Morrison BD, Yang D, Rogers A, Serbin SP (2021) A best-practice guide to predicting plant traits from leaf-level hyperspectral data using partial least squares regression. Journal of Experimental Botany. https://doi.org/10.1093/jxb/erab295 6 | 7 | ### Source code citation: 8 | [![DOI](https://zenodo.org/badge/222699149.svg)](https://zenodo.org/doi/10.5281/zenodo.4330119) 9 | 10 | ### EcoSML 11 | https://ecosml.org/package/github/TESTgroup-BNL/spectratrait 12 | 13 | ### Getting started, tips and tricks: 14 | * If you are new to R you should start by reading https://support.rstudio.com/hc/en-us/articles/201141096-Getting-Started-with-R & https://www.dataquest.io/blog/tutorial-getting-started-with-r-and-rstudio/ 15 | * Software requirements: R software (version 4.0 or above) and preferred operating environment (e.g. RStudio). 16 | * Install package dependencies and the spectratrait package: See the Depends and INSTALL sections below 17 | * To work with the repository locally, clone the repository to your local machine (https://docs.github.com/en/github/creating-cloning-and-archiving-repositories/cloning-a-repository). Once you have the repository on your local machine you can run the scripts in inst/scripts or vignettes folders. You can also start editing the code yourself or contributing to the development of the package through new pull requests (https://guides.github.com/activities/hello-world/) 18 | * Or if you don't want to obtain the code via cloning the repository, you can click the green "Code" button at the top of this page and select "Download ZIP". Extract the contents of the ZIP in your preferred location. Use RStudio to open your selected example script from the inst/scripts folder and then run or source the code. 19 | 20 | ### Depends: 21 | ggplot2 (>= 3.3.2), remotes (>= 2.2.0), devtools (>= 2.3.1), readr (>= 1.3.1), RCurl (>= 1.98-1.2), 22 | httr (>= 1.4.2), pls (>= 2.7-2), magrittr (>= 2.0.1), dplyr (>= 1.0.1), reshape2 (>= 1.4.4), here (>= 0.1), 23 | plotrix (>= 3.7-8), gridExtra (>= 2.3), scales (>= 1.1.1), knitr (>= 1.4.2) 24 | 25 | ### INSTALL 26 | spectratrait is not currently on CRAN, but you can install from GitHub using devtools(). First, make sure you have all of the package dependencies installed. You can do this either by 1) installing the packages individually using install.packages(), for example: 27 | 28 | ``` r 29 | install.packages("pls") 30 | install.packages("ggplot2") 31 | ... 32 | ``` 33 | 34 | and so forth until all of the dependencies (listed above in the "Depends" section) are installed. **Note** - you should pay careful attention at this stage to any R messages in your terminal alerting you that you need to update existing or install new R packages. These messages usually show up after you attempt to run install.packages() and require you 35 | to respond in your terminal to a y/n or multiple choice question before the install can continue. 36 | 37 | Or 2) you can also run or source the "install_dependencies.R" script located in inst/scripts which should also install all of the required dependencies. **Note** - again you will need to watch for any R prompts to update packages in order for the install to proceed correctly. 38 | 39 | Finally, to complete the installation you will also need to install the spectratrait package itself. You can do this by copying and pasting the command below into your R or RStudio (preferred) terminal. 40 | 41 | #### To install the main (default) branch version 42 | ``` r 43 | devtools::install_github(repo = "plantphys/spectratrait", dependencies=TRUE) 44 | ``` 45 |
46 | 47 | #### To install the main branch version - with Vignettes (though slower) 48 | ``` r 49 | devtools::install_github(repo = "plantphys/spectratrait", dependencies=TRUE, build_vignettes = TRUE) 50 | ``` 51 |
52 | 53 | #### To install a specific release, for example release 1.0.5 54 | ``` r 55 | devtools::install_github(repo = "plantphys/spectratrait@v1.0.5", dependencies=TRUE) 56 | ``` 57 | 58 |
59 | 60 | #### Or install a specific branch, e.g. a branch named devbranch 61 | ``` r 62 | devtools::install_github(repo = "plantphys/spectratrait", ref = "devbranch", dependencies=TRUE) 63 | ``` 64 | 65 |
66 | 67 | ## Contains: 68 | 1. Core package functions are located in the in the main "R" folder 69 | 2. inst/scripts contains example PLSR workflows for fitting example leaf and canopy spectra-trait PLSR models for different leaf traits, including LMA and foliar nitrogen 70 | 3. Example datasets that can be loaded in your R environment using the base load() function can be found in the data/ folder 71 | 4. man - the manual pages that are accessible in R 72 | 5. tests - package tests to check that functions are still operational and produce the expected results 73 | 6. vignettes - example Rmarkdown and github markdown vignettes illustrating the various PLSR model fitting examples. These can be used to learn how to use the PLSR workflow and associated functions for new applications 74 | 7. spectratrait_X.X.X.pdf (where X.X.X is the current release number) is the pdf documentation 75 | 76 | ### Linked dataset citations, DOIs, and EcoSIS IDs/URLs:
77 | 1) Leaf spectra, structural and biochemical leaf traits of eight crop species (Ely et al., 2019)
78 | EcoSIS URL: https://ecosis.org/package/leaf-spectra--structural-and-biochemical-leaf-traits-of-eight-crop-species
79 | EcoSIS ID: 25770ad9-d47c-428b-bf99-d1543a4b0ec9
80 | DOI: https://doi.org/doi:10.21232/C2GM2Z
81 | Rpubs LeafN bootstrap example output: https://rpubs.com/sserbin/spectratrait_ex1
82 | Rpubs LeafN bootstrap by group (species) example output: https://rpubs.com/sserbin/spectratrait_ex2
83 | 84 | 2) Leaf reflectance plant functional gradient IFGG/KIT
85 | Target variable: SLA
86 | EcoSIS URL: https://ecosis.org/package/leaf-reflectance-plant-functional-gradient-ifgg-kit
87 | EcoSIS ID: 3cf6b27e-d80e-4bc7-b214-c95506e46daa
88 | Rpubs example output: https://rpubs.com/sserbin/spectratrait_ex3
89 | 90 | 3) Fresh leaf spectra to estimate LMA over NEON domains in eastern United States
91 | Target variable: LMA
92 | EcoSIS URL: https://ecosis.org/package/fresh-leaf-spectra-to-estimate-lma-over-neon-domains-in-eastern-united-states
93 | EcoSIS ID: 5617da17-c925-49fb-b395-45a51291bd2d
94 | DOI: https://doi.org/doi:10.21232/9831-rq60
95 | Rpubs example output: https://rpubs.com/sserbin/spectratrait_ex4
96 | Rpubs example showing Serbin et al. (2019) applied to NEON data: https://rpubs.com/sserbin/spectratrait_ex9
97 | 98 | 4) Canopy spectra to map foliar functional traits over NEON domains in eastern United States
99 | Target variable: leaf nitrogen
100 | EcoSIS URL: https://ecosis.org/package/canopy-spectra-to-map-foliar-functional-traits-over-neon-domains-in-eastern-united-states
101 | EcoSIS ID: b9dbf3db-5b9c-4ab2-88c2-26c8b39d0903
102 | DOI: https://doi.org/doi:10.21232/e2jt-5209
103 | Rpubs leaf nitrogen example output: https://rpubs.com/sserbin/spectratrait_ex5
104 | 105 | 5) Leaf spectra of 36 species growing in Rosa rugosa invaded coastal grassland communities in Belgium
106 | Target variable: LMA, leaf nitrogen
107 | EcoSIS URL: https://ecosis.org/package/leaf-spectra-of-36-species-growing-in-rosa-rugosa-invaded-coastal-grassland-communities-in-belgium
108 | EcoSIS ID: 9db4c5a2-7eac-4e1e-8859-009233648e89
109 | DOI: https://doi.org/doi:10.21232/9nr6-sq54
110 | Rpubs LeafN example output: https://rpubs.com/sserbin/spectratrait_ex6
111 | Rpubs LeafN bootstrap example output: https://rpubs.com/sserbin/spectratrait_ex7
112 | Rpubs LMA example output: https://rpubs.com/sserbin/spectratrait_ex8
113 | 114 | ## Build status 115 | Auto-run PLSR example: 116 | [![.github/workflows/run_plsr_example_auto.yaml](https://github.com/plantphys/spectratrait/actions/workflows/run_plsr_example_auto.yaml/badge.svg?branch=main)](https://github.com/plantphys/spectratrait/actions/workflows/run_plsr_example_auto.yaml)
117 | CI run PLSR example: 118 | [![ci-run_PLSR_example](https://github.com/plantphys/spectratrait/actions/workflows/ci-run_plsr_example.yaml/badge.svg?branch=main)](https://github.com/plantphys/spectratrait/actions/workflows/ci-run_plsr_example.yaml)
119 | CI OS and R Release Checks: 120 | [![R-CMD-check-OS-R](https://github.com/plantphys/spectratrait/actions/workflows/check-os.yaml/badge.svg?branch=main)](https://github.com/plantphys/spectratrait/actions/workflows/check-os.yaml)
121 | Weekly CI Checks: 122 | [![R-CMD-check-Weekly](https://github.com/plantphys/spectratrait/actions/workflows/ci-weekly.yaml/badge.svg?branch=main)](https://github.com/plantphys/spectratrait/actions/workflows/ci-weekly.yaml)
123 | EcoSIS API Check: 124 | [![run_ecosis_pull_example](https://github.com/plantphys/spectratrait/actions/workflows/run_ecosis_pull_example.yaml/badge.svg?branch=main)](https://github.com/plantphys/spectratrait/actions/workflows/run_ecosis_pull_example.yaml) 125 | -------------------------------------------------------------------------------- /vignettes/sserbin2019_plsr_ex9.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: An example showing how to apply an existing PLSR model to new data. In this case applying the LMA model from Serbin et al., (2019; DOI - 10.1111/nph.16123) to a dataset collected at CONUS NEON field sites 3 | author: "Shawn P. Serbin, Julien Lamour, & Jeremiah Anderson" 4 | date: "`r Sys.Date()`" 5 | output: 6 | github_document: default 7 | html_notebook: default 8 | pdf_document: default 9 | html_document: 10 | df_print: paged 11 | keep_md: true 12 | rmarkdown: html_vignette 13 | vignette: > 14 | %\VignetteIndexEntry{An example showing how to apply an existing PLSR model to new data. In this case applying the LMA model from Serbin et al., (2019; DOI - 10.1111/nph.16123) to a dataset collected at CONUS NEON field sites} 15 | %\usepackage[utf8]{inputenc} 16 | %\VignetteEngine{knitr::knitr} 17 | --- 18 | 19 | ```{r setup, include=FALSE, echo=FALSE} 20 | knitr::opts_chunk$set(echo = TRUE) 21 | ``` 22 | 23 | ### Getting Started 24 | ### Load libraries 25 | ```{r, eval=TRUE, echo=TRUE} 26 | list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra", 27 | "spectratrait") 28 | invisible(lapply(list.of.packages, library, character.only = TRUE)) 29 | ``` 30 | 31 | ### Setup other functions and options 32 | ```{r, echo=TRUE} 33 | ### Setup options 34 | 35 | # Script options 36 | pls::pls.options(plsralg = "oscorespls") 37 | pls::pls.options("plsralg") 38 | 39 | # Default par options 40 | opar <- par(no.readonly = T) 41 | 42 | # What is the target variable? 43 | inVar <- "LMA_gDW_m2" 44 | 45 | # What is the source dataset from EcoSIS? 46 | ecosis_id <- "5617da17-c925-49fb-b395-45a51291bd2d" 47 | 48 | # Specify output directory, output_dir 49 | # Options: 50 | # tempdir - use a OS-specified temporary directory 51 | # user defined PATH - e.g. "~/scratch/PLSR" 52 | output_dir <- "tempdir" 53 | ``` 54 | 55 | ### Set working directory (scratch space) 56 | ```{r, echo=FALSE} 57 | if (output_dir=="tempdir") { 58 | outdir <- tempdir() 59 | } else { 60 | if (! file.exists(output_dir)) dir.create(output_dir,recursive=TRUE) 61 | outdir <- file.path(path.expand(output_dir)) 62 | } 63 | setwd(outdir) # set working directory 64 | getwd() # check wd 65 | ``` 66 | 67 | ### Grab PLSR Coefficients from GitHub 68 | ```{r, echo=TRUE} 69 | git_repo <- "https://raw.githubusercontent.com/serbinsh/SSerbin_etal_2019_NewPhytologist/master/" 70 | print("**** Downloading PLSR coefficients ****") 71 | githubURL <- paste0(git_repo,"SSerbin_multibiome_lma_plsr_model/sqrt_LMA_gDW_m2_PLSR_Coefficients_10comp.csv") 72 | LeafLMA.plsr.coeffs <- spectratrait::source_GitHubData(githubURL) 73 | rm(githubURL) 74 | githubURL <- paste0(git_repo,"SSerbin_multibiome_lma_plsr_model/sqrt_LMA_gDW_m2_Jackkife_PLSR_Coefficients.csv") 75 | LeafLMA.plsr.jk.coeffs <- spectratrait::source_GitHubData(githubURL) 76 | rm(githubURL) 77 | ``` 78 | 79 | ### Get source dataset from EcoSIS 80 | ```{r, echo=TRUE} 81 | dat_raw <- spectratrait::get_ecosis_data(ecosis_id = ecosis_id) 82 | head(dat_raw) 83 | names(dat_raw)[1:40] 84 | ``` 85 | 86 | ### Prepare new data for estimation 87 | ```{r, echo=TRUE} 88 | Start.wave <- 500 89 | End.wave <- 2400 90 | wv <- seq(Start.wave,End.wave,1) 91 | Spectra <- as.matrix(dat_raw[,names(dat_raw) %in% wv]) 92 | colnames(Spectra) <- c(paste0("Wave_",wv)) 93 | head(Spectra)[1:6,1:10] 94 | sample_info <- dat_raw[,names(dat_raw) %notin% seq(350,2500,1)] 95 | head(sample_info) 96 | 97 | sample_info2 <- sample_info %>% 98 | select(Domain,Functional_type,Sample_ID,USDA_Species_Code=`USDA Symbol`,LMA_gDW_m2=LMA) 99 | head(sample_info2) 100 | 101 | plsr_data <- data.frame(sample_info2,Spectra) 102 | rm(sample_info,sample_info2,Spectra) 103 | ``` 104 | 105 | #### Example data cleaning. 106 | ```{r, echo=TRUE} 107 | #### End user needs to do what's appropriate for their data. This may be an iterative process. 108 | # Keep only complete rows of inVar and spec data before fitting 109 | plsr_data <- plsr_data[complete.cases(plsr_data[,names(plsr_data) %in% 110 | c(inVar,paste0("Wave_",wv))]),] 111 | ``` 112 | 113 | #### Prepare PLSR model 114 | ```{r, echo=TRUE} 115 | print("**** Applying PLSR model to estimate LMA from spectral observations ****") 116 | # setup model 117 | dims <- dim(LeafLMA.plsr.coeffs) 118 | LeafLMA.plsr.intercept <- LeafLMA.plsr.coeffs[1,] 119 | LeafLMA.plsr.coeffs <- data.frame(LeafLMA.plsr.coeffs[2:dims[1],]) 120 | names(LeafLMA.plsr.coeffs) <- c("wavelength","coefs") 121 | LeafLMA.plsr.coeffs.vec <- as.vector(LeafLMA.plsr.coeffs[,2]) 122 | sub_spec <- droplevels(plsr_data[,which(names(plsr_data) %in% 123 | paste0("Wave_",seq(Start.wave,End.wave,1)))]) 124 | ``` 125 | 126 | #### Apply PLSR model 127 | ```{r, echo=TRUE} 128 | plsr_pred <- as.matrix(sub_spec) %*% LeafLMA.plsr.coeffs.vec + LeafLMA.plsr.intercept[,2] 129 | leafLMA <- plsr_pred[,1]^2 # convert to standard LMA units from sqrt(LMA) 130 | names(leafLMA) <- "PLSR_LMA_gDW_m2" 131 | 132 | # organize output 133 | LeafLMA.PLSR.dataset <- data.frame(plsr_data[,which(names(plsr_data) %notin% 134 | paste0("Wave_",seq(Start.wave,End.wave,1)))], 135 | PLSR_LMA_gDW_m2=leafLMA, PLSR_Residuals=leafLMA-plsr_data[,inVar]) 136 | head(LeafLMA.PLSR.dataset) 137 | ``` 138 | 139 | #### Generate PLSR uncertainty estimates 140 | ```{r, echo=TRUE} 141 | print("**** Generate PLSR uncertainty estimates ****") 142 | jk_coef <- data.frame(LeafLMA.plsr.jk.coeffs[,3:dim(LeafLMA.plsr.jk.coeffs)[2]]) 143 | jk_coef <- t(jk_coef) 144 | head(jk_coef)[,1:6] 145 | jk_int <- t(LeafLMA.plsr.jk.coeffs[,2]) 146 | head(jk_int)[,1:6] 147 | 148 | jk_pred <- as.matrix(sub_spec) %*% jk_coef + matrix(rep(jk_int, length(plsr_data[,inVar])), 149 | byrow=TRUE, ncol=length(jk_int)) 150 | jk_pred <- jk_pred^2 151 | head(jk_pred)[,1:6] 152 | dim(jk_pred) 153 | interval <- c(0.025,0.975) 154 | Interval_Conf <- apply(X = jk_pred, MARGIN = 1, FUN = quantile, 155 | probs=c(interval[1], interval[2])) 156 | sd_mean <- apply(X = jk_pred, MARGIN = 1, FUN =sd) 157 | sd_res <- sd(LeafLMA.PLSR.dataset$PLSR_Residuals) 158 | sd_tot <- sqrt(sd_mean^2+sd_res^2) 159 | LeafLMA.PLSR.dataset$LCI <- Interval_Conf[1,] 160 | LeafLMA.PLSR.dataset$UCI <- Interval_Conf[2,] 161 | LeafLMA.PLSR.dataset$LPI <- LeafLMA.PLSR.dataset$PLSR_LMA_gDW_m2-1.96*sd_tot 162 | LeafLMA.PLSR.dataset$UPI <- LeafLMA.PLSR.dataset$PLSR_LMA_gDW_m2+1.96*sd_tot 163 | head(LeafLMA.PLSR.dataset) 164 | ``` 165 | 166 | #### Generate PLSR estimated LMA observed vs predicted plot 167 | ```{r, fig.height = 7, fig.width = 8, echo=TRUE} 168 | rmsep_percrmsep <- spectratrait::percent_rmse(plsr_dataset = LeafLMA.PLSR.dataset, 169 | inVar = inVar, 170 | residuals = LeafLMA.PLSR.dataset$PLSR_Residuals, 171 | range="full") 172 | RMSEP <- rmsep_percrmsep$rmse 173 | perc_RMSEP <- rmsep_percrmsep$perc_rmse 174 | r2 <- round(summary(lm(LeafLMA.PLSR.dataset$PLSR_LMA_gDW_m2~ 175 | LeafLMA.PLSR.dataset[,inVar]))$adj.r.squared,2) 176 | expr <- vector("expression", 3) 177 | expr[[1]] <- bquote(R^2==.(r2)) 178 | expr[[2]] <- bquote(RMSEP==.(round(RMSEP,2))) 179 | expr[[3]] <- bquote("%RMSEP"==.(round(perc_RMSEP,2))) 180 | rng_vals <- c(min(LeafLMA.PLSR.dataset$LPI), max(LeafLMA.PLSR.dataset$UPI)) 181 | par(mfrow=c(1,1), mar=c(4,5.3,1,0.4), oma=c(0.1, 0.1, 0.1, 0.2)) 182 | plotrix::plotCI(LeafLMA.PLSR.dataset$PLSR_LMA_gDW_m2,LeafLMA.PLSR.dataset[,inVar], 183 | li=LeafLMA.PLSR.dataset$LPI, ui=LeafLMA.PLSR.dataset$UPI, gap=0.009,sfrac=0.000, 184 | lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), 185 | err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="grey80", 186 | cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"), 187 | ylab=paste0("Observed ", paste(inVar), " (units)"), 188 | cex.axis=1.5,cex.lab=1.8) 189 | abline(0,1,lty=2,lw=2) 190 | plotrix::plotCI(LeafLMA.PLSR.dataset$PLSR_LMA_gDW_m2,LeafLMA.PLSR.dataset[,inVar], 191 | li=LeafLMA.PLSR.dataset$LCI, ui=LeafLMA.PLSR.dataset$UCI, gap=0.009,sfrac=0.004, 192 | lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), 193 | err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="black", 194 | cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"), 195 | ylab=paste0("Observed ", paste(inVar), " (units)"), 196 | cex.axis=1.5,cex.lab=1.8, add=T) 197 | legend("topleft", legend=expr, bty="n", cex=1.5) 198 | legend("bottomright", legend=c("Prediction Interval","Confidence Interval"), 199 | lty=c(1,1), col = c("grey80","black"), lwd=3, bty="n", cex=1.5) 200 | box(lwd=2.2) 201 | dev.copy(png,file.path(outdir,paste0(inVar,"_PLSR_Validation_Scatterplot.png")), 202 | height=2800, width=3200, res=340) 203 | dev.off(); 204 | ``` 205 | ```{r, echo=TRUE} 206 | print(paste("Output directory: ", outdir)) 207 | 208 | # Observed versus predicted 209 | write.csv(LeafLMA.PLSR.dataset,file=file.path(outdir, 210 | paste0(inVar,'_PLSR_Estimates.csv')), 211 | row.names=FALSE) 212 | ``` 213 | 214 | ### Confirm files were written to temp space 215 | ```{r, echo=TRUE} 216 | print("**** PLSR output files: ") 217 | print(list.files(outdir)[grep(pattern = inVar, list.files(outdir))]) 218 | ``` 219 | -------------------------------------------------------------------------------- /inst/scripts/apply_sserbin2019_lma_plsr_to_neon_example.R: -------------------------------------------------------------------------------- 1 | #################################################################################################### 2 | # 3 | # 4 | # Notes: 5 | # * The author notes the code is not the most elegant or clean, but is functional 6 | # * Questions, comments, or concerns can be sent to sserbin@bnl.gov 7 | # * Code is provided under GNU General Public License v3.0 8 | # 9 | #################################################################################################### 10 | 11 | 12 | #--------------------------------------------------------------------------------------------------# 13 | ### Load libraries 14 | list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra", 15 | "spectratrait") 16 | invisible(lapply(list.of.packages, library, character.only = TRUE)) 17 | #--------------------------------------------------------------------------------------------------# 18 | 19 | 20 | #--------------------------------------------------------------------------------------------------# 21 | ### Setup options 22 | 23 | # Default par options 24 | opar <- par(no.readonly = T) 25 | 26 | # What is the target variable? 27 | inVar <- "LMA_gDW_m2" 28 | 29 | # What is the source dataset from EcoSIS? 30 | ecosis_id <- "5617da17-c925-49fb-b395-45a51291bd2d" 31 | 32 | # Specify output directory, output_dir 33 | # Options: 34 | # tempdir - use a OS-specified temporary directory 35 | # user defined PATH - e.g. "~/scratch/PLSR" 36 | output_dir <- "tempdir" 37 | #--------------------------------------------------------------------------------------------------# 38 | 39 | 40 | #--------------------------------------------------------------------------------------------------# 41 | ### Set working directory 42 | if (output_dir=="tempdir") { 43 | outdir <- tempdir() 44 | } else { 45 | if (! file.exists(output_dir)) dir.create(output_dir,recursive=TRUE) 46 | outdir <- file.path(path.expand(output_dir)) 47 | } 48 | setwd(outdir) # set working directory 49 | getwd() # check wd 50 | #--------------------------------------------------------------------------------------------------# 51 | 52 | 53 | #--------------------------------------------------------------------------------------------------# 54 | ### PLSR Coefficients - Grab from GitHub 55 | git_repo <- "https://raw.githubusercontent.com/serbinsh/SSerbin_etal_2019_NewPhytologist/master/" 56 | print("**** Downloading PLSR coefficients ****") 57 | githubURL <- paste0(git_repo,"SSerbin_multibiome_lma_plsr_model/sqrt_LMA_gDW_m2_PLSR_Coefficients_10comp.csv") 58 | LeafLMA.plsr.coeffs <- spectratrait::source_GitHubData(githubURL) 59 | rm(githubURL) 60 | githubURL <- paste0(git_repo,"SSerbin_multibiome_lma_plsr_model/sqrt_LMA_gDW_m2_Jackkife_PLSR_Coefficients.csv") 61 | LeafLMA.plsr.jk.coeffs <- spectratrait::source_GitHubData(githubURL) 62 | rm(githubURL) 63 | #--------------------------------------------------------------------------------------------------# 64 | 65 | 66 | #--------------------------------------------------------------------------------------------------# 67 | ### Get source dataset from EcoSIS 68 | dat_raw <- spectratrait::get_ecosis_data(ecosis_id = ecosis_id) 69 | head(dat_raw) 70 | names(dat_raw)[1:40] 71 | #--------------------------------------------------------------------------------------------------# 72 | 73 | 74 | #--------------------------------------------------------------------------------------------------# 75 | ### Prepare new data for estimation 76 | Start.wave <- 500 77 | End.wave <- 2400 78 | wv <- seq(Start.wave,End.wave,1) 79 | Spectra <- as.matrix(dat_raw[,names(dat_raw) %in% wv]) 80 | colnames(Spectra) <- c(paste0("Wave_",wv)) 81 | head(Spectra)[1:6,1:10] 82 | sample_info <- dat_raw[,names(dat_raw) %notin% seq(350,2500,1)] 83 | head(sample_info) 84 | 85 | sample_info2 <- sample_info %>% 86 | select(Domain,Functional_type,Sample_ID,USDA_Species_Code=`USDA Symbol`,LMA_gDW_m2=LMA) 87 | head(sample_info2) 88 | 89 | plsr_data <- data.frame(sample_info2,Spectra) 90 | rm(sample_info,sample_info2,Spectra) 91 | #--------------------------------------------------------------------------------------------------# 92 | 93 | 94 | #--------------------------------------------------------------------------------------------------# 95 | #### Example data cleaning. End user needs to do what's appropriate for their 96 | #### data. This may be an iterative process. 97 | # Keep only complete rows of inVar and spec data before fitting 98 | plsr_data <- plsr_data[complete.cases(plsr_data[,names(plsr_data) %in% 99 | c(inVar,paste0("Wave_",wv))]),] 100 | #--------------------------------------------------------------------------------------------------# 101 | 102 | 103 | #--------------------------------------------------------------------------------------------------# 104 | print("**** Applying PLSR model to estimate LMA from spectral observations ****") 105 | # setup model 106 | dims <- dim(LeafLMA.plsr.coeffs) 107 | LeafLMA.plsr.intercept <- LeafLMA.plsr.coeffs[1,] 108 | LeafLMA.plsr.coeffs <- data.frame(LeafLMA.plsr.coeffs[2:dims[1],]) 109 | names(LeafLMA.plsr.coeffs) <- c("wavelength","coefs") 110 | LeafLMA.plsr.coeffs.vec <- as.vector(LeafLMA.plsr.coeffs[,2]) 111 | sub_spec <- droplevels(plsr_data[,which(names(plsr_data) %in% 112 | paste0("Wave_",seq(Start.wave,End.wave,1)))]) 113 | plsr_pred <- as.matrix(sub_spec) %*% LeafLMA.plsr.coeffs.vec + LeafLMA.plsr.intercept[,2] 114 | leafLMA <- plsr_pred[,1]^2 # convert to standard LMA units from sqrt(LMA) 115 | names(leafLMA) <- "PLSR_LMA_gDW_m2" 116 | 117 | # organize output 118 | LeafLMA.PLSR.dataset <- data.frame(plsr_data[,which(names(plsr_data) %notin% 119 | paste0("Wave_",seq(Start.wave,End.wave,1)))], 120 | PLSR_LMA_gDW_m2=leafLMA, PLSR_Residuals=leafLMA-plsr_data[,inVar]) 121 | head(LeafLMA.PLSR.dataset) 122 | #--------------------------------------------------------------------------------------------------# 123 | 124 | 125 | #--------------------------------------------------------------------------------------------------# 126 | print("**** Generate PLSR uncertainty estimates ****") 127 | jk_coef <- data.frame(LeafLMA.plsr.jk.coeffs[,3:dim(LeafLMA.plsr.jk.coeffs)[2]]) 128 | jk_coef <- t(jk_coef) 129 | head(jk_coef)[,1:6] 130 | jk_int <- t(LeafLMA.plsr.jk.coeffs[,2]) 131 | head(jk_int)[,1:6] 132 | 133 | jk_pred <- as.matrix(sub_spec) %*% jk_coef + matrix(rep(jk_int, length(plsr_data[,inVar])), 134 | byrow=TRUE, ncol=length(jk_int)) 135 | jk_pred <- jk_pred^2 136 | head(jk_pred)[,1:6] 137 | dim(jk_pred) 138 | interval <- c(0.025,0.975) 139 | Interval_Conf <- apply(X = jk_pred, MARGIN = 1, FUN = quantile, 140 | probs=c(interval[1], interval[2])) 141 | sd_mean <- apply(X = jk_pred, MARGIN = 1, FUN =sd) 142 | sd_res <- sd(LeafLMA.PLSR.dataset$PLSR_Residuals) 143 | sd_tot <- sqrt(sd_mean^2+sd_res^2) 144 | LeafLMA.PLSR.dataset$LCI <- Interval_Conf[1,] 145 | LeafLMA.PLSR.dataset$UCI <- Interval_Conf[2,] 146 | LeafLMA.PLSR.dataset$LPI <- LeafLMA.PLSR.dataset$PLSR_LMA_gDW_m2-1.96*sd_tot 147 | LeafLMA.PLSR.dataset$UPI <- LeafLMA.PLSR.dataset$PLSR_LMA_gDW_m2+1.96*sd_tot 148 | head(LeafLMA.PLSR.dataset) 149 | #--------------------------------------------------------------------------------------------------# 150 | 151 | 152 | #--------------------------------------------------------------------------------------------------# 153 | rmsep_percrmsep <- spectratrait::percent_rmse(plsr_dataset = LeafLMA.PLSR.dataset, 154 | inVar = inVar, 155 | residuals = LeafLMA.PLSR.dataset$PLSR_Residuals, 156 | range="full") 157 | RMSEP <- rmsep_percrmsep$rmse 158 | perc_RMSEP <- rmsep_percrmsep$perc_rmse 159 | r2 <- round(summary(lm(LeafLMA.PLSR.dataset$PLSR_LMA_gDW_m2~ 160 | LeafLMA.PLSR.dataset[,inVar]))$adj.r.squared,2) 161 | expr <- vector("expression", 3) 162 | expr[[1]] <- bquote(R^2==.(r2)) 163 | expr[[2]] <- bquote(RMSEP==.(round(RMSEP,2))) 164 | expr[[3]] <- bquote("%RMSEP"==.(round(perc_RMSEP,2))) 165 | rng_vals <- c(min(LeafLMA.PLSR.dataset$LPI), max(LeafLMA.PLSR.dataset$UPI)) 166 | par(mfrow=c(1,1), mar=c(4.2,5.3,1,0.4), oma=c(0, 0.1, 0, 0.2)) 167 | plotrix::plotCI(LeafLMA.PLSR.dataset$PLSR_LMA_gDW_m2,LeafLMA.PLSR.dataset[,inVar], 168 | li=LeafLMA.PLSR.dataset$LPI, ui=LeafLMA.PLSR.dataset$UPI, gap=0.009,sfrac=0.000, 169 | lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), 170 | err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="grey80", 171 | cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"), 172 | ylab=paste0("Observed ", paste(inVar), " (units)"), 173 | cex.axis=1.5,cex.lab=1.8) 174 | abline(0,1,lty=2,lw=2) 175 | plotrix::plotCI(LeafLMA.PLSR.dataset$PLSR_LMA_gDW_m2,LeafLMA.PLSR.dataset[,inVar], 176 | li=LeafLMA.PLSR.dataset$LCI, ui=LeafLMA.PLSR.dataset$UCI, gap=0.009,sfrac=0.004, 177 | lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), 178 | err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="black", 179 | cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"), 180 | ylab=paste0("Observed ", paste(inVar), " (units)"), 181 | cex.axis=1.5,cex.lab=1.8, add=T) 182 | legend("topleft", legend=expr, bty="n", cex=1.5) 183 | legend("bottomright", legend=c("Prediction Interval","Confidence Interval"), 184 | lty=c(1,1), col = c("grey80","black"), lwd=3, bty="n", cex=1.5) 185 | box(lwd=2.2) 186 | dev.copy(png,file.path(outdir,paste0(inVar,"_PLSR_Validation_Scatterplot.png")), 187 | height=2800, width=3200, res=340) 188 | dev.off(); 189 | #--------------------------------------------------------------------------------------------------# 190 | 191 | 192 | #--------------------------------------------------------------------------------------------------# 193 | ### EOF -------------------------------------------------------------------------------- /inst/scripts/apply_sserbin2014_leafN_plsr_to_ext_data_example.R: -------------------------------------------------------------------------------- 1 | #################################################################################################### 2 | # 3 | # 4 | # Notes: 5 | # * The author notes the code is not the most elegant or clean, but is functional 6 | # * Questions, comments, or concerns can be sent to sserbin@bnl.gov 7 | # * Code is provided under GNU General Public License v3.0 8 | # 9 | #################################################################################################### 10 | 11 | 12 | #--------------------------------------------------------------------------------------------------# 13 | ### Load libraries 14 | list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra", 15 | "spectratrait") 16 | invisible(lapply(list.of.packages, library, character.only = TRUE)) 17 | #--------------------------------------------------------------------------------------------------# 18 | 19 | 20 | #--------------------------------------------------------------------------------------------------# 21 | ### Setup options 22 | 23 | # Default par options 24 | opar <- par(no.readonly = T) 25 | 26 | # Specify output directory, output_dir 27 | # Options: 28 | # tempdir - use a OS-specified temporary directory 29 | # user defined PATH - e.g. "~/scratch/PLSR" 30 | output_dir <- "tempdir" 31 | #--------------------------------------------------------------------------------------------------# 32 | 33 | 34 | #--------------------------------------------------------------------------------------------------# 35 | ### Set ecosis dataset 36 | # https://ecosis.org/package/fresh-leaf-spectra-to-estimate-leaf-traits-for-california-ecosystems 37 | # title: Fresh Leaf Spectra to Estimate Leaf Traits for California Ecosystems 38 | 39 | # What is the target variable? 40 | inVar <- "Nmass_g_g" # unclear if N in this dataset is Nmass or Narea. Assuming Nmass 41 | 42 | # What is the source dataset from EcoSIS? 43 | ecosis_id <- "0fadcc45-f79e-4fd3-a6ca-8afaf26ae299" 44 | #--------------------------------------------------------------------------------------------------# 45 | 46 | 47 | #--------------------------------------------------------------------------------------------------# 48 | ### Set working directory 49 | if (output_dir=="tempdir") { 50 | outdir <- tempdir() 51 | } else { 52 | if (! file.exists(output_dir)) dir.create(output_dir,recursive=TRUE) 53 | outdir <- file.path(path.expand(output_dir)) 54 | } 55 | setwd(outdir) # set working directory 56 | getwd() # check wd 57 | #--------------------------------------------------------------------------------------------------# 58 | 59 | 60 | #--------------------------------------------------------------------------------------------------# 61 | ### Grab ecosis data 62 | print(paste0("Output directory: ",getwd())) # check wd 63 | dat_raw <- spectratrait::get_ecosis_data(ecosis_id = ecosis_id) 64 | #--------------------------------------------------------------------------------------------------# 65 | 66 | 67 | #--------------------------------------------------------------------------------------------------# 68 | ### PLSR Coefficients - Grab from GitHub 69 | git_repo <- "https://raw.githubusercontent.com/serbinsh/NASA_FFT_Leaf_Spectra-Trait_Models/refs/heads/master/" 70 | print("**** Downloading PLSR coefficients ****") 71 | githubURL <- paste0(git_repo,"PLSR_model_coefficients/leaf_Nitrogen/FFT_Leaf_Nitrogen_PLSR_Coefficients_11comp.csv") 72 | LeafN.plsr.coeffs <- spectratrait::source_GitHubData(githubURL) 73 | rm(githubURL) 74 | githubURL <- paste0(git_repo,"PLSR_model_coefficients/leaf_Nitrogen/FFT_Leaf_Nitrogen_Jackkife_PLSR_Coefficients.csv") 75 | LeafN.plsr.jk.coeffs <- spectratrait::source_GitHubData(githubURL) 76 | rm(githubURL) 77 | #--------------------------------------------------------------------------------------------------# 78 | 79 | 80 | #--------------------------------------------------------------------------------------------------# 81 | ### Setup target spectral and trait data 82 | ### Create plsr dataset 83 | Start.wave <- 1500 84 | End.wave <- 2400 85 | wv <- seq(Start.wave,End.wave,1) 86 | Spectra <- as.matrix(dat_raw[,names(dat_raw) %in% wv]) 87 | colnames(Spectra) <- c(paste0("Wave_",wv)) 88 | sample_info <- dat_raw[,names(dat_raw) %notin% seq(350,2500,1)] 89 | head(sample_info) 90 | 91 | sample_info2 <- sample_info %>% 92 | select(Plant_Species=`species`, Nmass_g_g=`Leaf nitrogen content per leaf dry mass`) 93 | head(sample_info2) 94 | 95 | plsr_data <- data.frame(sample_info2,Spectra) 96 | rm(sample_info,sample_info2,Spectra) 97 | #--------------------------------------------------------------------------------------------------# 98 | 99 | 100 | #--------------------------------------------------------------------------------------------------# 101 | #### Example data cleaning. End user needs to do what's appropriate for their 102 | #### data. This may be an iterative process. 103 | # Keep only complete rows of inVar and spec data before fitting 104 | plsr_data <- plsr_data[complete.cases(plsr_data[,names(plsr_data) %in% 105 | c(inVar,paste0("Wave_",wv))]),] 106 | #--------------------------------------------------------------------------------------------------# 107 | 108 | 109 | #--------------------------------------------------------------------------------------------------# 110 | print("**** Applying PLSR model to estimate leaf N mass from spectral observations ****") 111 | # setup model 112 | dims <- dim(LeafN.plsr.coeffs) 113 | LeafN.plsr.intercept <- LeafN.plsr.coeffs[1,] 114 | LeafN.plsr.coeffs <- data.frame(LeafN.plsr.coeffs[2:dims[1],]) 115 | names(LeafN.plsr.coeffs) <- c("wavelength","coefs") 116 | LeafN.plsr.coeffs.vec <- as.vector(LeafN.plsr.coeffs[,2]) 117 | sub_spec <- droplevels(plsr_data[,which(names(plsr_data) %in% 118 | paste0("Wave_",seq(Start.wave,End.wave,1)))]) 119 | #sub_spec <- sub_spec*0.01 # convert to 0-1 120 | plsr_pred <- as.matrix(sub_spec) %*% LeafN.plsr.coeffs.vec + LeafN.plsr.intercept[,2] 121 | leafN <- plsr_pred[,1] 122 | names(leafN) <- "PLSR_LeafN_g_g" 123 | 124 | # organize output 125 | LeafN.PLSR.dataset <- data.frame(plsr_data[,which(names(plsr_data) %notin% 126 | paste0("Wave_",seq(Start.wave,End.wave,1)))], 127 | PLSR_LeafN_g_g=leafN, PLSR_Residuals=leafN-plsr_data[,inVar]) 128 | head(LeafN.PLSR.dataset) 129 | #--------------------------------------------------------------------------------------------------# 130 | 131 | 132 | #--------------------------------------------------------------------------------------------------# 133 | print("**** Generate PLSR uncertainty estimates ****") 134 | jk_coef <- data.frame(LeafN.plsr.jk.coeffs[,3:dim(LeafN.plsr.jk.coeffs)[2]]) 135 | jk_coef <- t(jk_coef) 136 | head(jk_coef)[,1:6] 137 | jk_int <- t(LeafN.plsr.jk.coeffs[,2]) 138 | head(jk_int)[,1:6] 139 | 140 | jk_pred <- as.matrix(sub_spec) %*% jk_coef + matrix(rep(jk_int, length(plsr_data[,inVar])), 141 | byrow=TRUE, ncol=length(jk_int)) 142 | jk_pred <- jk_pred^2 143 | head(jk_pred)[,1:6] 144 | dim(jk_pred) 145 | interval <- c(0.025,0.975) 146 | Interval_Conf <- apply(X = jk_pred, MARGIN = 1, FUN = quantile, 147 | probs=c(interval[1], interval[2])) 148 | sd_mean <- apply(X = jk_pred, MARGIN = 1, FUN =sd) 149 | sd_res <- sd(LeafN.PLSR.dataset$PLSR_Residuals) 150 | sd_tot <- sqrt(sd_mean^2+sd_res^2) 151 | LeafN.PLSR.dataset$LCI <- Interval_Conf[1,] 152 | LeafN.PLSR.dataset$UCI <- Interval_Conf[2,] 153 | LeafN.PLSR.dataset$LPI <- LeafN.PLSR.dataset$PLSR_LeafN_g_g-1.96*sd_tot 154 | LeafN.PLSR.dataset$UPI <- LeafN.PLSR.dataset$PLSR_LeafN_g_g+1.96*sd_tot 155 | head(LeafN.PLSR.dataset) 156 | #--------------------------------------------------------------------------------------------------# 157 | 158 | 159 | #--------------------------------------------------------------------------------------------------# 160 | rmsep_percrmsep <- spectratrait::percent_rmse(plsr_dataset = LeafN.PLSR.dataset, 161 | inVar = inVar, 162 | residuals = LeafN.PLSR.dataset$PLSR_Residuals, 163 | range="full") 164 | RMSEP <- rmsep_percrmsep$rmse 165 | perc_RMSEP <- rmsep_percrmsep$perc_rmse 166 | r2 <- round(summary(lm(LeafN.PLSR.dataset$PLSR_LeafN_g_g~ 167 | LeafN.PLSR.dataset[,inVar]))$adj.r.squared,2) 168 | expr <- vector("expression", 3) 169 | expr[[1]] <- bquote(R^2==.(r2)) 170 | expr[[2]] <- bquote(RMSEP==.(round(RMSEP,2))) 171 | expr[[3]] <- bquote("%RMSEP"==.(round(perc_RMSEP,2))) 172 | rng_vals <- c(min(LeafN.PLSR.dataset$LPI), max(LeafN.PLSR.dataset$UPI)) 173 | par(mfrow=c(1,1), mar=c(4.2,5.3,1,0.4), oma=c(0, 0.1, 0, 0.2)) 174 | plotrix::plotCI(LeafN.PLSR.dataset$PLSR_LeafN_g_g,LeafN.PLSR.dataset[,inVar], 175 | li=LeafN.PLSR.dataset$LPI, ui=LeafN.PLSR.dataset$UPI, gap=0.009,sfrac=0.000, 176 | lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), 177 | err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="grey80", 178 | cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"), 179 | ylab=paste0("Observed ", paste(inVar), " (units)"), 180 | cex.axis=1.5,cex.lab=1.8) 181 | abline(0,1,lty=2,lw=2) 182 | plotrix::plotCI(LeafN.PLSR.dataset$PLSR_LeafN_g_g,LeafN.PLSR.dataset[,inVar], 183 | li=LeafN.PLSR.dataset$LCI, ui=LeafN.PLSR.dataset$UCI, gap=0.009,sfrac=0.004, 184 | lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), 185 | err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="black", 186 | cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"), 187 | ylab=paste0("Observed ", paste(inVar), " (units)"), 188 | cex.axis=1.5,cex.lab=1.8, add=T) 189 | legend("topleft", legend=expr, bty="n", cex=1.5) 190 | legend("bottomright", legend=c("Prediction Interval","Confidence Interval"), 191 | lty=c(1,1), col = c("grey80","black"), lwd=3, bty="n", cex=1.5) 192 | box(lwd=2.2) 193 | dev.copy(png,file.path(outdir,paste0(inVar,"_PLSR_Validation_Scatterplot.png")), 194 | height=2800, width=3200, res=340) 195 | dev.off(); 196 | #--------------------------------------------------------------------------------------------------# 197 | 198 | #--------------------------------------------------------------------------------------------------# 199 | ### EOF -------------------------------------------------------------------------------- /inst/scripts/simple_spectra-trait_plsr_example.R: -------------------------------------------------------------------------------- 1 | #################################################################################################### 2 | # 3 | # 4 | # A simple example "How-to" script illustrating the use of basic PLSR modeling to develop a 5 | # spectra-trait algorithm to estimate leaf mass area with leaf-level spectroscopy data. The 6 | # example is built from published data source from the EcoSIS spectral database. 7 | # 8 | # Spectra and trait data source: 9 | # https://ecosis.org/package/fresh-leaf-spectra-to-estimate-lma-over-neon-domains-in-eastern-united-states 10 | # 11 | # Notes: 12 | # * Provided as a basic example of how to apply the model to new spectra observations 13 | # * The author notes the code is not the most elegant or clean, but is functional 14 | # * Questions, comments, or concerns can be sent to sserbin@bnl.gov 15 | # * Code is provided under GNU General Public License v3.0 16 | # 17 | # 18 | #################################################################################################### 19 | 20 | 21 | #--------------------------------------------------------------------------------------------------# 22 | ### Load libraries 23 | list.of.packages <- c("pls","here","dplyr","plotrix","ggplot2","gridExtra","spectratrait") 24 | invisible(lapply(list.of.packages, library, character.only = TRUE)) 25 | #--------------------------------------------------------------------------------------------------# 26 | 27 | 28 | #--------------------------------------------------------------------------------------------------# 29 | ### Setup options 30 | 31 | # Script options 32 | pls::pls.options(plsralg = "oscorespls") 33 | pls::pls.options("plsralg") 34 | 35 | # Default par options 36 | opar <- par(no.readonly = T) 37 | 38 | # What is the target variable? 39 | inVar <- "LMA_gDW_m2" 40 | 41 | # What is the source dataset from EcoSIS? 42 | ecosis_id <- "5617da17-c925-49fb-b395-45a51291bd2d" 43 | 44 | # Specify output directory, output_dir 45 | # Options: 46 | # tempdir - use a OS-specified temporary directory 47 | # user defined PATH - e.g. "~/scratch/PLSR" 48 | output_dir <- "tempdir" 49 | #--------------------------------------------------------------------------------------------------# 50 | 51 | 52 | #--------------------------------------------------------------------------------------------------# 53 | ### Set working directory 54 | if (output_dir=="tempdir") { 55 | outdir <- tempdir() 56 | } else { 57 | if (! file.exists(output_dir)) dir.create(output_dir,recursive=TRUE) 58 | outdir <- file.path(path.expand(output_dir)) 59 | } 60 | setwd(outdir) # set working directory 61 | getwd() # check wd 62 | #--------------------------------------------------------------------------------------------------# 63 | 64 | 65 | #--------------------------------------------------------------------------------------------------# 66 | ### Get source dataset from EcoSIS 67 | dat_raw <- spectratrait::get_ecosis_data(ecosis_id = ecosis_id) 68 | head(dat_raw) 69 | names(dat_raw)[1:40] 70 | #--------------------------------------------------------------------------------------------------# 71 | 72 | 73 | #--------------------------------------------------------------------------------------------------# 74 | ### Create plsr dataset 75 | Start.wave <- 500 76 | End.wave <- 2400 77 | wv <- seq(Start.wave,End.wave,1) 78 | Spectra <- as.matrix(dat_raw[,names(dat_raw) %in% wv]) 79 | colnames(Spectra) <- c(paste0("Wave_",wv)) 80 | head(Spectra)[1:6,1:10] 81 | sample_info <- dat_raw[,names(dat_raw) %notin% seq(350,2500,1)] 82 | head(sample_info) 83 | 84 | sample_info2 <- sample_info %>% 85 | select(Domain,Functional_type,Sample_ID,USDA_Species_Code=`USDA Symbol`,LMA_gDW_m2=LMA) 86 | head(sample_info2) 87 | 88 | plsr_data <- data.frame(sample_info2,Spectra) 89 | rm(sample_info,sample_info2,Spectra) 90 | dim(plsr_data) 91 | #--------------------------------------------------------------------------------------------------# 92 | 93 | 94 | #--------------------------------------------------------------------------------------------------# 95 | #### Example data cleaning. End user needs to do what's appropriate for their 96 | #### data. This may be an iterative process. 97 | # Keep only complete rows of inVar and spec data before fitting 98 | plsr_data <- plsr_data[complete.cases(plsr_data[,names(plsr_data) %in% 99 | c(inVar,paste0("Wave_",wv))]),] 100 | #--------------------------------------------------------------------------------------------------# 101 | 102 | 103 | #--------------------------------------------------------------------------------------------------# 104 | #### Prepare data for fitting 105 | spec <- as.matrix(plsr_data[, which(names(plsr_data) %in% paste0("Wave_",wv))]) 106 | plsr_data <- data.frame(plsr_data[, which(names(plsr_data) %notin% paste0("Wave_",wv))], 107 | Spectra=I(spec)) 108 | head(plsr_data)[1:5] 109 | #--------------------------------------------------------------------------------------------------# 110 | 111 | 112 | #--------------------------------------------------------------------------------------------------# 113 | ### plot cal and val spectra 114 | par(mfrow=c(1,1)) # B, L, T, R 115 | spectratrait::f.plot.spec(Z=plsr_data$Spectra,wv=seq(Start.wave,End.wave,1), 116 | plot_label="CONUS NEON Data") 117 | dev.copy(png,file.path(outdir,paste0(inVar,'_Cal_Val_Spectra.png')), 118 | height=2500,width=4900, res=340) 119 | dev.off(); 120 | par(mfrow=c(1,1)) 121 | #--------------------------------------------------------------------------------------------------# 122 | 123 | 124 | #--------------------------------------------------------------------------------------------------# 125 | ### Use permutation to determine the optimal number of components 126 | if(grepl("Windows", sessionInfo()$running)){ 127 | pls.options(parallel = NULL) 128 | } else { 129 | pls.options(parallel = parallel::detectCores()-1) 130 | } 131 | 132 | method <- "firstPlateau" #pls, firstPlateau, firstMin 133 | random_seed <- 2356812 134 | seg <- 250 135 | maxComps <- 20 136 | iterations <- 40 137 | prop <- 0.70 138 | if (method=="pls") { 139 | nComps <- spectratrait::find_optimal_components(dataset=plsr_data, targetVariable=inVar, 140 | method=method, maxComps=maxComps, seg=seg, 141 | random_seed=random_seed) 142 | print(paste0("*** Optimal number of components: ", nComps)) 143 | } else { 144 | nComps <- spectratrait::find_optimal_components(dataset=plsr_data, targetVariable=inVar, 145 | method=method, maxComps=maxComps, iterations=iterations, 146 | seg=seg, prop=prop, random_seed=random_seed) 147 | } 148 | dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_PLSR_Component_Selection.png"))), 149 | height=2800, width=3400, res=340) 150 | dev.off(); 151 | #--------------------------------------------------------------------------------------------------# 152 | 153 | 154 | #--------------------------------------------------------------------------------------------------# 155 | ### Fit final model 156 | segs <- 100 157 | plsr.out <- plsr(as.formula(paste(inVar,"~","Spectra")),scale=FALSE,ncomp=nComps,validation="CV", 158 | segments=segs, segment.type="interleaved",trace=FALSE,data=plsr_data) 159 | fit <- plsr.out$fitted.values[,1,nComps] 160 | pls.options(parallel = NULL) 161 | 162 | # External validation fit stats 163 | par(mfrow=c(1,2)) # B, L, T, R 164 | pls::RMSEP(plsr.out) 165 | plot(pls::RMSEP(plsr.out), main="MODEL RMSEP", 166 | xlab="Number of Components",ylab="Model RMSEP",lty=1,col="black",cex=1.5,lwd=2) 167 | box(lwd=2.2) 168 | 169 | pls::R2(plsr.out) 170 | plot(pls::R2(plsr.out), main="MODEL R2", 171 | xlab="Number of Components",ylab="Model R2",lty=1,col="black",cex=1.5,lwd=2) 172 | box(lwd=2.2) 173 | dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_RMSEP_R2_by_Component.png"))), 174 | height=2800, width=4800, res=340) 175 | dev.off(); 176 | par(opar) 177 | #--------------------------------------------------------------------------------------------------# 178 | 179 | 180 | #--------------------------------------------------------------------------------------------------# 181 | ### PLSR fit observed vs. predicted plot data 182 | plsr_data.output <- data.frame(plsr_data[, which(names(plsr_data) %notin% "Spectra")], 183 | PLSR_Predicted=fit, 184 | PLSR_CV_Predicted=as.vector(plsr.out$validation$pred[,,nComps])) 185 | plsr_data.output <- plsr_data.output %>% 186 | mutate(PLSR_CV_Residuals = PLSR_CV_Predicted-get(inVar)) 187 | head(plsr_data.output) 188 | cal.R2 <- round(pls::R2(plsr.out,intercept=F)[[1]][nComps],2) 189 | cal.RMSEP <- round(sqrt(mean(plsr_data.output$PLSR_CV_Residuals^2)),2) 190 | 191 | rng_quant <- quantile(plsr_data.output[,inVar], probs = c(0.001, 0.999)) 192 | cal_scatter_plot <- ggplot(plsr_data.output, aes(x=PLSR_CV_Predicted, y=get(inVar))) + 193 | theme_bw() + geom_point() + geom_abline(intercept = 0, slope = 1, color="dark grey", 194 | linetype="dashed", linewidth=1.5) + 195 | xlim(rng_quant[1], rng_quant[2]) + 196 | ylim(rng_quant[1], rng_quant[2]) + 197 | labs(x=paste0("Predicted ", paste(inVar), " (units)"), 198 | y=paste0("Observed ", paste(inVar), " (units)"), 199 | title=paste0("Calibration: ", paste0("Rsq = ", cal.R2), "; ", paste0("RMSEP = ", cal.RMSEP))) + 200 | theme(axis.text=element_text(size=18), legend.position="none", 201 | axis.title=element_text(size=20, face="bold"), 202 | axis.text.x = element_text(angle = 0,vjust = 0.5), 203 | panel.border = element_rect(linetype = "solid", fill = NA, linewidth=1.5)) 204 | 205 | cal_resid_histogram <- ggplot(plsr_data.output, aes(x=PLSR_CV_Residuals)) + 206 | geom_histogram(alpha=.5, position="identity") + 207 | geom_vline(xintercept = 0, color="black", 208 | linetype="dashed", linewidth=1) + theme_bw() + 209 | theme(axis.text=element_text(size=18), legend.position="none", 210 | axis.title=element_text(size=20, face="bold"), 211 | axis.text.x = element_text(angle = 0,vjust = 0.5), 212 | panel.border = element_rect(linetype = "solid", fill = NA, linewidth=1.5)) 213 | 214 | # plot cal/val side-by-side 215 | scatterplots <- grid.arrange(cal_scatter_plot, cal_resid_histogram, nrow=2, ncol=1) 216 | ggsave(paste0(inVar,"Obs_vs_Pred_scatterplot.png"), plot = scatterplots, device="png", 217 | width = 32, 218 | height = 30, units = "cm", 219 | dpi = 300) 220 | #--------------------------------------------------------------------------------------------------# 221 | 222 | 223 | 224 | #--------------------------------------------------------------------------------------------------# 225 | ### eof -------------------------------------------------------------------------------- /R/find_optimal_components.R: -------------------------------------------------------------------------------- 1 | ##' Applies different methods for the determination of the optimal number of PLSR model components 2 | ##' 3 | ##' @param dataset input full PLSR dataset. Usually just the calibration dataset 4 | ##' @param targetVariable What object or variable to use as the Y (predictand) in the PLSR model? 5 | ##' Usually the "inVar" variable set at the beginning of a PLS script 6 | ##' @param method Which approach to use to find optimal components. Options: pls, firstPlateau, firstMin 7 | ##' @param maxComps maximum number of components to consider 8 | ##' @param iterations how many different permutations to run 9 | ##' @param seg For the built-in pls method, how many different data segments to select from the input dataset 10 | ##' @param prop proportion of data to preserve for each permutation 11 | ##' @param random_seed random seed to use for splitting data 12 | ##' 13 | ##' @importFrom stats as.formula coef predict quantile t.test 14 | ##' @importFrom pls plsr selectNcomp 15 | ##' @importFrom reshape2 melt 16 | ##' @import ggplot2 17 | ##' 18 | ##' @return nComps the optimal number of PLSR components 19 | ##' 20 | ##' @author Julien Lamour, Jeremiah Anderson, Shawn P. Serbin 21 | ##' @export 22 | find_optimal_components <- function(dataset=NULL, targetVariable=NULL, method="pls", maxComps=20, 23 | iterations=20, seg=100, prop=0.70, random_seed=123456789) { 24 | 25 | set.seed(random_seed) 26 | inVar <- targetVariable 27 | print("*** Identifying optimal number of PLSR components ***") 28 | 29 | if(method=="pls") { 30 | print("*** Running PLS permutation test ***") 31 | 32 | plsr.out <- pls::plsr(as.formula(paste(inVar,"~","Spectra")), scale=FALSE, center=TRUE, ncomp=maxComps, 33 | validation="CV", segments = seg, segment.type="interleaved", trace=FALSE, 34 | jackknife=TRUE, data=dataset) 35 | nComps <- pls::selectNcomp(plsr.out, method = "onesigma", plot = TRUE) 36 | } 37 | if(method=="firstPlateau") { 38 | press.out <- spectratrait::pls_permutation(dataset=dataset, targetVariable=inVar, maxComps=maxComps, 39 | iterations=iterations, prop=prop) 40 | # PRESS plot 41 | pressDF <- as.data.frame(press.out$PRESS) 42 | names(pressDF) <- as.character(seq(maxComps)) 43 | pressDFres <- reshape2::melt(pressDF) 44 | results <- NULL 45 | for(i in 1:(maxComps-1)){ 46 | p_value <- t.test(press.out$PRESS[,i], press.out$PRESS[,(i+1)])$p.value 47 | temp_results <- data.frame(Component=(i+1), P.value= round(p_value, 6)) 48 | results <- rbind(results, temp_results) 49 | } 50 | nComps <- min(results[results$P.value > 0.05, "Component"]) 51 | print(paste0("*** Optimal number of components based on t.test: ", nComps)) 52 | bp <- ggplot(pressDFres, aes(x=variable, y=value)) + theme_bw() + 53 | geom_boxplot(notch=FALSE) + labs(x="Number of Components", y="PRESS") + 54 | stat_boxplot(geom = "errorbar", width = 0.2) + 55 | geom_vline(xintercept = nComps, linetype="dashed", 56 | color = "blue", size=1.0) 57 | theme(axis.text=element_text(size=18), legend.position="none", 58 | axis.title=element_text(size=20, face="bold"), 59 | axis.text.x = element_text(angle = 0,vjust = 0.5), 60 | panel.border = element_rect(linetype = "solid", fill = NA, size=1.5)) 61 | print(bp) 62 | } 63 | if(method=="firstMin") { 64 | press.out <- spectratrait::pls_permutation(dataset=dataset, targetVariable=inVar, maxComps=maxComps, 65 | iterations=iterations, prop=prop) 66 | # PRESS plot 67 | pressDF <- as.data.frame(press.out$PRESS) 68 | names(pressDF) <- as.character(seq(maxComps)) 69 | pressDFres <- reshape2::melt(pressDF) 70 | # find lowest press 71 | mean_PRESS_comp <- apply(X = pressDF, MARGIN = 2, FUN = mean) 72 | lowest_PRESS <- which.min(mean_PRESS_comp) 73 | results <- as.vector(array(data="NA", dim=c(lowest_PRESS-1,1))) 74 | for (i in seq_along(1:(lowest_PRESS-1))) { 75 | comp1 <- i; comp2 <- lowest_PRESS 76 | ttest <- t.test(pressDFres$value[which(pressDFres$variable==comp1)], 77 | pressDFres$value[which(pressDFres$variable==comp2)]) 78 | #print(i) 79 | results[i] <- round(unlist(ttest$p.value),8) 80 | } 81 | results <- data.frame(seq(1,lowest_PRESS-1,1),results) 82 | names(results) <- c("Component", "P.value") 83 | first <- min(which(as.numeric(as.character(results$P.value)) > 0.05)) 84 | nComps <- results$Component[first] 85 | print(paste0("*** Optimal number of components based on t.test: ", nComps)) 86 | bp <- ggplot(pressDFres, aes(x=variable, y=value)) + theme_bw() + 87 | geom_boxplot(notch=FALSE) + labs(x="Number of Components", y="PRESS") + 88 | stat_boxplot(geom = "errorbar", width = 0.2) + 89 | geom_vline(xintercept = nComps, linetype="dashed", 90 | color = "blue", size=1.0) 91 | theme(axis.text=element_text(size=18), legend.position="none", 92 | axis.title=element_text(size=20, face="bold"), 93 | axis.text.x = element_text(angle = 0,vjust = 0.5), 94 | panel.border = element_rect(linetype = "solid", fill = NA, size=1.5)) 95 | print(bp) 96 | } 97 | return(nComps) 98 | } 99 | 100 | ##' Uses the firstMin and firstPlateau methods for the determination of the optimal number of PLSR model components, 101 | ##' by group (i.e. optimal selection by stratification) 102 | ##' 103 | ##' @param dataset input full PLSR dataset. Usually just the calibration dataset 104 | ##' @param targetVariable What object or variable to use as the Y (predictand) in the PLSR model? 105 | ##' Usually the "inVar" variable set at the beginning of a PLS script 106 | ##' @param method Which approach to use to find optimal components. Options: firstPlateau, firstMin 107 | ##' @param maxComps maximum number of components to consider 108 | ##' @param iterations how many different permutations to run 109 | ##' @param prop proportion of data to preserve for each permutation 110 | ##' @param random_seed random seed to use for splitting data 111 | ##' @param group_variables group_variables character vector of the form c("var1", "var2"..."varn") 112 | ##' providing the factors used for stratified sampling. 113 | ##' 114 | ##' @importFrom stats as.formula coef predict quantile t.test 115 | ##' @import ggplot2 116 | ##' @importFrom reshape2 melt 117 | ##' 118 | ##' @return nComps the optimal number of PLSR components 119 | ##' 120 | ##' @author asierrl, Shawn P. Serbin 121 | ##' @export 122 | find_optimal_comp_by_groups <- function (dataset = NULL, targetVariable = NULL, method = "firstPlateau", 123 | maxComps = 20, iterations = 20, prop = 0.7, random_seed = 123456789, 124 | group_variables = NULL) { 125 | set.seed(random_seed) 126 | inVar <- targetVariable 127 | # TODO - really should merge this with the original and have an if/else if not NULL and select either 128 | # pls_permutation OR pls_permutation_by_groups. 129 | print("*** Identifying optimal number of PLSR components using stratified resampling by group_variables ***") 130 | if (method == "pls") { 131 | stop("*** Please select either the firstMin and firstPlateau. The pls package approach is not compatible ***") 132 | } 133 | if (method == "firstPlateau") { 134 | press.out <- spectratrait::pls_permutation_by_groups(dataset=dataset, targetVariable=inVar, 135 | maxComps=maxComps, iterations=iterations, 136 | prop=prop, group_variables=group_variables) 137 | pressDF <- as.data.frame(press.out$PRESS) 138 | names(pressDF) <- as.character(seq(maxComps)) 139 | pressDFres <- reshape2::melt(pressDF) 140 | results <- NULL 141 | for (i in 1:(maxComps - 1)) { 142 | p_value <- t.test(press.out$PRESS[, i], press.out$PRESS[, (i + 1)])$p.value 143 | temp_results <- data.frame(Component = (i + 1), P.value = round(p_value, 6)) 144 | results <- rbind(results, temp_results) 145 | } 146 | nComps <- min(results[results$P.value > 0.05, "Component"]) 147 | print(paste0("*** Optimal number of components based on t.test: ", nComps)) 148 | bp <- ggplot(pressDFres, aes(x = variable, y = value)) + 149 | theme_bw() + geom_boxplot(notch = FALSE) + labs(x = "Number of Components", 150 | y = "PRESS") + 151 | stat_boxplot(geom = "errorbar", width = 0.2) + 152 | geom_vline(xintercept = nComps, linetype = "dashed", 153 | color = "blue", size = 1) 154 | theme(axis.text = element_text(size = 18), legend.position = "none", 155 | axis.title = element_text(size = 20, face = "bold"), 156 | axis.text.x = element_text(angle = 0, vjust = 0.5), 157 | panel.border = element_rect(linetype = "solid", 158 | fill = NA, size = 1.5)) 159 | print(bp) 160 | } 161 | if (method == "firstMin") { 162 | press.out <- spectratrait::pls_permutation_by_groups(dataset = dataset, targetVariable=inVar, 163 | maxComps=maxComps, iterations=iterations, 164 | prop=prop, group_variables=group_variables) 165 | pressDF <- as.data.frame(press.out$PRESS) 166 | names(pressDF) <- as.character(seq(maxComps)) 167 | pressDFres <- reshape2::melt(pressDF) 168 | mean_PRESS_comp <- apply(X = pressDF, MARGIN = 2, FUN = mean) 169 | lowest_PRESS <- which.min(mean_PRESS_comp) 170 | results <- as.vector(array(data = "NA", dim = c(lowest_PRESS - 1, 1))) 171 | for (i in seq_along(1:(lowest_PRESS - 1))) { 172 | comp1 <- i 173 | comp2 <- lowest_PRESS 174 | ttest <- t.test(pressDFres$value[which(pressDFres$variable == comp1)], 175 | pressDFres$value[which(pressDFres$variable == comp2)]) 176 | results[i] <- round(unlist(ttest$p.value), 8) 177 | } 178 | results <- data.frame(seq(1, lowest_PRESS - 1, 1), results) 179 | names(results) <- c("Component", "P.value") 180 | first <- min(which(as.numeric(as.character(results$P.value)) > 0.05)) 181 | nComps <- results$Component[first] 182 | print(paste0("*** Optimal number of components based on t.test: ", nComps)) 183 | bp <- ggplot(pressDFres, aes(x = variable, y = value)) + 184 | theme_bw() + geom_boxplot(notch = FALSE) + labs(x = "Number of Components", 185 | y = "PRESS") + 186 | stat_boxplot(geom = "errorbar", width = 0.2) + 187 | geom_vline(xintercept = nComps, linetype = "dashed", 188 | color = "blue", size = 1) 189 | theme(axis.text = element_text(size = 18), legend.position = "none", 190 | axis.title = element_text(size = 20, face = "bold"), 191 | axis.text.x = element_text(angle = 0, vjust = 0.5), 192 | panel.border = element_rect(linetype = "solid", 193 | fill = NA, size = 1.5)) 194 | print(bp) 195 | } 196 | return(nComps) 197 | } -------------------------------------------------------------------------------- /vignettes/sserbin2019_plsr_ex9.md: -------------------------------------------------------------------------------- 1 | An example showing how to apply an existing PLSR model to new data. In 2 | this case applying the LMA model from Serbin et al., (2019; DOI - 3 | 10.1111/nph.16123) to a dataset collected at CONUS NEON field sites 4 | ================ 5 | Shawn P. Serbin, Julien Lamour, & Jeremiah Anderson 6 | 2025-10-29 7 | 8 | ### Getting Started 9 | 10 | ### Load libraries 11 | 12 | ``` r 13 | list.of.packages <- c("pls","dplyr","reshape2","here","plotrix","ggplot2","gridExtra", 14 | "spectratrait") 15 | invisible(lapply(list.of.packages, library, character.only = TRUE)) 16 | ``` 17 | 18 | ## 19 | ## Attaching package: 'pls' 20 | 21 | ## The following object is masked from 'package:stats': 22 | ## 23 | ## loadings 24 | 25 | ## 26 | ## Attaching package: 'dplyr' 27 | 28 | ## The following objects are masked from 'package:stats': 29 | ## 30 | ## filter, lag 31 | 32 | ## The following objects are masked from 'package:base': 33 | ## 34 | ## intersect, setdiff, setequal, union 35 | 36 | ## here() starts at /Users/sserbin/Data/Github/spectratrait 37 | 38 | ## 39 | ## Attaching package: 'gridExtra' 40 | 41 | ## The following object is masked from 'package:dplyr': 42 | ## 43 | ## combine 44 | 45 | ### Setup other functions and options 46 | 47 | ``` r 48 | ### Setup options 49 | 50 | # Script options 51 | pls::pls.options(plsralg = "oscorespls") 52 | pls::pls.options("plsralg") 53 | ``` 54 | 55 | ## $plsralg 56 | ## [1] "oscorespls" 57 | 58 | ``` r 59 | # Default par options 60 | opar <- par(no.readonly = T) 61 | 62 | # What is the target variable? 63 | inVar <- "LMA_gDW_m2" 64 | 65 | # What is the source dataset from EcoSIS? 66 | ecosis_id <- "5617da17-c925-49fb-b395-45a51291bd2d" 67 | 68 | # Specify output directory, output_dir 69 | # Options: 70 | # tempdir - use a OS-specified temporary directory 71 | # user defined PATH - e.g. "~/scratch/PLSR" 72 | output_dir <- "tempdir" 73 | ``` 74 | 75 | ### Set working directory (scratch space) 76 | 77 | ## [1] "/private/var/folders/tq/tydmhlwn1bdf_0pmpcq70r2c0000gn/T/RtmpyEgmZw" 78 | 79 | ### Grab PLSR Coefficients from GitHub 80 | 81 | ``` r 82 | git_repo <- "https://raw.githubusercontent.com/serbinsh/SSerbin_etal_2019_NewPhytologist/master/" 83 | print("**** Downloading PLSR coefficients ****") 84 | ``` 85 | 86 | ## [1] "**** Downloading PLSR coefficients ****" 87 | 88 | ``` r 89 | githubURL <- paste0(git_repo,"SSerbin_multibiome_lma_plsr_model/sqrt_LMA_gDW_m2_PLSR_Coefficients_10comp.csv") 90 | LeafLMA.plsr.coeffs <- spectratrait::source_GitHubData(githubURL) 91 | rm(githubURL) 92 | githubURL <- paste0(git_repo,"SSerbin_multibiome_lma_plsr_model/sqrt_LMA_gDW_m2_Jackkife_PLSR_Coefficients.csv") 93 | LeafLMA.plsr.jk.coeffs <- spectratrait::source_GitHubData(githubURL) 94 | rm(githubURL) 95 | ``` 96 | 97 | ### Get source dataset from EcoSIS 98 | 99 | ``` r 100 | dat_raw <- spectratrait::get_ecosis_data(ecosis_id = ecosis_id) 101 | ``` 102 | 103 | ## [1] "**** Downloading Ecosis data ****" 104 | 105 | ## Downloading data... 106 | 107 | ## Rows: 6312 Columns: 2162 108 | ## ── Column specification ──────────────────────────────────────────────────────── 109 | ## Delimiter: "," 110 | ## chr (10): Affiliation, Common Name, Domain, Functional_type, Latin Genus, ... 111 | ## dbl (2152): LMA, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361,... 112 | ## 113 | ## ℹ Use `spec()` to retrieve the full column specification for this data. 114 | ## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message. 115 | ## Download complete! 116 | 117 | ``` r 118 | head(dat_raw) 119 | ``` 120 | 121 | ## # A tibble: 6 × 2,162 122 | ## Affiliation `Common Name` Domain Functional_type LMA `Latin Genus` 123 | ## 124 | ## 1 University of Wiscon… black walnut D02 broadleaf 72.9 Juglans 125 | ## 2 University of Wiscon… black walnut D02 broadleaf 72.9 Juglans 126 | ## 3 University of Wiscon… black walnut D02 broadleaf 60.8 Juglans 127 | ## 4 University of Wiscon… black walnut D02 broadleaf 60.8 Juglans 128 | ## 5 University of Wiscon… black walnut D02 broadleaf 85.9 Juglans 129 | ## 6 University of Wiscon… black walnut D02 broadleaf 85.9 Juglans 130 | ## # ℹ 2,156 more variables: `Latin Species` , PI , Project , 131 | ## # Sample_ID , `USDA Symbol` , `350` , `351` , 132 | ## # `352` , `353` , `354` , `355` , `356` , 133 | ## # `357` , `358` , `359` , `360` , `361` , 134 | ## # `362` , `363` , `364` , `365` , `366` , 135 | ## # `367` , `368` , `369` , `370` , `371` , 136 | ## # `372` , `373` , `374` , `375` , `376` , … 137 | 138 | ``` r 139 | names(dat_raw)[1:40] 140 | ``` 141 | 142 | ## [1] "Affiliation" "Common Name" "Domain" "Functional_type" 143 | ## [5] "LMA" "Latin Genus" "Latin Species" "PI" 144 | ## [9] "Project" "Sample_ID" "USDA Symbol" "350" 145 | ## [13] "351" "352" "353" "354" 146 | ## [17] "355" "356" "357" "358" 147 | ## [21] "359" "360" "361" "362" 148 | ## [25] "363" "364" "365" "366" 149 | ## [29] "367" "368" "369" "370" 150 | ## [33] "371" "372" "373" "374" 151 | ## [37] "375" "376" "377" "378" 152 | 153 | ### Prepare new data for estimation 154 | 155 | ``` r 156 | Start.wave <- 500 157 | End.wave <- 2400 158 | wv <- seq(Start.wave,End.wave,1) 159 | Spectra <- as.matrix(dat_raw[,names(dat_raw) %in% wv]) 160 | colnames(Spectra) <- c(paste0("Wave_",wv)) 161 | head(Spectra)[1:6,1:10] 162 | ``` 163 | 164 | ## Wave_500 Wave_501 Wave_502 Wave_503 Wave_504 Wave_505 Wave_506 Wave_507 165 | ## [1,] 0.044226 0.044605 0.044927 0.045473 0.046241 0.046878 0.047826 0.049090 166 | ## [2,] 0.046855 0.047601 0.047944 0.048478 0.049381 0.050235 0.051161 0.052191 167 | ## [3,] 0.043758 0.044171 0.044869 0.045465 0.045984 0.046933 0.047993 0.049090 168 | ## [4,] 0.041154 0.041603 0.042088 0.042408 0.042639 0.043260 0.044140 0.045058 169 | ## [5,] 0.037296 0.037944 0.038209 0.038677 0.039388 0.039948 0.040630 0.041501 170 | ## [6,] 0.043878 0.044257 0.044723 0.045295 0.045949 0.046575 0.047378 0.048357 171 | ## Wave_508 Wave_509 172 | ## [1,] 0.050268 0.051525 173 | ## [2,] 0.053322 0.054357 174 | ## [3,] 0.050168 0.051441 175 | ## [4,] 0.045700 0.046476 176 | ## [5,] 0.042613 0.043731 177 | ## [6,] 0.049392 0.050387 178 | 179 | ``` r 180 | sample_info <- dat_raw[,names(dat_raw) %notin% seq(350,2500,1)] 181 | head(sample_info) 182 | ``` 183 | 184 | ## # A tibble: 6 × 11 185 | ## Affiliation `Common Name` Domain Functional_type LMA `Latin Genus` 186 | ## 187 | ## 1 University of Wiscon… black walnut D02 broadleaf 72.9 Juglans 188 | ## 2 University of Wiscon… black walnut D02 broadleaf 72.9 Juglans 189 | ## 3 University of Wiscon… black walnut D02 broadleaf 60.8 Juglans 190 | ## 4 University of Wiscon… black walnut D02 broadleaf 60.8 Juglans 191 | ## 5 University of Wiscon… black walnut D02 broadleaf 85.9 Juglans 192 | ## 6 University of Wiscon… black walnut D02 broadleaf 85.9 Juglans 193 | ## # ℹ 5 more variables: `Latin Species` , PI , Project , 194 | ## # Sample_ID , `USDA Symbol` 195 | 196 | ``` r 197 | sample_info2 <- sample_info %>% 198 | select(Domain,Functional_type,Sample_ID,USDA_Species_Code=`USDA Symbol`,LMA_gDW_m2=LMA) 199 | head(sample_info2) 200 | ``` 201 | 202 | ## # A tibble: 6 × 5 203 | ## Domain Functional_type Sample_ID USDA_Species_Code LMA_gDW_m2 204 | ## 205 | ## 1 D02 broadleaf P0001 JUNI 72.9 206 | ## 2 D02 broadleaf L0001 JUNI 72.9 207 | ## 3 D02 broadleaf P0002 JUNI 60.8 208 | ## 4 D02 broadleaf L0002 JUNI 60.8 209 | ## 5 D02 broadleaf P0003 JUNI 85.9 210 | ## 6 D02 broadleaf L0003 JUNI 85.9 211 | 212 | ``` r 213 | plsr_data <- data.frame(sample_info2,Spectra) 214 | rm(sample_info,sample_info2,Spectra) 215 | ``` 216 | 217 | #### Example data cleaning. 218 | 219 | ``` r 220 | #### End user needs to do what's appropriate for their data. This may be an iterative process. 221 | # Keep only complete rows of inVar and spec data before fitting 222 | plsr_data <- plsr_data[complete.cases(plsr_data[,names(plsr_data) %in% 223 | c(inVar,paste0("Wave_",wv))]),] 224 | ``` 225 | 226 | #### Prepare PLSR model 227 | 228 | ``` r 229 | print("**** Applying PLSR model to estimate LMA from spectral observations ****") 230 | ``` 231 | 232 | ## [1] "**** Applying PLSR model to estimate LMA from spectral observations ****" 233 | 234 | ``` r 235 | # setup model 236 | dims <- dim(LeafLMA.plsr.coeffs) 237 | LeafLMA.plsr.intercept <- LeafLMA.plsr.coeffs[1,] 238 | LeafLMA.plsr.coeffs <- data.frame(LeafLMA.plsr.coeffs[2:dims[1],]) 239 | names(LeafLMA.plsr.coeffs) <- c("wavelength","coefs") 240 | LeafLMA.plsr.coeffs.vec <- as.vector(LeafLMA.plsr.coeffs[,2]) 241 | sub_spec <- droplevels(plsr_data[,which(names(plsr_data) %in% 242 | paste0("Wave_",seq(Start.wave,End.wave,1)))]) 243 | ``` 244 | 245 | #### Apply PLSR model 246 | 247 | ``` r 248 | plsr_pred <- as.matrix(sub_spec) %*% LeafLMA.plsr.coeffs.vec + LeafLMA.plsr.intercept[,2] 249 | leafLMA <- plsr_pred[,1]^2 # convert to standard LMA units from sqrt(LMA) 250 | names(leafLMA) <- "PLSR_LMA_gDW_m2" 251 | 252 | # organize output 253 | LeafLMA.PLSR.dataset <- data.frame(plsr_data[,which(names(plsr_data) %notin% 254 | paste0("Wave_",seq(Start.wave,End.wave,1)))], 255 | PLSR_LMA_gDW_m2=leafLMA, PLSR_Residuals=leafLMA-plsr_data[,inVar]) 256 | head(LeafLMA.PLSR.dataset) 257 | ``` 258 | 259 | ## Domain Functional_type Sample_ID USDA_Species_Code LMA_gDW_m2 PLSR_LMA_gDW_m2 260 | ## 1 D02 broadleaf P0001 JUNI 72.87 96.26243 261 | ## 2 D02 broadleaf L0001 JUNI 72.87 90.09453 262 | ## 3 D02 broadleaf P0002 JUNI 60.77 77.16475 263 | ## 4 D02 broadleaf L0002 JUNI 60.77 60.99039 264 | ## 5 D02 broadleaf P0003 JUNI 85.92 101.22709 265 | ## 6 D02 broadleaf L0003 JUNI 85.92 97.13018 266 | ## PLSR_Residuals 267 | ## 1 23.3924343 268 | ## 2 17.2245326 269 | ## 3 16.3947533 270 | ## 4 0.2203913 271 | ## 5 15.3070857 272 | ## 6 11.2101840 273 | 274 | #### Generate PLSR uncertainty estimates 275 | 276 | ``` r 277 | print("**** Generate PLSR uncertainty estimates ****") 278 | ``` 279 | 280 | ## [1] "**** Generate PLSR uncertainty estimates ****" 281 | 282 | ``` r 283 | jk_coef <- data.frame(LeafLMA.plsr.jk.coeffs[,3:dim(LeafLMA.plsr.jk.coeffs)[2]]) 284 | jk_coef <- t(jk_coef) 285 | head(jk_coef)[,1:6] 286 | ``` 287 | 288 | ## [,1] [,2] [,3] [,4] [,5] [,6] 289 | ## Wave_500 1.0005875 0.9952840 0.5652908 0.9793160 1.1052207 0.9370473 290 | ## Wave_501 0.9584235 0.9631434 0.5230544 0.9330803 1.0477469 0.9042780 291 | ## Wave_502 0.8960202 0.9065954 0.4597413 0.8710298 0.9658130 0.8628370 292 | ## Wave_503 0.8722135 0.8936197 0.4420696 0.8456098 0.9272967 0.8513741 293 | ## Wave_504 0.8452831 0.8644923 0.4159567 0.8110004 0.8903192 0.8320347 294 | ## Wave_505 0.8240743 0.8378399 0.3902871 0.7829891 0.8570048 0.8150339 295 | 296 | ``` r 297 | jk_int <- t(LeafLMA.plsr.jk.coeffs[,2]) 298 | head(jk_int)[,1:6] 299 | ``` 300 | 301 | ## [1] 7.787098 7.959443 8.015161 8.018586 7.658080 7.998432 302 | 303 | ``` r 304 | jk_pred <- as.matrix(sub_spec) %*% jk_coef + matrix(rep(jk_int, length(plsr_data[,inVar])), 305 | byrow=TRUE, ncol=length(jk_int)) 306 | jk_pred <- jk_pred^2 307 | head(jk_pred)[,1:6] 308 | ``` 309 | 310 | ## [,1] [,2] [,3] [,4] [,5] [,6] 311 | ## 1 94.28721 96.77712 96.44452 95.11992 96.72830 95.33877 312 | ## 2 90.36051 90.57120 90.77562 89.77821 90.24826 89.61806 313 | ## 3 75.71088 77.91861 76.42730 76.11473 77.67179 76.68756 314 | ## 4 61.37001 61.30963 60.56606 60.72330 61.63712 60.69649 315 | ## 5 99.24456 101.75948 101.22916 99.96305 101.70397 100.16758 316 | ## 6 97.40414 97.65463 97.52687 97.00817 97.33677 96.08535 317 | 318 | ``` r 319 | dim(jk_pred) 320 | ``` 321 | 322 | ## [1] 6312 1000 323 | 324 | ``` r 325 | interval <- c(0.025,0.975) 326 | Interval_Conf <- apply(X = jk_pred, MARGIN = 1, FUN = quantile, 327 | probs=c(interval[1], interval[2])) 328 | sd_mean <- apply(X = jk_pred, MARGIN = 1, FUN =sd) 329 | sd_res <- sd(LeafLMA.PLSR.dataset$PLSR_Residuals) 330 | sd_tot <- sqrt(sd_mean^2+sd_res^2) 331 | LeafLMA.PLSR.dataset$LCI <- Interval_Conf[1,] 332 | LeafLMA.PLSR.dataset$UCI <- Interval_Conf[2,] 333 | LeafLMA.PLSR.dataset$LPI <- LeafLMA.PLSR.dataset$PLSR_LMA_gDW_m2-1.96*sd_tot 334 | LeafLMA.PLSR.dataset$UPI <- LeafLMA.PLSR.dataset$PLSR_LMA_gDW_m2+1.96*sd_tot 335 | head(LeafLMA.PLSR.dataset) 336 | ``` 337 | 338 | ## Domain Functional_type Sample_ID USDA_Species_Code LMA_gDW_m2 PLSR_LMA_gDW_m2 339 | ## 1 D02 broadleaf P0001 JUNI 72.87 96.26243 340 | ## 2 D02 broadleaf L0001 JUNI 72.87 90.09453 341 | ## 3 D02 broadleaf P0002 JUNI 60.77 77.16475 342 | ## 4 D02 broadleaf L0002 JUNI 60.77 60.99039 343 | ## 5 D02 broadleaf P0003 JUNI 85.92 101.22709 344 | ## 6 D02 broadleaf L0003 JUNI 85.92 97.13018 345 | ## PLSR_Residuals LCI UCI LPI UPI 346 | ## 1 23.3924343 93.95423 99.03625 71.30476 121.2201 347 | ## 2 17.2245326 88.81329 92.00078 65.21071 114.9784 348 | ## 3 16.3947533 74.79509 79.85715 52.19722 102.1323 349 | ## 4 0.2203913 59.80058 62.29402 36.12678 85.8540 350 | ## 5 15.3070857 98.86570 103.97701 76.26586 126.1883 351 | ## 6 11.2101840 95.50843 99.66865 72.20971 122.0507 352 | 353 | #### Generate PLSR estimated LMA observed vs predicted plot 354 | 355 | ``` r 356 | rmsep_percrmsep <- spectratrait::percent_rmse(plsr_dataset = LeafLMA.PLSR.dataset, 357 | inVar = inVar, 358 | residuals = LeafLMA.PLSR.dataset$PLSR_Residuals, 359 | range="full") 360 | RMSEP <- rmsep_percrmsep$rmse 361 | perc_RMSEP <- rmsep_percrmsep$perc_rmse 362 | r2 <- round(summary(lm(LeafLMA.PLSR.dataset$PLSR_LMA_gDW_m2~ 363 | LeafLMA.PLSR.dataset[,inVar]))$adj.r.squared,2) 364 | expr <- vector("expression", 3) 365 | expr[[1]] <- bquote(R^2==.(r2)) 366 | expr[[2]] <- bquote(RMSEP==.(round(RMSEP,2))) 367 | expr[[3]] <- bquote("%RMSEP"==.(round(perc_RMSEP,2))) 368 | rng_vals <- c(min(LeafLMA.PLSR.dataset$LPI), max(LeafLMA.PLSR.dataset$UPI)) 369 | par(mfrow=c(1,1), mar=c(4,5.3,1,0.4), oma=c(0.1, 0.1, 0.1, 0.2)) 370 | plotrix::plotCI(LeafLMA.PLSR.dataset$PLSR_LMA_gDW_m2,LeafLMA.PLSR.dataset[,inVar], 371 | li=LeafLMA.PLSR.dataset$LPI, ui=LeafLMA.PLSR.dataset$UPI, gap=0.009,sfrac=0.000, 372 | lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), 373 | err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="grey80", 374 | cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"), 375 | ylab=paste0("Observed ", paste(inVar), " (units)"), 376 | cex.axis=1.5,cex.lab=1.8) 377 | abline(0,1,lty=2,lw=2) 378 | plotrix::plotCI(LeafLMA.PLSR.dataset$PLSR_LMA_gDW_m2,LeafLMA.PLSR.dataset[,inVar], 379 | li=LeafLMA.PLSR.dataset$LCI, ui=LeafLMA.PLSR.dataset$UCI, gap=0.009,sfrac=0.004, 380 | lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), 381 | err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="black", 382 | cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"), 383 | ylab=paste0("Observed ", paste(inVar), " (units)"), 384 | cex.axis=1.5,cex.lab=1.8, add=T) 385 | legend("topleft", legend=expr, bty="n", cex=1.5) 386 | legend("bottomright", legend=c("Prediction Interval","Confidence Interval"), 387 | lty=c(1,1), col = c("grey80","black"), lwd=3, bty="n", cex=1.5) 388 | box(lwd=2.2) 389 | ``` 390 | 391 | ![](sserbin2019_plsr_ex9_files/figure-gfm/unnamed-chunk-11-1.png) 392 | 393 | ``` r 394 | dev.copy(png,file.path(outdir,paste0(inVar,"_PLSR_Validation_Scatterplot.png")), 395 | height=2800, width=3200, res=340) 396 | ``` 397 | 398 | ## quartz_off_screen 399 | ## 3 400 | 401 | ``` r 402 | dev.off(); 403 | ``` 404 | 405 | ## quartz_off_screen 406 | ## 2 407 | 408 | ``` r 409 | print(paste("Output directory: ", outdir)) 410 | ``` 411 | 412 | ## [1] "Output directory: /var/folders/tq/tydmhlwn1bdf_0pmpcq70r2c0000gn/T//RtmpyEgmZw" 413 | 414 | ``` r 415 | # Observed versus predicted 416 | write.csv(LeafLMA.PLSR.dataset,file=file.path(outdir, 417 | paste0(inVar,'_PLSR_Estimates.csv')), 418 | row.names=FALSE) 419 | ``` 420 | 421 | ### Confirm files were written to temp space 422 | 423 | ``` r 424 | print("**** PLSR output files: ") 425 | ``` 426 | 427 | ## [1] "**** PLSR output files: " 428 | 429 | ``` r 430 | print(list.files(outdir)[grep(pattern = inVar, list.files(outdir))]) 431 | ``` 432 | 433 | ## [1] "LMA_gDW_m2_PLSR_Estimates.csv" 434 | ## [2] "LMA_gDW_m2_PLSR_Validation_Scatterplot.png" 435 | -------------------------------------------------------------------------------- /vignettes/ely_etal_ex2.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: Spectra-trait PLSR example using leaf-level spectra and leaf nitrogen content (Narea, g/m2) data from eight different crop species growing in a glasshouse at Brookhaven National Laboratory. This example illustrates running the PLSR permutation by group 3 | author: "Shawn P. Serbin, Julien Lamour, & Jeremiah Anderson" 4 | date: "`r Sys.Date()`" 5 | output: 6 | pdf_document: default 7 | html_notebook: default 8 | github_document: default 9 | html_document: 10 | df_print: paged 11 | keep_md: true 12 | rmarkdown: html_vignette 13 | vignette: > 14 | %\VignetteIndexEntry{Spectra-trait PLSR example using leaf-level spectra and leaf nitrogen content (Narea, g/m2) data from eight different crop species growing in a glasshouse at Brookhaven National Laboratory. This example illustrates running the PLSR permutation by group} 15 | %\usepackage[utf8]{inputenc} 16 | %\VignetteEngine{knitr::knitr} 17 | --- 18 | 19 | ```{r setup, include=FALSE, echo=FALSE} 20 | knitr::opts_chunk$set(echo = TRUE) 21 | ``` 22 | 23 | ### Overview 24 | This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook to illustrate how to load an 25 | internal dataset ("ely_plsr_data"), choose the "optimal" number of plsr components, 26 | and fit a plsr model for leaf nitrogen content (Narea, g/m2) 27 | 28 | ### Getting Started 29 | ### Load libraries 30 | ```{r, eval=TRUE, echo=TRUE} 31 | list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait") 32 | invisible(lapply(list.of.packages, library, character.only = TRUE)) 33 | ``` 34 | 35 | ### Setup other functions and options 36 | ```{r, echo=TRUE} 37 | ### Setup options 38 | 39 | # Script options 40 | pls::pls.options(plsralg = "oscorespls") 41 | pls::pls.options("plsralg") 42 | 43 | # Default par options 44 | opar <- par(no.readonly = T) 45 | 46 | # Specify output directory, output_dir 47 | # Options: 48 | # tempdir - use a OS-specified temporary directory 49 | # user defined PATH - e.g. "~/scratch/PLSR" 50 | output_dir <- "tempdir" 51 | ``` 52 | 53 | ### Load internal Ely et al 2019 dataset 54 | ```{r, echo=TRUE} 55 | data("ely_plsr_data") 56 | head(ely_plsr_data)[,1:8] 57 | 58 | # What is the target variable? 59 | inVar <- "N_g_m2" 60 | ``` 61 | 62 | ### Set working directory (scratch space) 63 | ```{r, echo=FALSE} 64 | if (output_dir=="tempdir") { 65 | outdir <- tempdir() 66 | } else { 67 | if (! file.exists(output_dir)) dir.create(output_dir,recursive=TRUE) 68 | outdir <- file.path(path.expand(output_dir)) 69 | } 70 | setwd(outdir) # set working directory 71 | getwd() # check wd 72 | ``` 73 | 74 | ### Full PLSR dataset 75 | ```{r, echo=TRUE} 76 | Start.wave <- 500 77 | End.wave <- 2400 78 | wv <- seq(Start.wave,End.wave,1) 79 | plsr_data <- ely_plsr_data 80 | head(plsr_data)[,1:6] 81 | ``` 82 | ### Create cal/val datasets 83 | ```{r, fig.height = 5, fig.width = 12, echo=TRUE} 84 | ### Create cal/val datasets 85 | ## Make a stratified random sampling in the strata USDA_Species_Code and Domain 86 | 87 | method <- "base" #base/dplyr 88 | # base R - a bit slow 89 | # dplyr - much faster 90 | split_data <- spectratrait::create_data_split(dataset=plsr_data, approach=method, 91 | split_seed=23452135, prop=0.7, 92 | group_variables="Species_Code") 93 | names(split_data) 94 | cal.plsr.data <- split_data$cal_data 95 | head(cal.plsr.data)[1:8] 96 | val.plsr.data <- split_data$val_data 97 | head(val.plsr.data)[1:8] 98 | rm(split_data) 99 | 100 | # Datasets: 101 | print(paste("Cal observations: ",dim(cal.plsr.data)[1],sep="")) 102 | print(paste("Val observations: ",dim(val.plsr.data)[1],sep="")) 103 | 104 | cal_hist_plot <- ggplot(data = cal.plsr.data, 105 | aes(x = cal.plsr.data[,paste0(inVar)])) + 106 | geom_histogram(fill=I("grey50"),col=I("black"),alpha=I(.7)) + 107 | labs(title=paste0("Calibration Histogram for ",inVar), x = paste0(inVar), 108 | y = "Count") 109 | val_hist_plot <- ggplot(data = val.plsr.data, 110 | aes(x = val.plsr.data[,paste0(inVar)])) + 111 | geom_histogram(fill=I("grey50"),col=I("black"),alpha=I(.7)) + 112 | labs(title=paste0("Validation Histogram for ",inVar), x = paste0(inVar), 113 | y = "Count") 114 | histograms <- grid.arrange(cal_hist_plot, val_hist_plot, ncol=2) 115 | ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Histograms.png")), 116 | plot = histograms, 117 | device="png", width = 30, 118 | height = 12, units = "cm", 119 | dpi = 300) 120 | # output cal/val data 121 | write.csv(cal.plsr.data,file=file.path(outdir,paste0(inVar,'_Cal_PLSR_Dataset.csv')), 122 | row.names=FALSE) 123 | write.csv(val.plsr.data,file=file.path(outdir,paste0(inVar,'_Val_PLSR_Dataset.csv')), 124 | row.names=FALSE) 125 | ``` 126 | 127 | ### Create calibration and validation PLSR datasets 128 | ```{r, echo=TRUE} 129 | ### Format PLSR data for model fitting 130 | cal_spec <- as.matrix(cal.plsr.data[, which(names(cal.plsr.data) %in% paste0("Wave_",wv))]) 131 | cal.plsr.data <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% paste0("Wave_",wv))], 132 | Spectra=I(cal_spec)) 133 | head(cal.plsr.data)[1:5] 134 | 135 | val_spec <- as.matrix(val.plsr.data[, which(names(val.plsr.data) %in% paste0("Wave_",wv))]) 136 | val.plsr.data <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% paste0("Wave_",wv))], 137 | Spectra=I(val_spec)) 138 | head(val.plsr.data)[1:5] 139 | ``` 140 | 141 | ### plot cal and val spectra 142 | ```{r, fig.height = 5, fig.width = 12, echo=TRUE} 143 | par(mfrow=c(1,2)) # B, L, T, R 144 | spectratrait::f.plot.spec(Z=cal.plsr.data$Spectra,wv=wv,plot_label="Calibration") 145 | spectratrait::f.plot.spec(Z=val.plsr.data$Spectra,wv=wv,plot_label="Validation") 146 | 147 | dev.copy(png,file.path(outdir,paste0(inVar,'_Cal_Val_Spectra.png')), 148 | height=2500,width=4900, res=340) 149 | dev.off(); 150 | par(mfrow=c(1,1)) 151 | ``` 152 | 153 | ### Use permutation to determine optimal number of components 154 | ```{r, fig.height = 6, fig.width = 10, echo=TRUE} 155 | ### Use permutation to determine the optimal number of components 156 | if(grepl("Windows", sessionInfo()$running)){ 157 | pls.options(parallel = NULL) 158 | } else { 159 | pls.options(parallel = parallel::detectCores()-1) 160 | } 161 | 162 | method <- "firstMin" #firstPlateau, firstMin 163 | random_seed <- 1245565 164 | seg <- 50 165 | maxComps <- 16 166 | iterations <- 80 167 | prop <- 0.70 168 | nComps <- spectratrait::find_optimal_comp_by_groups(dataset=cal.plsr.data, targetVariable=inVar, 169 | method=method, maxComps=maxComps, 170 | iterations=iterations, prop=prop, 171 | random_seed=random_seed, 172 | group_variables="Species_Code") 173 | dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_PLSR_Component_Selection.png"))), 174 | height=2800, width=3400, res=340) 175 | dev.off(); 176 | ``` 177 | 178 | ### Fit final model 179 | ```{r, fig.height = 5, fig.width = 12, echo=TRUE} 180 | plsr.out <- plsr(as.formula(paste(inVar,"~","Spectra")),scale=FALSE,ncomp=nComps,validation="LOO", 181 | trace=FALSE,data=cal.plsr.data) 182 | fit <- plsr.out$fitted.values[,1,nComps] 183 | pls.options(parallel = NULL) 184 | 185 | # External validation fit stats 186 | par(mfrow=c(1,2)) # B, L, T, R 187 | pls::RMSEP(plsr.out, newdata = val.plsr.data) 188 | plot(pls::RMSEP(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL RMSEP", 189 | xlab="Number of Components",ylab="Model Validation RMSEP",lty=1,col="black",cex=1.5,lwd=2) 190 | box(lwd=2.2) 191 | 192 | pls::R2(plsr.out, newdata = val.plsr.data) 193 | plot(pls::R2(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL R2", 194 | xlab="Number of Components",ylab="Model Validation R2",lty=1,col="black",cex=1.5,lwd=2) 195 | box(lwd=2.2) 196 | dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_Validation_RMSEP_R2_by_Component.png"))), 197 | height=2800, width=4800, res=340) 198 | dev.off(); 199 | par(opar) 200 | ``` 201 | 202 | ### PLSR fit observed vs. predicted plot data 203 | ```{r, fig.height = 15, fig.width = 15, echo=TRUE} 204 | #calibration 205 | cal.plsr.output <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% "Spectra")], 206 | PLSR_Predicted=fit, 207 | PLSR_CV_Predicted=as.vector(plsr.out$validation$pred[,,nComps])) 208 | cal.plsr.output <- cal.plsr.output %>% 209 | mutate(PLSR_CV_Residuals = PLSR_CV_Predicted-get(inVar)) 210 | head(cal.plsr.output) 211 | cal.R2 <- round(pls::R2(plsr.out,intercept=F)[[1]][nComps],2) 212 | cal.RMSEP <- round(sqrt(mean(cal.plsr.output$PLSR_CV_Residuals^2)),2) 213 | 214 | val.plsr.output <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% "Spectra")], 215 | PLSR_Predicted=as.vector(predict(plsr.out, 216 | newdata = val.plsr.data, 217 | ncomp=nComps, type="response")[,,1])) 218 | val.plsr.output <- val.plsr.output %>% 219 | mutate(PLSR_Residuals = PLSR_Predicted-get(inVar)) 220 | head(val.plsr.output) 221 | val.R2 <- round(pls::R2(plsr.out,newdata=val.plsr.data,intercept=F)[[1]][nComps],2) 222 | val.RMSEP <- round(sqrt(mean(val.plsr.output$PLSR_Residuals^2)),2) 223 | 224 | rng_quant <- quantile(cal.plsr.output[,inVar], probs = c(0.001, 0.999)) 225 | cal_scatter_plot <- ggplot(cal.plsr.output, aes(x=PLSR_CV_Predicted, y=get(inVar))) + 226 | theme_bw() + geom_point() + geom_abline(intercept = 0, slope = 1, color="dark grey", 227 | linetype="dashed", linewidth=1.5) + 228 | xlim(rng_quant[1], rng_quant[2]) + 229 | ylim(rng_quant[1], rng_quant[2]) + 230 | labs(x=paste0("Predicted ", paste(inVar), " (units)"), 231 | y=paste0("Observed ", paste(inVar), " (units)"), 232 | title=paste0("Calibration: ", paste0("Rsq = ", cal.R2), "; ", paste0("RMSEP = ", 233 | cal.RMSEP))) + 234 | theme(axis.text=element_text(size=18), legend.position="none", 235 | axis.title=element_text(size=20, face="bold"), 236 | axis.text.x = element_text(angle = 0,vjust = 0.5), 237 | panel.border = element_rect(linetype = "solid", fill = NA, linewidth=1.5)) 238 | 239 | cal_resid_histogram <- ggplot(cal.plsr.output, aes(x=PLSR_CV_Residuals)) + 240 | geom_histogram(alpha=.5, position="identity") + 241 | geom_vline(xintercept = 0, color="black", 242 | linetype="dashed", linewidth=1) + theme_bw() + 243 | theme(axis.text=element_text(size=18), legend.position="none", 244 | axis.title=element_text(size=20, face="bold"), 245 | axis.text.x = element_text(angle = 0,vjust = 0.5), 246 | panel.border = element_rect(linetype = "solid", fill = NA, linewidth=1.5)) 247 | 248 | rng_quant <- quantile(val.plsr.output[,inVar], probs = c(0.001, 0.999)) 249 | val_scatter_plot <- ggplot(val.plsr.output, aes(x=PLSR_Predicted, y=get(inVar))) + 250 | theme_bw() + geom_point() + geom_abline(intercept = 0, slope = 1, color="dark grey", 251 | linetype="dashed", linewidth=1.5) + 252 | xlim(rng_quant[1], rng_quant[2]) + 253 | ylim(rng_quant[1], rng_quant[2]) + 254 | labs(x=paste0("Predicted ", paste(inVar), " (units)"), 255 | y=paste0("Observed ", paste(inVar), " (units)"), 256 | title=paste0("Validation: ", paste0("Rsq = ", val.R2), "; ", paste0("RMSEP = ", 257 | val.RMSEP))) + 258 | theme(axis.text=element_text(size=18), legend.position="none", 259 | axis.title=element_text(size=20, face="bold"), 260 | axis.text.x = element_text(angle = 0,vjust = 0.5), 261 | panel.border = element_rect(linetype = "solid", fill = NA, linewidth=1.5)) 262 | 263 | val_resid_histogram <- ggplot(val.plsr.output, aes(x=PLSR_Residuals)) + 264 | geom_histogram(alpha=.5, position="identity") + 265 | geom_vline(xintercept = 0, color="black", 266 | linetype="dashed", linewidth=1) + theme_bw() + 267 | theme(axis.text=element_text(size=18), legend.position="none", 268 | axis.title=element_text(size=20, face="bold"), 269 | axis.text.x = element_text(angle = 0,vjust = 0.5), 270 | panel.border = element_rect(linetype = "solid", fill = NA, linewidth=1.5)) 271 | 272 | # plot cal/val side-by-side 273 | scatterplots <- grid.arrange(cal_scatter_plot, val_scatter_plot, cal_resid_histogram, 274 | val_resid_histogram, nrow=2,ncol=2) 275 | ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Scatterplots.png")), 276 | plot = scatterplots, device="png", 277 | width = 32, 278 | height = 30, units = "cm", 279 | dpi = 300) 280 | ``` 281 | 282 | ### Generate Coefficient and VIP plots 283 | ```{r, fig.height = 9, fig.width = 10, echo=TRUE} 284 | vips <- spectratrait::VIP(plsr.out)[nComps,] 285 | par(mfrow=c(2,1)) 286 | plot(plsr.out, plottype = "coef",xlab="Wavelength (nm)", 287 | ylab="Regression coefficients",legendpos = "bottomright", 288 | ncomp=nComps,lwd=2) 289 | box(lwd=2.2) 290 | plot(seq(Start.wave,End.wave,1),vips,xlab="Wavelength (nm)",ylab="VIP",cex=0.01) 291 | lines(seq(Start.wave,End.wave,1),vips,lwd=3) 292 | abline(h=0.8,lty=2,col="dark grey") 293 | box(lwd=2.2) 294 | dev.copy(png,file.path(outdir,paste0(inVar,'_Coefficient_VIP_plot.png')), 295 | height=3100, width=4100, res=340) 296 | dev.off(); 297 | ``` 298 | 299 | ### Bootstrap validation 300 | ```{r, echo=TRUE} 301 | if(grepl("Windows", sessionInfo()$running)){ 302 | pls.options(parallel =NULL) 303 | } else { 304 | pls.options(parallel = parallel::detectCores()-1) 305 | } 306 | 307 | ### PLSR bootstrap permutation uncertainty analysis 308 | iterations <- 500 # how many permutation iterations to run 309 | prop <- 0.70 # fraction of training data to keep for each iteration 310 | plsr_permutation <- spectratrait::pls_permutation_by_groups(dataset=cal.plsr.data, 311 | targetVariable=inVar, 312 | maxComps=nComps, 313 | iterations=iterations, 314 | prop=prop, group_variables="Species_Code", 315 | verbose=FALSE) 316 | bootstrap_intercept <- plsr_permutation$coef_array[1,,nComps] 317 | bootstrap_coef <- plsr_permutation$coef_array[2:length(plsr_permutation$coef_array[,1,nComps]), 318 | ,nComps] 319 | rm(plsr_permutation) 320 | 321 | # apply coefficients to left-out validation data 322 | interval <- c(0.025,0.975) 323 | Bootstrap_Pred <- val.plsr.data$Spectra %*% bootstrap_coef + 324 | matrix(rep(bootstrap_intercept, length(val.plsr.data[,inVar])), byrow=TRUE, 325 | ncol=length(bootstrap_intercept)) 326 | Interval_Conf <- apply(X = Bootstrap_Pred, MARGIN = 1, FUN = quantile, 327 | probs=c(interval[1], interval[2])) 328 | sd_mean <- apply(X = Bootstrap_Pred, MARGIN = 1, FUN = sd) 329 | sd_res <- sd(val.plsr.output$PLSR_Residuals) 330 | sd_tot <- sqrt(sd_mean^2+sd_res^2) 331 | val.plsr.output$LCI <- Interval_Conf[1,] 332 | val.plsr.output$UCI <- Interval_Conf[2,] 333 | val.plsr.output$LPI <- val.plsr.output$PLSR_Predicted-1.96*sd_tot 334 | val.plsr.output$UPI <- val.plsr.output$PLSR_Predicted+1.96*sd_tot 335 | head(val.plsr.output) 336 | ``` 337 | 338 | ### Jackknife coefficient plot 339 | ```{r, fig.height = 6, fig.width = 10, echo=TRUE} 340 | # Bootstrap regression coefficient plot 341 | spectratrait::f.plot.coef(Z = t(bootstrap_coef), wv = wv, 342 | plot_label="Bootstrap regression coefficients",position = 'bottomleft') 343 | abline(h=0,lty=2,col="grey50") 344 | box(lwd=2.2) 345 | dev.copy(png,file.path(outdir,paste0(inVar,'_Bootstrap_Regression_Coefficients.png')), 346 | height=2100, width=3800, res=340) 347 | dev.off(); 348 | ``` 349 | 350 | ### Bootstrap validation plot 351 | ```{r, fig.height = 7, fig.width = 8, echo=TRUE} 352 | rmsep_percrmsep <- spectratrait::percent_rmse(plsr_dataset = val.plsr.output, 353 | inVar = inVar, 354 | residuals = val.plsr.output$PLSR_Residuals, 355 | range="full") 356 | RMSEP <- rmsep_percrmsep$rmse 357 | perc_RMSEP <- rmsep_percrmsep$perc_rmse 358 | r2 <- round(pls::R2(plsr.out, newdata = val.plsr.data, intercept=F)$val[nComps],2) 359 | expr <- vector("expression", 3) 360 | expr[[1]] <- bquote(R^2==.(r2)) 361 | expr[[2]] <- bquote(RMSEP==.(round(RMSEP,2))) 362 | expr[[3]] <- bquote("%RMSEP"==.(round(perc_RMSEP,2))) 363 | rng_vals <- c(min(val.plsr.output$LPI), max(val.plsr.output$UPI)) 364 | par(mfrow=c(1,1), mar=c(4.2,5.3,1,0.4), oma=c(0, 0.1, 0, 0.2)) 365 | plotrix::plotCI(val.plsr.output$PLSR_Predicted,val.plsr.output[,inVar], 366 | li=val.plsr.output$LPI, ui=val.plsr.output$UPI, gap=0.009,sfrac=0.000, 367 | lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), 368 | err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="grey80", 369 | cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"), 370 | ylab=paste0("Observed ", paste(inVar), " (units)"), 371 | cex.axis=1.5,cex.lab=1.8) 372 | abline(0,1,lty=2,lw=2) 373 | plotrix::plotCI(val.plsr.output$PLSR_Predicted,val.plsr.output[,inVar], 374 | li=val.plsr.output$LCI, ui=val.plsr.output$UCI, gap=0.009,sfrac=0.004, 375 | lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), 376 | err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="black", 377 | cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"), 378 | ylab=paste0("Observed ", paste(inVar), " (units)"), 379 | cex.axis=1.5,cex.lab=1.8, add=T) 380 | legend("topleft", legend=expr, bty="n", cex=1.5) 381 | legend("bottomright", legend=c("Prediction Interval","Confidence Interval"), 382 | lty=c(1,1), col = c("grey80","black"), lwd=3, bty="n", cex=1.5) 383 | box(lwd=2.2) 384 | dev.copy(png,file.path(outdir,paste0(inVar,"_PLSR_Validation_Scatterplot.png")), 385 | height=2800, width=3200, res=340) 386 | dev.off(); 387 | ``` 388 | 389 | ### Output bootstrap results 390 | ```{r, echo=TRUE} 391 | # Bootstrap Coefficients 392 | out.jk.coefs <- data.frame(Iteration=seq(1,length(bootstrap_intercept),1), 393 | Intercept=bootstrap_intercept,t(bootstrap_coef)) 394 | names(out.jk.coefs) <- c("Iteration","Intercept",paste0("Wave_",wv)) 395 | head(out.jk.coefs)[1:6] 396 | write.csv(out.jk.coefs,file=file.path(outdir,paste0(inVar, 397 | '_Bootstrap_PLSR_Coefficients.csv')), 398 | row.names=FALSE) 399 | ``` 400 | 401 | ### Create core PLSR outputs 402 | ```{r, echo=TRUE} 403 | print(paste("Output directory: ", outdir)) 404 | 405 | # Observed versus predicted 406 | write.csv(cal.plsr.output,file=file.path(outdir, 407 | paste0(inVar,'_Observed_PLSR_CV_Pred_', 408 | nComps,'comp.csv')), 409 | row.names=FALSE) 410 | 411 | # Validation data 412 | write.csv(val.plsr.output,file=file.path(outdir, 413 | paste0(inVar,'_Validation_PLSR_Pred_', 414 | nComps,'comp.csv')), 415 | row.names=FALSE) 416 | 417 | # Model coefficients 418 | coefs <- coef(plsr.out,ncomp=nComps,intercept=TRUE) 419 | write.csv(coefs,file=file.path(outdir, 420 | paste0(inVar,'_PLSR_Coefficients_', 421 | nComps,'comp.csv')), 422 | row.names=TRUE) 423 | 424 | # PLSR VIP 425 | write.csv(vips,file=file.path(outdir, 426 | paste0(inVar,'_PLSR_VIPs_', 427 | nComps,'comp.csv'))) 428 | ``` 429 | 430 | ### Confirm files were written to temp space 431 | ```{r, echo=TRUE} 432 | print("**** PLSR output files: ") 433 | print(list.files(outdir)[grep(pattern = inVar, list.files(outdir))]) 434 | ``` 435 | -------------------------------------------------------------------------------- /vignettes/ely_etal_ex1.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: Spectra-trait PLSR example using leaf-level spectra and leaf nitrogen content (Narea, g/m2) data from eight different crop species growing in a glasshouse at Brookhaven National Laboratory 3 | author: "Shawn P. Serbin, Julien Lamour, & Jeremiah Anderson" 4 | date: "`r Sys.Date()`" 5 | output: 6 | pdf_document: default 7 | html_document: 8 | df_print: paged 9 | keep_md: true 10 | github_document: default 11 | html_notebook: default 12 | rmarkdown: html_vignette 13 | vignette: > 14 | %\VignetteIndexEntry{Spectra-trait PLSR example using leaf-level spectra and leaf nitrogen content (Narea, g/m2) data from eight different crop species growing in a glasshouse at Brookhaven National Laboratory} 15 | %\usepackage[utf8]{inputenc} 16 | %\VignetteEngine{knitr::knitr} 17 | --- 18 | 19 | ```{r setup, include=FALSE, echo=FALSE} 20 | knitr::opts_chunk$set(echo = TRUE) 21 | ``` 22 | 23 | ### Overview 24 | This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook to illustrate how to load an 25 | internal dataset ("ely_plsr_data"), choose the "optimal" number of plsr components, 26 | and fit a plsr model for leaf nitrogen content (Narea, g/m2) 27 | 28 | ### Getting Started 29 | ### Load libraries 30 | ```{r, eval=TRUE, echo=TRUE} 31 | list.of.packages <- c("pls","dplyr","here","plotrix","ggplot2","gridExtra","spectratrait") 32 | invisible(lapply(list.of.packages, library, character.only = TRUE)) 33 | ``` 34 | 35 | ### Setup other functions and options 36 | ```{r, echo=TRUE} 37 | ### Setup options 38 | 39 | # Script options 40 | pls::pls.options(plsralg = "oscorespls") 41 | pls::pls.options("plsralg") 42 | 43 | # Default par options 44 | opar <- par(no.readonly = T) 45 | 46 | # Specify output directory, output_dir 47 | # Options: 48 | # tempdir - use a OS-specified temporary directory 49 | # user defined PATH - e.g. "~/scratch/PLSR" 50 | output_dir <- "tempdir" 51 | ``` 52 | 53 | ### Load internal Ely et al 2019 dataset 54 | ```{r, echo=TRUE} 55 | data("ely_plsr_data") 56 | head(ely_plsr_data)[,1:8] 57 | 58 | # What is the target variable? 59 | inVar <- "N_g_m2" 60 | ``` 61 | 62 | ### Set working directory (scratch space) 63 | ```{r, echo=FALSE} 64 | if (output_dir=="tempdir") { 65 | outdir <- tempdir() 66 | } else { 67 | if (! file.exists(output_dir)) dir.create(output_dir,recursive=TRUE) 68 | outdir <- file.path(path.expand(output_dir)) 69 | } 70 | setwd(outdir) # set working directory 71 | getwd() # check wd 72 | ``` 73 | 74 | ### Full PLSR dataset 75 | ```{r, echo=TRUE} 76 | Start.wave <- 500 77 | End.wave <- 2400 78 | wv <- seq(Start.wave,End.wave,1) 79 | plsr_data <- ely_plsr_data 80 | head(plsr_data)[,1:6] 81 | ``` 82 | ### Create cal/val datasets 83 | ```{r, fig.height = 5, fig.width = 12, echo=TRUE} 84 | ### Create cal/val datasets 85 | ## Make a stratified random sampling in the strata USDA_Species_Code and Domain 86 | 87 | method <- "base" #base/dplyr 88 | # base R - a bit slow 89 | # dplyr - much faster 90 | split_data <- spectratrait::create_data_split(dataset=plsr_data, approach=method, 91 | split_seed=23452135, prop=0.7, 92 | group_variables="Species_Code") 93 | names(split_data) 94 | cal.plsr.data <- split_data$cal_data 95 | head(cal.plsr.data)[1:8] 96 | val.plsr.data <- split_data$val_data 97 | head(val.plsr.data)[1:8] 98 | rm(split_data) 99 | 100 | # Datasets: 101 | print(paste("Cal observations: ",dim(cal.plsr.data)[1],sep="")) 102 | print(paste("Val observations: ",dim(val.plsr.data)[1],sep="")) 103 | 104 | cal_hist_plot <- ggplot(data = cal.plsr.data, 105 | aes(x = cal.plsr.data[,paste0(inVar)])) + 106 | geom_histogram(fill=I("grey50"),col=I("black"),alpha=I(.7)) + 107 | labs(title=paste0("Calibration Histogram for ",inVar), x = paste0(inVar), 108 | y = "Count") 109 | val_hist_plot <- ggplot(data = val.plsr.data, 110 | aes(x = val.plsr.data[,paste0(inVar)])) + 111 | geom_histogram(fill=I("grey50"),col=I("black"),alpha=I(.7)) + 112 | labs(title=paste0("Validation Histogram for ",inVar), x = paste0(inVar), 113 | y = "Count") 114 | histograms <- grid.arrange(cal_hist_plot, val_hist_plot, ncol=2) 115 | ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Histograms.png")), 116 | plot = histograms, 117 | device="png", width = 30, 118 | height = 12, units = "cm", 119 | dpi = 300) 120 | # output cal/val data 121 | write.csv(cal.plsr.data,file=file.path(outdir,paste0(inVar,'_Cal_PLSR_Dataset.csv')), 122 | row.names=FALSE) 123 | write.csv(val.plsr.data,file=file.path(outdir,paste0(inVar,'_Val_PLSR_Dataset.csv')), 124 | row.names=FALSE) 125 | ``` 126 | 127 | ### Create calibration and validation PLSR datasets 128 | ```{r, echo=TRUE} 129 | ### Format PLSR data for model fitting 130 | cal_spec <- as.matrix(cal.plsr.data[, which(names(cal.plsr.data) %in% paste0("Wave_",wv))]) 131 | cal.plsr.data <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% paste0("Wave_",wv))], 132 | Spectra=I(cal_spec)) 133 | head(cal.plsr.data)[1:5] 134 | 135 | val_spec <- as.matrix(val.plsr.data[, which(names(val.plsr.data) %in% paste0("Wave_",wv))]) 136 | val.plsr.data <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% paste0("Wave_",wv))], 137 | Spectra=I(val_spec)) 138 | head(val.plsr.data)[1:5] 139 | ``` 140 | 141 | ### plot cal and val spectra 142 | ```{r, fig.height = 5, fig.width = 12, echo=TRUE} 143 | par(mfrow=c(1,2)) # B, L, T, R 144 | spectratrait::f.plot.spec(Z=cal.plsr.data$Spectra,wv=wv,plot_label="Calibration") 145 | spectratrait::f.plot.spec(Z=val.plsr.data$Spectra,wv=wv,plot_label="Validation") 146 | 147 | dev.copy(png,file.path(outdir,paste0(inVar,'_Cal_Val_Spectra.png')), 148 | height=2500,width=4900, res=340) 149 | dev.off(); 150 | par(mfrow=c(1,1)) 151 | ``` 152 | 153 | ### Use permutation to determine optimal number of components 154 | ```{r, fig.height = 6, fig.width = 10, echo=TRUE} 155 | ### Use permutation to determine the optimal number of components 156 | if(grepl("Windows", sessionInfo()$running)){ 157 | pls.options(parallel = NULL) 158 | } else { 159 | pls.options(parallel = parallel::detectCores()-1) 160 | } 161 | 162 | method <- "pls" #pls, firstPlateau, firstMin 163 | random_seed <- 1245565 164 | seg <- 50 165 | maxComps <- 16 166 | iterations <- 80 167 | prop <- 0.70 168 | if (method=="pls") { 169 | nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, 170 | method=method, 171 | maxComps=maxComps, seg=seg, 172 | random_seed=random_seed) 173 | print(paste0("*** Optimal number of components: ", nComps)) 174 | } else { 175 | nComps <- spectratrait::find_optimal_components(dataset=cal.plsr.data, targetVariable=inVar, 176 | method=method, 177 | maxComps=maxComps, iterations=iterations, 178 | seg=seg, prop=prop, 179 | random_seed=random_seed) 180 | } 181 | dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_PLSR_Component_Selection.png"))), 182 | height=2800, width=3400, res=340) 183 | dev.off(); 184 | ``` 185 | 186 | ### Fit final model 187 | ```{r, fig.height = 5, fig.width = 12, echo=TRUE} 188 | plsr.out <- plsr(as.formula(paste(inVar,"~","Spectra")),scale=FALSE,ncomp=nComps,validation="LOO", 189 | trace=FALSE,data=cal.plsr.data) 190 | fit <- plsr.out$fitted.values[,1,nComps] 191 | pls.options(parallel = NULL) 192 | 193 | # External validation fit stats 194 | par(mfrow=c(1,2)) # B, L, T, R 195 | pls::RMSEP(plsr.out, newdata = val.plsr.data) 196 | plot(pls::RMSEP(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL RMSEP", 197 | xlab="Number of Components",ylab="Model Validation RMSEP",lty=1,col="black",cex=1.5,lwd=2) 198 | box(lwd=2.2) 199 | 200 | pls::R2(plsr.out, newdata = val.plsr.data) 201 | plot(pls::R2(plsr.out,estimate=c("test"),newdata = val.plsr.data), main="MODEL R2", 202 | xlab="Number of Components",ylab="Model Validation R2",lty=1,col="black",cex=1.5,lwd=2) 203 | box(lwd=2.2) 204 | dev.copy(png,file.path(outdir,paste0(paste0(inVar,"_Validation_RMSEP_R2_by_Component.png"))), 205 | height=2800, width=4800, res=340) 206 | dev.off(); 207 | par(opar) 208 | ``` 209 | 210 | ### PLSR fit observed vs. predicted plot data 211 | ```{r, fig.height = 15, fig.width = 15, echo=TRUE} 212 | #calibration 213 | cal.plsr.output <- data.frame(cal.plsr.data[, which(names(cal.plsr.data) %notin% "Spectra")], 214 | PLSR_Predicted=fit, 215 | PLSR_CV_Predicted=as.vector(plsr.out$validation$pred[,,nComps])) 216 | cal.plsr.output <- cal.plsr.output %>% 217 | mutate(PLSR_CV_Residuals = PLSR_CV_Predicted-get(inVar)) 218 | head(cal.plsr.output) 219 | cal.R2 <- round(pls::R2(plsr.out,intercept=F)[[1]][nComps],2) 220 | cal.RMSEP <- round(sqrt(mean(cal.plsr.output$PLSR_CV_Residuals^2)),2) 221 | 222 | val.plsr.output <- data.frame(val.plsr.data[, which(names(val.plsr.data) %notin% "Spectra")], 223 | PLSR_Predicted=as.vector(predict(plsr.out, 224 | newdata = val.plsr.data, 225 | ncomp=nComps, type="response")[,,1])) 226 | val.plsr.output <- val.plsr.output %>% 227 | mutate(PLSR_Residuals = PLSR_Predicted-get(inVar)) 228 | head(val.plsr.output) 229 | val.R2 <- round(pls::R2(plsr.out,newdata=val.plsr.data,intercept=F)[[1]][nComps],2) 230 | val.RMSEP <- round(sqrt(mean(val.plsr.output$PLSR_Residuals^2)),2) 231 | 232 | rng_quant <- quantile(cal.plsr.output[,inVar], probs = c(0.001, 0.999)) 233 | cal_scatter_plot <- ggplot(cal.plsr.output, aes(x=PLSR_CV_Predicted, y=get(inVar))) + 234 | theme_bw() + geom_point() + geom_abline(intercept = 0, slope = 1, color="dark grey", 235 | linetype="dashed", linewidth=1.5) + 236 | xlim(rng_quant[1], rng_quant[2]) + 237 | ylim(rng_quant[1], rng_quant[2]) + 238 | labs(x=paste0("Predicted ", paste(inVar), " (units)"), 239 | y=paste0("Observed ", paste(inVar), " (units)"), 240 | title=paste0("Calibration: ", paste0("Rsq = ", cal.R2), "; ", paste0("RMSEP = ", 241 | cal.RMSEP))) + 242 | theme(axis.text=element_text(size=18), legend.position="none", 243 | axis.title=element_text(size=20, face="bold"), 244 | axis.text.x = element_text(angle = 0,vjust = 0.5), 245 | panel.border = element_rect(linetype = "solid", fill = NA, linewidth=1.5)) 246 | 247 | cal_resid_histogram <- ggplot(cal.plsr.output, aes(x=PLSR_CV_Residuals)) + 248 | geom_histogram(alpha=.5, position="identity") + 249 | geom_vline(xintercept = 0, color="black", 250 | linetype="dashed", linewidth=1) + theme_bw() + 251 | theme(axis.text=element_text(size=18), legend.position="none", 252 | axis.title=element_text(size=20, face="bold"), 253 | axis.text.x = element_text(angle = 0,vjust = 0.5), 254 | panel.border = element_rect(linetype = "solid", fill = NA, linewidth=1.5)) 255 | 256 | rng_quant <- quantile(val.plsr.output[,inVar], probs = c(0.001, 0.999)) 257 | val_scatter_plot <- ggplot(val.plsr.output, aes(x=PLSR_Predicted, y=get(inVar))) + 258 | theme_bw() + geom_point() + geom_abline(intercept = 0, slope = 1, color="dark grey", 259 | linetype="dashed", linewidth=1.5) + 260 | xlim(rng_quant[1], rng_quant[2]) + 261 | ylim(rng_quant[1], rng_quant[2]) + 262 | labs(x=paste0("Predicted ", paste(inVar), " (units)"), 263 | y=paste0("Observed ", paste(inVar), " (units)"), 264 | title=paste0("Validation: ", paste0("Rsq = ", val.R2), "; ", paste0("RMSEP = ", 265 | val.RMSEP))) + 266 | theme(axis.text=element_text(size=18), legend.position="none", 267 | axis.title=element_text(size=20, face="bold"), 268 | axis.text.x = element_text(angle = 0,vjust = 0.5), 269 | panel.border = element_rect(linetype = "solid", fill = NA, linewidth=1.5)) 270 | 271 | val_resid_histogram <- ggplot(val.plsr.output, aes(x=PLSR_Residuals)) + 272 | geom_histogram(alpha=.5, position="identity") + 273 | geom_vline(xintercept = 0, color="black", 274 | linetype="dashed", linewidth=1) + theme_bw() + 275 | theme(axis.text=element_text(size=18), legend.position="none", 276 | axis.title=element_text(size=20, face="bold"), 277 | axis.text.x = element_text(angle = 0,vjust = 0.5), 278 | panel.border = element_rect(linetype = "solid", fill = NA, linewidth=1.5)) 279 | 280 | # plot cal/val side-by-side 281 | scatterplots <- grid.arrange(cal_scatter_plot, val_scatter_plot, cal_resid_histogram, 282 | val_resid_histogram, nrow=2,ncol=2) 283 | ggsave(filename = file.path(outdir,paste0(inVar,"_Cal_Val_Scatterplots.png")), 284 | plot = scatterplots, device="png", 285 | width = 32, 286 | height = 30, units = "cm", 287 | dpi = 300) 288 | ``` 289 | 290 | ### Generate Coefficient and VIP plots 291 | ```{r, fig.height = 9, fig.width = 10, echo=TRUE} 292 | vips <- spectratrait::VIP(plsr.out)[nComps,] 293 | par(mfrow=c(2,1)) 294 | plot(plsr.out, plottype = "coef",xlab="Wavelength (nm)", 295 | ylab="Regression coefficients",legendpos = "bottomright", 296 | ncomp=nComps,lwd=2) 297 | box(lwd=2.2) 298 | plot(seq(Start.wave,End.wave,1),vips,xlab="Wavelength (nm)",ylab="VIP",cex=0.01) 299 | lines(seq(Start.wave,End.wave,1),vips,lwd=3) 300 | abline(h=0.8,lty=2,col="dark grey") 301 | box(lwd=2.2) 302 | dev.copy(png,file.path(outdir,paste0(inVar,'_Coefficient_VIP_plot.png')), 303 | height=3100, width=4100, res=340) 304 | dev.off(); 305 | ``` 306 | 307 | ### Bootstrap validation 308 | ```{r, echo=TRUE} 309 | if(grepl("Windows", sessionInfo()$running)){ 310 | pls.options(parallel =NULL) 311 | } else { 312 | pls.options(parallel = parallel::detectCores()-1) 313 | } 314 | 315 | ### PLSR bootstrap permutation uncertainty analysis 316 | iterations <- 500 # how many permutation iterations to run 317 | prop <- 0.70 # fraction of training data to keep for each iteration 318 | plsr_permutation <- spectratrait::pls_permutation(dataset=cal.plsr.data, targetVariable=inVar, 319 | maxComps=nComps, 320 | iterations=iterations, prop=prop, 321 | verbose = FALSE) 322 | bootstrap_intercept <- plsr_permutation$coef_array[1,,nComps] 323 | bootstrap_coef <- plsr_permutation$coef_array[2:length(plsr_permutation$coef_array[,1,nComps]), 324 | ,nComps] 325 | rm(plsr_permutation) 326 | 327 | # apply coefficients to left-out validation data 328 | interval <- c(0.025,0.975) 329 | Bootstrap_Pred <- val.plsr.data$Spectra %*% bootstrap_coef + 330 | matrix(rep(bootstrap_intercept, length(val.plsr.data[,inVar])), byrow=TRUE, 331 | ncol=length(bootstrap_intercept)) 332 | Interval_Conf <- apply(X = Bootstrap_Pred, MARGIN = 1, FUN = quantile, 333 | probs=c(interval[1], interval[2])) 334 | sd_mean <- apply(X = Bootstrap_Pred, MARGIN = 1, FUN = sd) 335 | sd_res <- sd(val.plsr.output$PLSR_Residuals) 336 | sd_tot <- sqrt(sd_mean^2+sd_res^2) 337 | val.plsr.output$LCI <- Interval_Conf[1,] 338 | val.plsr.output$UCI <- Interval_Conf[2,] 339 | val.plsr.output$LPI <- val.plsr.output$PLSR_Predicted-1.96*sd_tot 340 | val.plsr.output$UPI <- val.plsr.output$PLSR_Predicted+1.96*sd_tot 341 | head(val.plsr.output) 342 | ``` 343 | 344 | ### Jackknife coefficient plot 345 | ```{r, fig.height = 6, fig.width = 10, echo=TRUE} 346 | # Bootstrap regression coefficient plot 347 | spectratrait::f.plot.coef(Z = t(bootstrap_coef), wv = wv, 348 | plot_label="Bootstrap regression coefficients",position = 'bottomleft') 349 | abline(h=0,lty=2,col="grey50") 350 | box(lwd=2.2) 351 | dev.copy(png,file.path(outdir,paste0(inVar,'_Bootstrap_Regression_Coefficients.png')), 352 | height=2100, width=3800, res=340) 353 | dev.off(); 354 | ``` 355 | 356 | ### Bootstrap validation plot 357 | ```{r, fig.height = 7, fig.width = 8, echo=TRUE} 358 | rmsep_percrmsep <- spectratrait::percent_rmse(plsr_dataset = val.plsr.output, 359 | inVar = inVar, 360 | residuals = val.plsr.output$PLSR_Residuals, 361 | range="full") 362 | RMSEP <- rmsep_percrmsep$rmse 363 | perc_RMSEP <- rmsep_percrmsep$perc_rmse 364 | r2 <- round(pls::R2(plsr.out, newdata = val.plsr.data, intercept=F)$val[nComps],2) 365 | expr <- vector("expression", 3) 366 | expr[[1]] <- bquote(R^2==.(r2)) 367 | expr[[2]] <- bquote(RMSEP==.(round(RMSEP,2))) 368 | expr[[3]] <- bquote("%RMSEP"==.(round(perc_RMSEP,2))) 369 | rng_vals <- c(min(val.plsr.output$LPI), max(val.plsr.output$UPI)) 370 | par(mfrow=c(1,1), mar=c(4.2,5.3,1,0.4), oma=c(0, 0.1, 0, 0.2)) 371 | plotrix::plotCI(val.plsr.output$PLSR_Predicted,val.plsr.output[,inVar], 372 | li=val.plsr.output$LPI, ui=val.plsr.output$UPI, gap=0.009,sfrac=0.000, 373 | lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), 374 | err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="grey80", 375 | cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"), 376 | ylab=paste0("Observed ", paste(inVar), " (units)"), 377 | cex.axis=1.5,cex.lab=1.8) 378 | abline(0,1,lty=2,lw=2) 379 | plotrix::plotCI(val.plsr.output$PLSR_Predicted,val.plsr.output[,inVar], 380 | li=val.plsr.output$LCI, ui=val.plsr.output$UCI, gap=0.009,sfrac=0.004, 381 | lwd=1.6, xlim=c(rng_vals[1], rng_vals[2]), ylim=c(rng_vals[1], rng_vals[2]), 382 | err="x", pch=21, col="black", pt.bg=scales::alpha("grey70",0.7), scol="black", 383 | cex=2, xlab=paste0("Predicted ", paste(inVar), " (units)"), 384 | ylab=paste0("Observed ", paste(inVar), " (units)"), 385 | cex.axis=1.5,cex.lab=1.8, add=T) 386 | legend("topleft", legend=expr, bty="n", cex=1.5) 387 | legend("bottomright", legend=c("Prediction Interval","Confidence Interval"), 388 | lty=c(1,1), col = c("grey80","black"), lwd=3, bty="n", cex=1.5) 389 | box(lwd=2.2) 390 | dev.copy(png,file.path(outdir,paste0(inVar,"_PLSR_Validation_Scatterplot.png")), 391 | height=2800, width=3200, res=340) 392 | dev.off(); 393 | ``` 394 | 395 | ### Output bootstrap results 396 | ```{r, echo=TRUE} 397 | # Bootstrap Coefficients 398 | out.jk.coefs <- data.frame(Iteration=seq(1,length(bootstrap_intercept),1), 399 | Intercept=bootstrap_intercept,t(bootstrap_coef)) 400 | names(out.jk.coefs) <- c("Iteration","Intercept",paste0("Wave_",wv)) 401 | head(out.jk.coefs)[1:6] 402 | write.csv(out.jk.coefs,file=file.path(outdir,paste0(inVar, 403 | '_Bootstrap_PLSR_Coefficients.csv')), 404 | row.names=FALSE) 405 | ``` 406 | 407 | ### Create core PLSR outputs 408 | ```{r, echo=TRUE} 409 | print(paste("Output directory: ", outdir)) 410 | 411 | # Observed versus predicted 412 | write.csv(cal.plsr.output,file=file.path(outdir, 413 | paste0(inVar,'_Observed_PLSR_CV_Pred_', 414 | nComps,'comp.csv')), 415 | row.names=FALSE) 416 | 417 | # Validation data 418 | write.csv(val.plsr.output,file=file.path(outdir, 419 | paste0(inVar,'_Validation_PLSR_Pred_', 420 | nComps,'comp.csv')), 421 | row.names=FALSE) 422 | 423 | # Model coefficients 424 | coefs <- coef(plsr.out,ncomp=nComps,intercept=TRUE) 425 | write.csv(coefs,file=file.path(outdir, 426 | paste0(inVar,'_PLSR_Coefficients_', 427 | nComps,'comp.csv')), 428 | row.names=TRUE) 429 | 430 | # PLSR VIP 431 | write.csv(vips,file=file.path(outdir, 432 | paste0(inVar,'_PLSR_VIPs_', 433 | nComps,'comp.csv'))) 434 | ``` 435 | 436 | ### Confirm files were written to temp space 437 | ```{r, echo=TRUE} 438 | print("**** PLSR output files: ") 439 | print(list.files(outdir)[grep(pattern = inVar, list.files(outdir))]) 440 | ``` 441 | --------------------------------------------------------------------------------