├── src ├── .gitignore ├── trap_roc.cpp └── RcppExports.cpp ├── .gitignore ├── Structure.png ├── Meta └── vignette.rds ├── data ├── sp_joint.rda ├── sp_test.rda └── sp_train.rda ├── .Rbuildignore ├── inst ├── extdata │ ├── Gbio_1.tif │ ├── Gbio_12.tif │ ├── Gbio_15.tif │ ├── Gbio_17.tif │ ├── Mbio_1.tif │ ├── Mbio_12.tif │ ├── Mbio_15.tif │ ├── Mbio_17.tif │ ├── sp_model.tif │ ├── sp_model_joint.tif │ ├── lambdas_model_joint.lambdas │ └── Rmd_calibration.Rmd └── CITATION ├── extra_vignettes ├── Structure_variation.png └── post-modeling.Rmd ├── R ├── RcppExports.R ├── plot_out.R ├── html_calibration.R ├── kuenm_start.R ├── kuenm.R ├── kuenm_omrat.R ├── kuenm_aicc.R ├── kuenm_occsplit.R ├── to_closest.R ├── kuenm_hierpart.R ├── model_var_contrib.R ├── data_documentation.R ├── kuenm_varcomb.R ├── kuenm_mop.R ├── kuenm_mmop.R ├── result_description.R ├── kuenm_proc.R └── kuenm_feval_swd.R ├── ku.enm.Rproj ├── man ├── wait_written_done.Rd ├── ext_type.Rd ├── n_par.Rd ├── plot_proc_aicc.Rd ├── all_var_comb.Rd ├── run_maxent.Rd ├── sp_joint.Rd ├── sp_test.Rd ├── sp_train.Rd ├── kuenm_start.Rd ├── sp_mod_joint.Rd ├── sp_model.Rd ├── sp_lambdas.Rd ├── result_description.Rd ├── plot_out.Rd ├── mvars.Rd ├── gvars.Rd ├── html_calibration.Rd ├── occ_randsplit.Rd ├── or.Rd ├── aicc.Rd ├── feature_classes.Rd ├── summary_calibration.Rd ├── var_models.Rd ├── prep_independent_swd.Rd ├── kuenm.Rd ├── kuenm_omrat.Rd ├── model_changes.Rd ├── kuenm_aicc.Rd ├── kuenm_occsplit.Rd ├── proc_or_aicc.Rd ├── kuenm_feval_swd.Rd ├── kuenm_varcomb.Rd ├── kuenm_proc.Rd ├── model_var_contrib.Rd ├── kuenm_toclosest.Rd ├── kuenm_feval.Rd ├── kuenm_mop.Rd ├── kuenm_mopagree.Rd ├── kuenm_mmop.Rd ├── kuenm_rpca.Rd ├── prepare_swd.Rd ├── kuenm_projchanges.Rd ├── kuenm_modstats_swd.Rd ├── kuenm_modstats.Rd ├── kuenm_ceval.Rd ├── kuenm_hierpart.Rd └── kuenm_modvar.Rd ├── ecography.csl ├── NAMESPACE ├── DESCRIPTION └── replicate_examples ├── pemp_complete_process.md └── aame_complete_process.md 
/src/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.so 3 | *.dll -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | -------------------------------------------------------------------------------- /Structure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marlonecobos/kuenm/HEAD/Structure.png -------------------------------------------------------------------------------- /Meta/vignette.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marlonecobos/kuenm/HEAD/Meta/vignette.rds -------------------------------------------------------------------------------- /data/sp_joint.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marlonecobos/kuenm/HEAD/data/sp_joint.rda -------------------------------------------------------------------------------- /data/sp_test.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marlonecobos/kuenm/HEAD/data/sp_test.rda -------------------------------------------------------------------------------- /data/sp_train.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marlonecobos/kuenm/HEAD/data/sp_train.rda -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^Meta$ 2 | ^extra_vignettes$ 3 | ^data-raw$ 4 | ^.*\.Rproj$ 5 | ^\.Rproj\.user$ 6 | -------------------------------------------------------------------------------- 
/inst/extdata/Gbio_1.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marlonecobos/kuenm/HEAD/inst/extdata/Gbio_1.tif -------------------------------------------------------------------------------- /inst/extdata/Gbio_12.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marlonecobos/kuenm/HEAD/inst/extdata/Gbio_12.tif -------------------------------------------------------------------------------- /inst/extdata/Gbio_15.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marlonecobos/kuenm/HEAD/inst/extdata/Gbio_15.tif -------------------------------------------------------------------------------- /inst/extdata/Gbio_17.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marlonecobos/kuenm/HEAD/inst/extdata/Gbio_17.tif -------------------------------------------------------------------------------- /inst/extdata/Mbio_1.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marlonecobos/kuenm/HEAD/inst/extdata/Mbio_1.tif -------------------------------------------------------------------------------- /inst/extdata/Mbio_12.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marlonecobos/kuenm/HEAD/inst/extdata/Mbio_12.tif -------------------------------------------------------------------------------- /inst/extdata/Mbio_15.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marlonecobos/kuenm/HEAD/inst/extdata/Mbio_15.tif -------------------------------------------------------------------------------- /inst/extdata/Mbio_17.tif: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/marlonecobos/kuenm/HEAD/inst/extdata/Mbio_17.tif -------------------------------------------------------------------------------- /inst/extdata/sp_model.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marlonecobos/kuenm/HEAD/inst/extdata/sp_model.tif -------------------------------------------------------------------------------- /inst/extdata/sp_model_joint.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marlonecobos/kuenm/HEAD/inst/extdata/sp_model_joint.tif -------------------------------------------------------------------------------- /extra_vignettes/Structure_variation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marlonecobos/kuenm/HEAD/extra_vignettes/Structure_variation.png -------------------------------------------------------------------------------- /R/RcppExports.R: -------------------------------------------------------------------------------- 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | trap_roc <- function(x, y) { 5 | .Call('_kuenm_trap_roc', PACKAGE = 'kuenm', x, y) 6 | } 7 | 8 | -------------------------------------------------------------------------------- /ku.enm.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | 
PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | -------------------------------------------------------------------------------- /man/wait_written_done.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SWD_short_helpers.R 3 | \name{wait_written_done} 4 | \alias{wait_written_done} 5 | \title{Helper function to wait until a file writing is done} 6 | \usage{ 7 | wait_written_done(file) 8 | } 9 | \arguments{ 10 | \item{file}{(character) name of the file of interest.} 11 | } 12 | \description{ 13 | Helper function to wait until a file writing is done 14 | } 15 | -------------------------------------------------------------------------------- /man/ext_type.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SWD_short_helpers.R 3 | \name{ext_type} 4 | \alias{ext_type} 5 | \title{Helper function to select extrapolation options} 6 | \usage{ 7 | ext_type(ext.type = "all") 8 | } 9 | \arguments{ 10 | \item{ext.type}{(character) extrapolation type to be used. Options are: 11 | "all", "ext_clam", "ext", and "no_ext". Default = "all".} 12 | } 13 | \description{ 14 | Helper function to select extrapolation options 15 | } 16 | -------------------------------------------------------------------------------- /man/n_par.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SWD_short_helpers.R 3 | \name{n_par} 4 | \alias{n_par} 5 | \title{Helper function to calculate the AICc values (number of parameters).} 6 | \usage{ 7 | n_par(x) 8 | } 9 | \arguments{ 10 | \item{x}{An object derived from reading the lambdas file created for Maxent. 
11 | Use \code{\link[base]{readLines}} function to read the file.} 12 | } 13 | \description{ 14 | Helper function to calculate the AICc values (number of parameters). 15 | } 16 | -------------------------------------------------------------------------------- /man/plot_proc_aicc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SWD_short_helpers.R 3 | \name{plot_proc_aicc} 4 | \alias{plot_proc_aicc} 5 | \title{Helper function to plot omission rate and AICc results} 6 | \usage{ 7 | plot_proc_aicc(summary.calibration) 8 | } 9 | \arguments{ 10 | \item{summary.calibration}{data.frame containing the summary of all metrics 11 | calculated for all models during calibration.} 12 | } 13 | \description{ 14 | Helper function to plot omission rate and AICc results 15 | } 16 | -------------------------------------------------------------------------------- /man/all_var_comb.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/prepare_swd.R 3 | \name{all_var_comb} 4 | \alias{all_var_comb} 5 | \title{Helper to create all variable combinations} 6 | \usage{ 7 | all_var_comb(var.names, min.number = 2) 8 | } 9 | \arguments{ 10 | \item{var.names}{(character) vector of variable names} 11 | 12 | \item{min.number}{(numeric) minimum number of variables per set.} 13 | } 14 | \value{ 15 | A list of vectors containing variable names per set. 
16 | } 17 | \description{ 18 | Helper to create all variable combinations 19 | } 20 | -------------------------------------------------------------------------------- /src/trap_roc.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace Rcpp; 3 | 4 | 5 | // Trapezoidal integration for Partial ROC curve 6 | // @param x is the fractional area predicted presence 7 | // @param y is the sensitivity at each threshold (fract_area value). 8 | // @return Returns the area under the Partial ROC curve. 9 | // [[Rcpp::export]] 10 | 11 | double trap_roc(NumericVector x, NumericVector y) { 12 | int x_s = x.size(); 13 | int y_s = y.size(); 14 | if(x_s != y_s) 15 | ::Rf_error("x and y must have the same length x_size (%d) != y_size (%d)", x_s, y_s); 16 | double auc = 0; 17 | 18 | for(int i = 1; i < x_s; ++i) { 19 | auc += 0.5*(y[i-1] + y[i])*(x[i]-x[i-1]); 20 | } 21 | return auc; 22 | } 23 | -------------------------------------------------------------------------------- /man/run_maxent.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SWD_short_helpers.R 3 | \name{run_maxent} 4 | \alias{run_maxent} 5 | \title{Helper function to run maxent.jar from R} 6 | \usage{ 7 | run_maxent(batch, maxent.path, add_path = TRUE, wait = FALSE) 8 | } 9 | \arguments{ 10 | \item{batch}{(character) name of the batch file (bash for Unix) with the code 11 | to create all candidate models.} 12 | 13 | \item{maxent.path}{(character) the path where the maxent.jar file is located on your 14 | computer.} 15 | 16 | \item{add_path}{(logical) whether to add full path to \code{batch}.} 17 | 18 | \item{wait}{(logical) whether R waits until the running is done or not.} 19 | } 20 | \description{ 21 | Helper function to run maxent.jar from R 22 | } 23 | --------------------------------------------------------------------------------
/man/sp_joint.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_documentation.R 3 | \name{sp_joint} 4 | \alias{sp_joint} 5 | \title{A set of occurrence records for ecological niche models} 6 | \format{ 7 | A data frame with 178 rows and 2 columns. 8 | \describe{ 9 | \item{Longitude}{longitude, in decimal degrees.} 10 | \item{Latitude}{latitude, in decimal degrees.} 11 | } 12 | } 13 | \source{ 14 | \url{https://kuscholarworks.ku.edu/handle/1808/26376} 15 | } 16 | \description{ 17 | A data.frame containing occurrence records of a tick (\emph{Amblyomma americanum}) 18 | across North America. The data combines records for training and testing. 19 | } 20 | \examples{ 21 | data("sp_joint", package = "kuenm") 22 | 23 | head(sp_joint) 24 | } 25 | -------------------------------------------------------------------------------- /man/sp_test.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_documentation.R 3 | \name{sp_test} 4 | \alias{sp_test} 5 | \title{A set of occurrence records to test candidate ecological niche models} 6 | \format{ 7 | A data frame with 89 rows and 2 columns. 8 | \describe{ 9 | \item{Longitude}{longitude, in decimal degrees.} 10 | \item{Latitude}{latitude, in decimal degrees.} 11 | } 12 | } 13 | \source{ 14 | \url{https://kuscholarworks.ku.edu/handle/1808/26376} 15 | } 16 | \description{ 17 | A data.frame containing occurrence records of a tick (\emph{Amblyomma americanum}) 18 | in North America, used to test candidate models during calibration. 
19 | } 20 | \examples{ 21 | data("sp_test", package = "kuenm") 22 | 23 | head(sp_test) 24 | } 25 | -------------------------------------------------------------------------------- /man/sp_train.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_documentation.R 3 | \name{sp_train} 4 | \alias{sp_train} 5 | \title{A set of occurrence records for training candidate ecological niche models} 6 | \format{ 7 | A data frame with 89 rows and 2 columns. 8 | \describe{ 9 | \item{Longitude}{longitude, in decimal degrees.} 10 | \item{Latitude}{latitude, in decimal degrees.} 11 | } 12 | } 13 | \source{ 14 | \url{https://kuscholarworks.ku.edu/handle/1808/26376} 15 | } 16 | \description{ 17 | A data.frame containing occurrence records of a tick (\emph{Amblyomma americanum}) 18 | across North America, used to train candidate models during calibration. 19 | } 20 | \examples{ 21 | data("sp_train", package = "kuenm") 22 | 23 | head(sp_train) 24 | } 25 | -------------------------------------------------------------------------------- /man/kuenm_start.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/kuenm_start.R 3 | \name{kuenm_start} 4 | \alias{kuenm_start} 5 | \alias{kuenm_start_swd} 6 | \title{Creation of an R markdown file for recording all analyses} 7 | \usage{ 8 | kuenm_start(file.name) 9 | 10 | kuenm_start_swd(file.name) 11 | } 12 | \arguments{ 13 | \item{file.name}{(character) is the name of the R markdown file that will be 14 | produced in your working directory. Extension is not needed} 15 | } 16 | \value{ 17 | An R markdown file with instructions and code for performing all 18 | analyses included in this package. 
19 | } 20 | \description{ 21 | Generate an R markdown file that serves as a guide for 22 | performing most of the analyses included in this package. 23 | } 24 | \examples{ 25 | kuenm_start(file.name = tempfile()) 26 | } 27 | -------------------------------------------------------------------------------- /man/sp_mod_joint.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_documentation.R 3 | \name{sp_mod_joint} 4 | \alias{sp_mod_joint} 5 | \title{A raster output of an ecological niche model created with Maxent (raw)} 6 | \format{ 7 | A RasterLayer with 150 rows, 249 columns, and 37350 cells: 8 | \describe{ 9 | \item{Suitability}{suitability values.} 10 | } 11 | } 12 | \source{ 13 | \url{https://kuscholarworks.ku.edu/handle/1808/26376} 14 | } 15 | \description{ 16 | A RasterLayer containing an ecological niche model for the tick 17 | (\emph{Amblyomma americanum}) that was created with all occurrences.
18 | } 19 | \examples{ 20 | sp_model_joint <- raster::raster(system.file("extdata/sp_model_joint.tif", 21 | package = "kuenm")) 22 | 23 | summary(sp_model_joint) 24 | } 25 | -------------------------------------------------------------------------------- /man/sp_model.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_documentation.R 3 | \name{sp_model} 4 | \alias{sp_model} 5 | \title{A raster output of an ecological niche model created with Maxent (logistic)} 6 | \format{ 7 | A RasterLayer with 150 rows, 249 columns, and 37350 cells: 8 | \describe{ 9 | \item{Suitability}{suitability values.} 10 | } 11 | } 12 | \source{ 13 | \url{https://kuscholarworks.ku.edu/handle/1808/26376} 14 | } 15 | \description{ 16 | A RasterLayer containing an ecological niche model for the tick 17 | (\emph{Amblyomma americanum}) that was created as part of the candidate models 18 | during a calibration process. 19 | } 20 | \examples{ 21 | sp_model <- raster::raster(system.file("extdata/sp_model.tif", 22 | package = "kuenm")) 23 | 24 | summary(sp_model) 25 | } 26 | -------------------------------------------------------------------------------- /man/sp_lambdas.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_documentation.R 3 | \name{sp_lambdas} 4 | \alias{sp_lambdas} 5 | \title{A lambdas file resulting from a modeling process in Maxent} 6 | \format{ 7 | A lambdas file. 8 | \describe{ 9 | \item{parameters}{number of parameters in the Maxent model.} 10 | } 11 | } 12 | \source{ 13 | \url{https://kuscholarworks.ku.edu/handle/1808/26376} 14 | } 15 | \description{ 16 | A lambdas file resulting from a model created in Maxent with raw output for 17 | \emph{Amblyomma americanum} in North America.
This file is used to calculate number 18 | of parameters in the model, which is needed while calculating AICc values. 19 | } 20 | \examples{ 21 | lbds <- readLines(system.file("extdata/lambdas_model_joint.lambdas", 22 | package = "kuenm")) 23 | 24 | head(lbds) 25 | } 26 | -------------------------------------------------------------------------------- /man/result_description.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/result_description.R 3 | \name{result_description} 4 | \alias{result_description} 5 | \title{Helper function to describe results from complex functions} 6 | \usage{ 7 | result_description(process, result.table = NULL, out.dir) 8 | } 9 | \arguments{ 10 | \item{process}{(character) name of the function which results derive from. 11 | Options include: "kuenm_projchanges", "kuenm_modvar", "kuenm_hierpart", 12 | "kuenm_mmop", and "kuenm_mopagree".} 13 | 14 | \item{result.table}{(data.frame) data.frame with the description of results. 15 | Only used when \code{process} = "kuenm_projchanges" or "kuenm_mopagree". 
16 | Default = NULL.} 17 | 18 | \item{out.dir}{(character) name of the output directory where description 19 | file will be written.} 20 | } 21 | \description{ 22 | Helper function to describe results from complex functions 23 | } 24 | -------------------------------------------------------------------------------- /man/plot_out.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_out.R 3 | \name{plot_out} 4 | \alias{plot_out} 5 | \title{Helper function for detecting values out of the environmental range of M} 6 | \usage{ 7 | plot_out(M1, G1) 8 | } 9 | \arguments{ 10 | \item{M1}{a numeric matrix containing values of all environmental variables in the calibration area.} 11 | 12 | \item{G1}{a numeric matrix containing values of all environmental variables in the full area of interest.} 13 | } 14 | \value{ 15 | A vector of environmental values in a projection area that are outside the range of values 16 | in the calibration area of an ecological niche model. 17 | } 18 | \description{ 19 | plot_out detects which environmental values in an area of projection are 20 | out of the range of environmental values in the area where ecological niche models are 21 | calibrated. This function is designed to be used specifically in the \code{\link{kuenm_mop}} function.
22 | } 23 | -------------------------------------------------------------------------------- /man/mvars.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_documentation.R 3 | \name{mvars} 4 | \alias{mvars} 5 | \title{Raster variables masked to the area where a model is calibrated} 6 | \format{ 7 | A RasterStack with 150 rows, 249 columns, 37350 cells, and 4 layers: 8 | \describe{ 9 | \item{Temperature}{temperature, in Celsius degrees times 10.} 10 | \item{Precipitation}{precipitation, in millimeters.} 11 | } 12 | } 13 | \source{ 14 | \url{https://kuscholarworks.ku.edu/handle/1808/26376} 15 | } 16 | \description{ 17 | A RasterStack of predictor variables masked to the calibration area where 18 | a model is calibrated. Variables represent four current bioclimatic variables 19 | downloaded from the WorldClim database (\url{http://www.worldclim.org/}). 20 | } 21 | \examples{ 22 | mvars <- raster::stack(list.files(system.file("extdata", package = "kuenm"), 23 | pattern = "Mbio_", full.names = TRUE)) 24 | 25 | summary(mvars) 26 | } 27 | -------------------------------------------------------------------------------- /src/RcppExports.cpp: -------------------------------------------------------------------------------- 1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #include 5 | 6 | using namespace Rcpp; 7 | 8 | // trap_roc 9 | double trap_roc(NumericVector x, NumericVector y); 10 | RcppExport SEXP _kuenm_trap_roc(SEXP xSEXP, SEXP ySEXP) { 11 | BEGIN_RCPP 12 | Rcpp::RObject rcpp_result_gen; 13 | Rcpp::RNGScope rcpp_rngScope_gen; 14 | Rcpp::traits::input_parameter< NumericVector >::type x(xSEXP); 15 | Rcpp::traits::input_parameter< NumericVector >::type y(ySEXP); 16 | rcpp_result_gen = Rcpp::wrap(trap_roc(x, y)); 17 | return rcpp_result_gen; 18 | END_RCPP 19
| } 20 | 21 | static const R_CallMethodDef CallEntries[] = { 22 | {"_kuenm_trap_roc", (DL_FUNC) &_kuenm_trap_roc, 2}, 23 | {NULL, NULL, 0} 24 | }; 25 | 26 | RcppExport void R_init_kuenm(DllInfo *dll) { 27 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 28 | R_useDynamicSymbols(dll, FALSE); 29 | } 30 | -------------------------------------------------------------------------------- /ecography.csl: -------------------------------------------------------------------------------- 1 | 2 | 18 | -------------------------------------------------------------------------------- /man/gvars.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_documentation.R 3 | \name{gvars} 4 | \alias{gvars} 5 | \title{Variables masked to the area where a model is transferred} 6 | \format{ 7 | A RasterStack with 900 rows, 2160 columns, 1944000 cells, and 4 layers: 8 | \describe{ 9 | \item{Temperature}{temperature, in Celsius degrees times 10.} 10 | \item{Precipitation}{precipitation, in millimeters.} 11 | } 12 | } 13 | \source{ 14 | \url{https://kuscholarworks.ku.edu/handle/1808/26376} 15 | } 16 | \description{ 17 | A RasterStack containing predictor variables masked to the area where a model 18 | is projected. Variables represent four future bioclimatic variables (2050) of 19 | the NCAR-CCSM4 general circulation model under the RCP 8.5 emission scenario.
20 | } 21 | \examples{ 22 | gvars <- raster::stack(list.files(system.file("extdata", package = "kuenm"), 23 | pattern = "Gbio_", full.names = TRUE)) 24 | 25 | summary(gvars) 26 | } 27 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | citHeader("To cite kuenm in publications use:") 2 | 3 | citEntry(entry = "Article", 4 | title = "kuenm: an R package for detailed development of ecological niche models using Maxent", 5 | author = personList(as.person("Marlon E. Cobos"), 6 | as.person("A. Townsend Peterson"), 7 | as.person("Narayani Barve"), 8 | as.person("Luis Osorio-Olvera")), 9 | journal = "PeerJ", 10 | year = "2019", 11 | volume = "7", 12 | number = "e6281", 13 | month = "jan", 14 | publisher = "PeerJ", 15 | url = "http://doi.org/10.7717/peerj.6281", 16 | doi = "10.7717/peerj.6281", 17 | 18 | textVersion = 19 | paste("Cobos ME, Peterson AT, Barve N, Osorio-Olvera L. 
(2019)", 20 | "kuenm: an R package for detailed development of ecological niche models using Maxent", 21 | "PeerJ, 7:e6281", 22 | "URL http://doi.org/10.7717/peerj.6281") 23 | ) -------------------------------------------------------------------------------- /man/html_calibration.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/html_calibration.R 3 | \name{html_calibration} 4 | \alias{html_calibration} 5 | \title{Creation of an HTML file with results from model calibration} 6 | \usage{ 7 | html_calibration(path = getwd(), file.name) 8 | } 9 | \arguments{ 10 | \item{path}{directory where the HTML file will be written; current directory 11 | by default.} 12 | 13 | \item{file.name}{(character) name of the HTML file without extension (e.g., 14 | "calibration_results")} 15 | } 16 | \value{ 17 | An HTML file summarizing results from model calibration, evaluation, and 18 | selection. 19 | } 20 | \description{ 21 | html_calibration creates an HTML file that summarizes all outputs 22 | from model calibration, evaluation, and selection. 23 | } 24 | \details{ 25 | This function is used along with the functions \code{\link{kuenm_ceval}} and 26 | \code{\link{kuenm_cal_swd}}.
27 | } 28 | \examples{ 29 | path <- getwd() # directory with outputs of the kuenm_ceval function 30 | name <- "evaluation_results" 31 | 32 | \dontrun{ 33 | html_calibration(path = path, file.name = name) 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /man/occ_randsplit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/kuenm_occsplit.R 3 | \name{occ_randsplit} 4 | \alias{occ_randsplit} 5 | \title{Split occurrences randomly in training and testing data} 6 | \usage{ 7 | occ_randsplit(occ, train.proportion = 0.5) 8 | } 9 | \arguments{ 10 | \item{occ}{matrix or data.frame with the occurrences to be split. Columns may vary but 11 | species, longitude, and latitude are recommended.} 12 | 13 | \item{train.proportion}{(numeric) proportion (from 0 to 1) of data to be used as training 14 | occurrences. The remaining data will be used for testing.} 15 | } 16 | \value{ 17 | List with all occurrences (joint), training occurrences (train), and testing (test) 18 | occurrences. 19 | } 20 | \description{ 21 | occ_randsplit splits a set of occurrences to obtain training and testing 22 | data randomly.
23 | } 24 | \examples{ 25 | # arguments 26 | occs <- read.csv(list.files(system.file("extdata", package = "kuenm"), 27 | pattern = "sp_test.csv", full.names = TRUE)) 28 | occs <- data.frame(Species = "Species_1", occs) 29 | train_prop <- 0.5 30 | 31 | # running 32 | occ_rsplit <- occ_randsplit(occ = occs, train.proportion = train_prop) 33 | } 34 | -------------------------------------------------------------------------------- /man/or.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SWD_analysis_helpers.R 3 | \name{or} 4 | \alias{or} 5 | \title{Omission rates calculation for Maxent SWD predictions} 6 | \usage{ 7 | or(prediction, occ.tra, occ.test, threshold = 5) 8 | } 9 | \arguments{ 10 | \item{prediction}{matrix of longitude and latitude coordinates, and Maxent 11 | prediction obtained using the SWD format. Prediction coordinates must include 12 | the ones in \code{occ.tra}, and \code{occ.test}.} 13 | 14 | \item{occ.tra}{numerical matrix containing coordinates of the occurrence data 15 | used to create the prediction to be evaluated; columns must be: longitude and 16 | latitude.} 17 | 18 | \item{occ.test}{numerical matrix containing coordinates of the occurrences 19 | used to test the prediction to be evaluated; columns must be: longitude and 20 | latitude.} 21 | 22 | \item{threshold}{(numeric) vector of value(s) from 0 to 100 that will be used 23 | as thresholds, default = 5.} 24 | } 25 | \value{ 26 | A named numeric value or numeric vector with the result(s). 27 | } 28 | \description{ 29 | or calculates omission rates of numerical projections of ecological 30 | niche models based on one or multiple user-specified thresholds.
31 | } 32 | -------------------------------------------------------------------------------- /inst/extdata/lambdas_model_joint.lambdas: -------------------------------------------------------------------------------- 1 | bio_1, 0.0, -19.0, 246.0 2 | bio_12, 0.0, 198.0, 1899.0 3 | bio_15, 0.0, 6.0, 91.0 4 | bio_17, 0.0, 10.0, 449.0 5 | bio_1^2, -1.469858403765491, 0.0, 60516.0 6 | bio_12^2, -1.910313084564374, 39204.0, 3606201.0 7 | bio_1*bio_15, -0.19054797947611643, -304.0, 17835.0 8 | bio_1*bio_17, 0.23435578044577152, -2926.0, 67470.0 9 | bio_12*bio_15, 0.24300857202875487, 6954.0, 90916.0 10 | bio_15*bio_17, 2.3492441200539713, 830.0, 10540.0 11 | `bio_17, -0.9575928083196661, 10.0, 69.5 12 | `bio_15, 0.657145851498337, 6.0, 16.5 13 | `bio_12, -1.5051413984961088, 198.0, 681.5 14 | 'bio_15, -1.0056691466946788, 48.5, 91.0 15 | `bio_1, -2.999608606306573, -19.0, 130.5 16 | 'bio_17, -0.6602477523214904, 255.5, 449.0 17 | `bio_12, -0.5810497370715872, 198.0, 1068.5 18 | 'bio_1, -0.38822167343743724, 167.5, 246.0 19 | `bio_15, 1.0371847870683355, 6.0, 19.5 20 | 'bio_15, -2.322022983290843, 47.5, 91.0 21 | `bio_15, -0.07852449362914661, 6.0, 10.5 22 | `bio_1, -2.396906611422994, -19.0, 131.5 23 | `bio_12, -1.0480708443187965, 198.0, 1067.5 24 | `bio_17, -0.4771916505104741, 10.0, 68.5 25 | `bio_17, 0.16976377532917253, 10.0, 112.5 26 | 'bio_1, -0.061740716870974745, 168.5, 246.0 27 | linearPredictorNormalizer, 0.7107467346931102 28 | densityNormalizer, 1834.4035851239296 29 | numBackgroundPoints, 10091 30 | entropy, 8.67178216647997 31 | -------------------------------------------------------------------------------- /man/aicc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SWD_analysis_helpers.R 3 | \name{aicc} 4 | \alias{aicc} 5 | \title{AICc calculation of Maxent SWD predictions} 6 | \usage{ 7 | aicc(occ, prediction, npar) 8 | } 9 | 
\arguments{ 10 | \item{occ}{matrix or data.frame with coordinates of the occurrences used to 11 | create the model (raster) to be evaluated; columns must be: longitude and 12 | latitude.} 13 | 14 | \item{prediction}{matrix or data.frame of longitude and latitude coordinates, 15 | and Maxent Raw predictions obtained using the SWD format in Maxent. 16 | Coordinates in this prediction must include the ones in \code{occ}} 17 | 18 | \item{npar}{(numeric) number of parameters of the model. Use function 19 | \code{\link{n_par}} to obtain number of parameters in the model from 20 | the lambdas file.} 21 | } 22 | \value{ 23 | A data.frame containing values of AICc, delta AICc, weight of AICc, and 24 | number of parameters. The number of rows of the data.frame corresponds to 25 | the number of models evaluated. 26 | } 27 | \description{ 28 | aicc calculates the Akaike information criterion corrected for 29 | small sample sizes (AICc) for predictions produced with Maxent. 30 | } 31 | \details{ 32 | Calculations are done following 33 | \href{https://doi.org/10.1890/10-1171.1}{Warren and Seifert (2011)}. 34 | } 35 | -------------------------------------------------------------------------------- /man/feature_classes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SWD_short_helpers.R 3 | \name{feature_classes} 4 | \alias{feature_classes} 5 | \title{Helper function to select feature classes} 6 | \usage{ 7 | feature_classes(f.clas = "all") 8 | } 9 | \arguments{ 10 | \item{f.clas}{(character) feature classes can be selected from five different 11 | combination sets or manually. Combination sets are: "all", "basic", "no.t.h", 12 | "no.h", and "no.t". Default = "all". basic = "l", "lq", "lqp", "lqpt", "lqpth". 13 | Combinations "no.t.h", "no.h", and "no.t", exclude t and/or h. 
See details for 14 | all the available potential combinations of feature classes.} 15 | } 16 | \value{ 17 | character containing java code for defining feature classes in Maxent 18 | candidate models. 19 | } 20 | \description{ 21 | Helper function to select feature classes 22 | } 23 | \details{ 24 | Below all potential combinations of feature classes are shown. Manual selection 25 | can be done by creating a vector of one or more of the combinations of this 26 | list. l = linear, q = quadratic, p = product, t = threshold, and h = hinge. 27 | "l", "q", "p", "t", "h", "lq", "lp", "lt", "lh", "qp", "qt", "qh", "pt", "ph", 28 | "th", "lqp", "lqt", "lqh", "lpt", "lph", "lth", "qpt", "qph", "qth", "pth", 29 | "lqpt", "lqph", "lqth", "lpth", "qpth", and "lqpth". 30 | } 31 | -------------------------------------------------------------------------------- /man/summary_calibration.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SWD_analysis_helpers.R 3 | \name{summary_calibration} 4 | \alias{summary_calibration} 5 | \title{Helper to summarize all results from model calibration exercises} 6 | \usage{ 7 | summary_calibration(proc.or.aicc.results, selection = "OR_AICc") 8 | } 9 | \arguments{ 10 | \item{proc.or.aicc.results}{data.frame with results from evaluation of all 11 | candidate models. Generally the output of \code{\link{proc_or_aicc}}.} 12 | 13 | \item{selection}{(character) model selection criterion, can be "OR_AICc", 14 | "AICc", or "OR"; OR = omission rates. Default = "OR_AICc", which means that 15 | among models that are statistically significant and that present omission 16 | rates below the threshold, those with delta AICc up to 2 will be selected. 17 | See details for other selection criteria.} 18 | } 19 | \value{ 20 | A list with all results that need to be written to produce the evaluation report. 
21 | } 22 | \description{ 23 | Helper to summarize all results from model calibration exercises 24 | } 25 | \details{ 26 | Other selection criteria are described below: If "AICc" criterion is chosen, 27 | all significant models with delta AICc up to 2 will be selected. If "OR" is 28 | chosen, the first 10 significant models with the lowest omission rates will 29 | be selected. 30 | } 31 | -------------------------------------------------------------------------------- /R/plot_out.R: -------------------------------------------------------------------------------- 1 | #' Helper function for detecting values out of the environmental range of M 2 | #' 3 | #' @description plot_out detects which environmental values in an area of projection are 4 | #' out of the range of environmental values in the area where ecological niche models are 5 | #' calibrated. This function is designed to be used specifically in the \code{\link{kuenm_mop}} function. 6 | #' 7 | #' @param M1 a numeric matrix containing values of all environmental variables in the calibration area. 8 | #' @param G1 a numeric matrix containing values of all environmental variables in the full area of interest. 9 | #' 10 | #' @return A vector with the positions (row indices in \code{G1}) of environmental values in a projection area that are outside the range of values 11 | #' in the calibration area of an ecological niche model.
12 | #' 13 | #' @export 14 | 15 | plot_out <- function (M1, G1) { 16 | if(class(M1)[1] %in% c("RasterBrick", "RasterLayer", "RasterStack")){ 17 | M1 <- raster::values(M1) 18 | } 19 | # G1 gets the same Raster*-to-values conversion as M1 20 | if(class(G1)[1] %in% c("RasterBrick", "RasterLayer", "RasterStack")){ 21 | G1 <- raster::values(G1) 22 | } 23 | 24 | d1 <- dim(M1) 25 | AllVec <- vector() 26 | # for each variable (column), find the rows of G1 outside the range observed in M1 27 | for (i in seq_len(d1[2])) { 28 | MRange <- range(M1[, i], na.rm = TRUE) 29 | l1 <- which(G1[, i] < MRange[1] | G1[, i] > MRange[2]) 30 | AllVec <- c(l1, AllVec) 31 | } 32 | # a row can be out of range for several variables; report each row index once 33 | AllVec <- unique(AllVec) 34 | 35 | return(AllVec) 36 | } 37 | -------------------------------------------------------------------------------- /man/var_models.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/kuenm_modvar.R 3 | \name{var_models} 4 | \alias{var_models} 5 | \title{Helper function to calculate raster layers of model variance} 6 | \usage{ 7 | var_models(model.names, is.swd, format = "asc", sp.name, source.codes, 8 | source, split.length = 100) 9 | } 10 | \arguments{ 11 | \item{model.names}{(character) vector of model names.} 12 | 13 | \item{is.swd}{(logical) whether model calibration and final models were 14 | produced using SWD format.} 15 | 16 | \item{format}{(character) format of model raster files. Options are: "asc" or 17 | "tif"; default = "asc".} 18 | 19 | \item{sp.name}{(character) species names. This name must be the one that 20 | appears as part of the raster file of each model replicate.} 21 | 22 | \item{source.codes}{(character or numeric) vector of names or numbers that 23 | will be part of the pattern that will be searched.} 24 | 25 | \item{source}{(character) source of variation to be evaluated. Options are: 26 | "replicates", "parameters", "clim_models", and "emi_scenarios".} 27 | 28 | \item{split.length}{(numeric) limit number of models to be processed at the 29 | time.
Bigger numbers would demand more from the RAM. Default = 100.} 30 | } 31 | \value{ 32 | RasterLayer or data.frame depending on \code{is.swd} argument. 33 | } 34 | \description{ 35 | Helper function to calculate raster layers of model variance 36 | } 37 | -------------------------------------------------------------------------------- /R/html_calibration.R: -------------------------------------------------------------------------------- 1 | #' Creation of an HTML file with results from model calibration 2 | #' 3 | #' @description html_calibration creates an HTML file that summarizes all outputs 4 | #' from model calibration, evaluation, and selection. 5 | #' 6 | #' @param path directory where the HTML file will be written; current directory 7 | #' by default. 8 | #' @param file.name (character) name of the HTML file without extension (e.g., 9 | #' "calibration_results"). 10 | #' 11 | #' @return 12 | #' An HTML file summarizing results from model calibration, evaluation, and 13 | #' selection. 14 | #' 15 | #' @details 16 | #' This function is used along with the functions \code{\link{kuenm_ceval}} and 17 | #' \code{\link{kuenm_cal_swd}}.
18 | #' 19 | #' @export 20 | #' @importFrom rmarkdown render 21 | #' @importFrom knitr kable 22 | #' 23 | #' @examples 24 | #' path <- getwd() # directory with outputs of the kuenm_ceval function 25 | #' name <- "evaluation_results" 26 | #' 27 | #' \dontrun{ 28 | #' html_calibration(path = path, file.name = name) 29 | #' } 30 | 31 | html_calibration <- function(path = getwd(), file.name) { 32 | 33 | if (missing(file.name)) { 34 | stop("Argument 'file.name' must be defined") 35 | } 36 | # the Rmd template shipped with the package is copied next to the results and rendered there 37 | file.name <- paste0(path, "/", file.name, ".Rmd") 38 | suppressMessages( 39 | file.copy(from = system.file("extdata", "Rmd_calibration.Rmd", 40 | package = "kuenm"), to = file.name) 41 | ) 42 | on.exit(unlink(file.name), add = TRUE) # remove the Rmd copy even if rendering fails 43 | rmarkdown::render(file.name, "html_document", quiet = TRUE) 44 | unlink(file.name) # kept as the last expression so the (invisible) return value is unchanged 45 | } 46 | -------------------------------------------------------------------------------- /man/prep_independent_swd.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/prepare_swd.R 3 | \name{prep_independent_swd} 4 | \alias{prep_independent_swd} 5 | \title{Helper to prepare independent occurrences when using SWD format} 6 | \usage{ 7 | prep_independent_swd( 8 | occ, 9 | species, 10 | longitude, 11 | latitude, 12 | raster.layers, 13 | save = FALSE, 14 | name.occ 15 | ) 16 | } 17 | \arguments{ 18 | \item{occ}{data.frame containing occurrence records of the species of interest. 19 | Mandatory columns are: species, longitude, and latitude.
Other columns will 20 | be ignored.} 21 | 22 | \item{species}{(character) name of column containing species name.} 23 | 24 | \item{longitude}{(character) name of column containing longitude values.} 25 | 26 | \item{latitude}{(character) name of column containing latitude values.} 27 | 28 | \item{raster.layers}{RasterStack of predictor variables masked to the area 29 | where the model was calibrated.} 30 | 31 | \item{save}{(logical) whether or not to write a csv file containing 32 | independent occurrences prepared to be used in model evaluation. The file 33 | will contain additional columns with the values of the variables for each 34 | coordinate. Default = FALSE.} 35 | 36 | \item{name.occ}{(character) name to be used for the file with occurrence 37 | records to be written (e.g., "independent_occ").} 38 | } 39 | \value{ 40 | A data.frame with the prepared independent occurrences. 41 | } 42 | \description{ 43 | Helper to prepare independent occurrences when using SWD format 44 | } 45 | -------------------------------------------------------------------------------- /R/kuenm_start.R: -------------------------------------------------------------------------------- 1 | #' Creation of an R markdown file for recording all analyses 2 | #' 3 | #' @description 4 | #' Generate an R markdown file that serves as a guide for 5 | #' performing most of the analyses included in this package. 6 | #' 7 | #' @param file.name (character) is the name of the R markdown file that will be 8 | #' produced in your working directory. Extension is not needed. 9 | #' 10 | #' @return An R markdown file with instructions and code for performing all 11 | #' analyses included in this package.
12 | #' 13 | #' @export 14 | #' 15 | #' @usage 16 | #' kuenm_start(file.name) 17 | #' 18 | #' @rdname kuenm_start 19 | #' 20 | #' @examples 21 | #' kuenm_start(file.name = tempfile()) 22 | 23 | kuenm_start <- function(file.name){ 24 | 25 | if (missing(file.name)) { 26 | stop("Argument 'file.name' must be defined") 27 | } 28 | # ".Rmd" is appended here, so the name is given without extension 29 | file.name <- paste0(file.name, ".Rmd") 30 | suppressMessages( 31 | file.copy(from = system.file("extdata", "Rmd_start.Rmd", 32 | package = "kuenm"), to = file.name) 33 | ) 34 | # open the copied template; utils:: is explicit because file.edit is not imported in NAMESPACE 35 | utils::file.edit(file.name) 36 | } 37 | 38 | #' @export 39 | #' @usage 40 | #' kuenm_start_swd(file.name) 41 | #' 42 | #' @rdname kuenm_start 43 | 44 | kuenm_start_swd <- function(file.name){ 45 | 46 | if (missing(file.name)) { 47 | stop("Argument 'file.name' must be defined") 48 | } 49 | # same steps as kuenm_start, but using the SWD version of the template 50 | file.name <- paste0(file.name, ".Rmd") 51 | suppressMessages( 52 | file.copy(from = system.file("extdata", "Rmd_start_swd.Rmd", 53 | package = "kuenm"), to = file.name) 54 | ) 55 | # open the copied template; utils:: is explicit because file.edit is not imported in NAMESPACE 56 | utils::file.edit(file.name) 57 | } 58 | -------------------------------------------------------------------------------- /R/kuenm.R: -------------------------------------------------------------------------------- 1 | #' kuenm: Detailed development of ecological niche models using Maxent 2 | #' 3 | #' kuenm helps with the development of critical phases of the 4 | #' ecological niche modeling process in Maxent. Pre-modeling analyses and 5 | #' explorations can be done to prepare data. Model calibration (model selection) 6 | #' can be done by creating and testing several candidate models. Handy options 7 | #' for producing final models, evaluating such models, and assessing extrapolation 8 | #' risks are also included. Tools for post-modeling analyses are implemented to 9 | #' allow for further exploration of results.
10 | #' 11 | #' 12 | #' @section kuenm functions: 13 | #' \code{\link{explore_var_contrib}}, \code{\link{kuenm_aicc}}, 14 | #' \code{\link{kuenm_cal}}, \code{\link{kuenm_cal_swd}}, 15 | #' \code{\link{kuenm_ceval}}, \code{\link{kuenm_feval}}, 16 | #' \code{\link{kuenm_feval_swd}}, \code{\link{kuenm_hierpart}}, 17 | #' \code{\link{kuenm_mmop}}, \code{\link{kuenm_mod}}, 18 | #' \code{\link{kuenm_mod_swd}}, \code{\link{kuenm_modstats}}, 19 | #' \code{\link{kuenm_modstats_swd}}, \code{\link{kuenm_modvar}}, 20 | #' \code{\link{kuenm_mop}}, \code{\link{kuenm_mopagree}}, 21 | #' \code{\link{kuenm_occsplit}}, \code{\link{kuenm_omrat}}, 22 | #' \code{\link{kuenm_proc}}, \code{\link{kuenm_projchanges}}, 23 | #' \code{\link{kuenm_rpca}}, \code{\link{kuenm_start}}, 24 | #' \code{\link{kuenm_start_swd}}, \code{\link{kuenm_toclosest}}, 25 | #' \code{\link{kuenm_varcomb}}, \code{\link{model_var_contrib}}, 26 | #' \code{\link{prep_independent_swd}}, \code{\link{prepare_swd}} 27 | #' 28 | #' @docType package 29 | #' @name kuenm 30 | NULL 31 | -------------------------------------------------------------------------------- /man/kuenm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/kuenm.R 3 | \docType{package} 4 | \name{kuenm} 5 | \alias{kuenm} 6 | \title{kuenm: Detailed development of ecological niche models using Maxent} 7 | \description{ 8 | kuenm helps with the development of critical phases of the 9 | ecological niche modeling process in Maxent. Pre-modeling analyses and 10 | explorations can be done to prepare data. Model calibration (model selection) 11 | can be done by creating and testing several candidate models. Handy options 12 | for producing final models, evaluating such models, and assessing extrapolation 13 | risks are also included. Tools for post-modeling analyses are implemented to 14 | allow for further exploration of results. 
15 | } 16 | \section{kuenm functions}{ 17 | 18 | \code{\link{explore_var_contrib}}, \code{\link{kuenm_aicc}}, 19 | \code{\link{kuenm_cal}}, \code{\link{kuenm_cal_swd}}, 20 | \code{\link{kuenm_ceval}}, \code{\link{kuenm_feval}}, 21 | \code{\link{kuenm_feval_swd}}, \code{\link{kuenm_hierpart}}, 22 | \code{\link{kuenm_mmop}}, \code{\link{kuenm_mod}}, 23 | \code{\link{kuenm_mod_swd}}, \code{\link{kuenm_modstats}}, 24 | \code{\link{kuenm_modstats_swd}}, \code{\link{kuenm_modvar}}, 25 | \code{\link{kuenm_mop}}, \code{\link{kuenm_mopagree}}, 26 | \code{\link{kuenm_occsplit}}, \code{\link{kuenm_omrat}}, 27 | \code{\link{kuenm_proc}}, \code{\link{kuenm_projchanges}}, 28 | \code{\link{kuenm_rpca}}, \code{\link{kuenm_start}}, 29 | \code{\link{kuenm_start_swd}}, \code{\link{kuenm_toclosest}}, 30 | \code{\link{kuenm_varcomb}}, \code{\link{model_var_contrib}}, 31 | \code{\link{prep_independent_swd}}, \code{\link{prepare_swd}} 32 | } 33 | 34 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(aicc) 4 | export(all_var_comb) 5 | export(explore_var_contrib) 6 | export(ext_type) 7 | export(feature_classes) 8 | export(html_calibration) 9 | export(kuenm_aicc) 10 | export(kuenm_cal) 11 | export(kuenm_cal_swd) 12 | export(kuenm_ceval) 13 | export(kuenm_feval) 14 | export(kuenm_feval_swd) 15 | export(kuenm_hierpart) 16 | export(kuenm_mmop) 17 | export(kuenm_mod) 18 | export(kuenm_mod_swd) 19 | export(kuenm_modstats) 20 | export(kuenm_modstats_swd) 21 | export(kuenm_modvar) 22 | export(kuenm_mop) 23 | export(kuenm_mopagree) 24 | export(kuenm_occsplit) 25 | export(kuenm_omrat) 26 | export(kuenm_proc) 27 | export(kuenm_projchanges) 28 | export(kuenm_rpca) 29 | export(kuenm_start) 30 | export(kuenm_toclosest) 31 | export(kuenm_varcomb) 32 | export(model_changes) 33 | export(model_var_contrib) 34 | 
export(n_par) 35 | export(occ_randsplit) 36 | export(or) 37 | export(plot_contribution) 38 | export(plot_out) 39 | export(plot_proc_aicc) 40 | export(prep_independent_swd) 41 | export(prepare_swd) 42 | export(proc_or_aicc) 43 | export(result_description) 44 | export(run_maxent) 45 | export(summary_calibration) 46 | export(var_models) 47 | export(wait_written_done) 48 | importFrom(graphics,abline) 49 | importFrom(graphics,barplot) 50 | importFrom(graphics,box) 51 | importFrom(graphics,layout) 52 | importFrom(graphics,legend) 53 | importFrom(graphics,par) 54 | importFrom(graphics,plot.new) 55 | importFrom(graphics,text) 56 | importFrom(graphics,title) 57 | importFrom(knitr,kable) 58 | importFrom(purrr,map_df) 59 | importFrom(raster,extract) 60 | importFrom(rmarkdown,render) 61 | importFrom(utils,read.csv) 62 | importFrom(utils,write.csv) 63 | useDynLib(kuenm) 64 | -------------------------------------------------------------------------------- /man/kuenm_omrat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/kuenm_omrat.R 3 | \name{kuenm_omrat} 4 | \alias{kuenm_omrat} 5 | \title{Omission rates calculation for single models} 6 | \usage{ 7 | kuenm_omrat(model, threshold = 5, occ.tra, occ.test) 8 | } 9 | \arguments{ 10 | \item{model}{a RasterLayer of the model to be evaluated.} 11 | 12 | \item{threshold}{(numeric vector) value(s) from 0 to 100 that will be used as thresholds, 13 | default = 5.} 14 | 15 | \item{occ.tra}{a numerical matrix containing coordinates of the occurrence data used to create 16 | the ecological niche model to be evaluated; columns must be: longitude and latitude.} 17 | 18 | \item{occ.test}{a numerical matrix containing coordinates of the occurrences used to test 19 | the ecological niche model to be evaluated; columns must be: longitude and latitude.} 20 | } 21 | \value{ 22 | A named numeric value or numeric vector with the 
result(s). 23 | } 24 | \description{ 25 | kuenm_omrat calculates omission rates of geographic projections 26 | of ecological niche models based on one or multiple user-specified thresholds. 27 | } 28 | \examples{ 29 | # single threshold 30 | model <- raster::raster(system.file("extdata/sp_model.tif", 31 | package = "kuenm")) 32 | thres <- 5 33 | data("sp_train", package = "kuenm") 34 | data("sp_test", package = "kuenm") 35 | 36 | om_rate <- kuenm_omrat(model, threshold = thres, 37 | occ.tra = sp_train, occ.test = sp_test) 38 | 39 | # multiple thresholds 40 | thres1 <- c(5, 10, 20) 41 | 42 | om_rate <- kuenm_omrat(model, threshold = thres1, 43 | occ.tra = sp_train, occ.test = sp_test) 44 | } 45 | -------------------------------------------------------------------------------- /man/model_changes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/kuenm_projchanges.R 3 | \name{model_changes} 4 | \alias{model_changes} 5 | \title{Helper function to calculate model changes} 6 | \usage{ 7 | model_changes(calibration.model, current.model, fclim.models, 8 | is.swd, result = "continuous", threshold = 5, occ, 9 | clim.models, out.dir) 10 | } 11 | \arguments{ 12 | \item{calibration.model}{(character) name of raster prediction for the 13 | calibration area. Ignored if \code{is.swd} = TRUE.} 14 | 15 | \item{current.model}{(character) name of the current model raster. It can be the same as 16 | \code{calibration.model}.} 17 | 18 | \item{fclim.models}{(character) vector of climatic model raster names.} 19 | 20 | \item{is.swd}{(logical) whether or not modeling was done using SWD format.} 21 | 22 | \item{result}{(character) type of result needed. options are "continuous" and "binary".
23 | Default = "continuous".} 24 | 25 | \item{threshold}{(numeric) if \code{result} = "binary", value from 0 to 100 that will be used 26 | as threshold, default = 5.} 27 | 28 | \item{occ}{if \code{result} = "binary", a numerical matrix containing coordinates of 29 | the occurrence data used to create the final models; columns must be: longitude and latitude.} 30 | 31 | \item{clim.models}{(character) names that identify the climatic models used to project ENMs. 32 | If not defined it is assumed that only one climate model was used.} 33 | 34 | \item{out.dir}{(character) name of the folder that will be created to save the binary models 35 | if \code{result} = "binary".} 36 | } 37 | \description{ 38 | Helper function to calculate model changes 39 | } 40 | -------------------------------------------------------------------------------- /man/kuenm_aicc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/kuenm_aicc.R 3 | \name{kuenm_aicc} 4 | \alias{kuenm_aicc} 5 | \title{AICc calculation for Maxent models} 6 | \usage{ 7 | kuenm_aicc(occ, model, npar) 8 | } 9 | \arguments{ 10 | \item{occ}{matrix or data.frame with coordinates of the occurrences used to 11 | create the model (raster) to be evaluated; columns must be: longitude and 12 | latitude.} 13 | 14 | \item{model}{a RasterLayer or RasterStack of model projections created using 15 | Maxent with raw outputs.} 16 | 17 | \item{npar}{(numeric) number of parameters for \code{model}. Length must 18 | correspond with number of layers in \code{model}. See function 19 | \code{\link{n_par}}.} 20 | } 21 | \value{ 22 | A data.frame containing values of AICc, delta AICc, weight of AICc, and 23 | number of parameters. The number of rows of the data.frame corresponds to 24 | the number of models evaluated.
25 | } 26 | \description{ 27 | kuenm_aicc calculates the Akaike information criterion corrected 28 | for small sample sizes (AICc) for single or multiple models produced with 29 | Maxent. 30 | } 31 | \details{ 32 | Calculations are done following 33 | \href{https://doi.org/10.1890/10-1171.1}{Warren and Seifert (2011)}. 34 | } 35 | \examples{ 36 | data("sp_joint", package = "kuenm") 37 | model <- raster::raster(system.file("extdata/sp_model_joint.tif", 38 | package = "kuenm")) 39 | 40 | lbds <- readLines(system.file("extdata/lambdas_model_joint.lambdas", 41 | package = "kuenm")) 42 | npar <- n_par(lbds) # counting number of parameters 43 | 44 | aicc <- kuenm_aicc(occ = sp_joint, model = model, npar = npar) 45 | } 46 | \seealso{ 47 | \code{\link{aicc}} for results obtained using the SWD format. 48 | } 49 | -------------------------------------------------------------------------------- /man/kuenm_occsplit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/kuenm_occsplit.R 3 | \name{kuenm_occsplit} 4 | \alias{kuenm_occsplit} 5 | \title{Split occurrence files in training and testing data} 6 | \usage{ 7 | kuenm_occsplit(occ, train.proportion = 0.5, method = "random", 8 | save = FALSE, name = "occ") 9 | } 10 | \arguments{ 11 | \item{occ}{data.frame of occurrence records containing at least species, 12 | longitude, and latitude columns.} 13 | 14 | \item{train.proportion}{(numeric) proportion (from 0 to 1) of data to be used as training 15 | occurrences. The remaining data will be used for testing.} 16 | 17 | \item{method}{(character) method for selecting training and testing occurrences. Current 18 | option is "random".} 19 | 20 | \item{save}{(logical) whether or not to save the results in the working 21 | directory. Default = FALSE.} 22 | 23 | \item{name}{(character) common name for csv files to be written. 
A suffix will be added 24 | depending on if the data is the complete set, training set, or testing set of occurrences.} 25 | } 26 | \value{ 27 | List with all, training, and testing occurrences. Three csv files will be written in the 28 | working directory according to the name defined in \code{name} plus the suffix _joint 29 | for all records, _train for the training set, and _test for the testing set. 30 | } 31 | \description{ 32 | kuenm_occsplit splits occurrences contained in a data.frame to obtain training 33 | and testing data based on distinct methods for calibrating models. 34 | } 35 | \examples{ 36 | # arguments 37 | data("sp_joint", package = "kuenm") 38 | 39 | occs <- data.frame(Species = "A_americanum", sp_joint) 40 | train_prop <- 0.5 41 | method = "random" 42 | 43 | # running 44 | data_split <- kuenm_occsplit(occ = occs, train.proportion = train_prop, 45 | method = method) 46 | } 47 | -------------------------------------------------------------------------------- /man/proc_or_aicc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SWD_analysis_helpers.R 3 | \name{proc_or_aicc} 4 | \alias{proc_or_aicc} 5 | \title{Partial ROC, omission rates, and AICc calculations in concert (helper)} 6 | \usage{ 7 | proc_or_aicc(occ.joint, occ.tra, occ.test, raw.folders, log.folders, 8 | threshold = 5, rand.percent = 50, iterations = 500, kept = TRUE) 9 | } 10 | \arguments{ 11 | \item{occ.joint}{(character) the name of csv file with training and testing 12 | occurrences combined; columns must be: species, longitude, and latitude.} 13 | 14 | \item{occ.tra}{(character) the name of the csv file with the training 15 | occurrences; columns as in \code{occ.joint}.} 16 | 17 | \item{occ.test}{(character) the name of the csv file with the evaluation 18 | occurrences; columns as in \code{occ.joint}.} 19 | 20 | \item{raw.folders}{(character) vector of names of 
directories containing 21 | models created with all occurrences and raw outputs.} 22 | 23 | \item{log.folders}{(character) vector of names of directories containing 24 | models created with training occurrences and logistic outputs.} 25 | 26 | \item{threshold}{(numeric) the percentage of training data omission error 27 | allowed (E); default = 5.} 28 | 29 | \item{rand.percent}{(numeric) the percentage of data to be used for the 30 | bootstrapping process when calculating partial ROCs; default = 50.} 31 | 32 | \item{iterations}{(numeric) the number of times that the bootstrap is going 33 | to be repeated; default = 500.} 34 | 35 | \item{kept}{(logical) if FALSE, all candidate models will be erased after 36 | evaluation, default = TRUE.} 37 | } 38 | \value{ 39 | A data.frame with the results of partial ROC, omission rates, and AICc metrics 40 | for all candidate models. 41 | } 42 | \description{ 43 | proc_or_aicc performs a series of step by step processes that 44 | help to read files from directories, extract necessary data, and evaluate 45 | Maxent predictions based on partial ROC, omission rates, and AICc values.
46 | } 47 | -------------------------------------------------------------------------------- /man/kuenm_feval_swd.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/kuenm_feval_swd.R 3 | \name{kuenm_feval_swd} 4 | \alias{kuenm_feval_swd} 5 | \title{Evaluation of final Maxent models with independent data in SWD format} 6 | \usage{ 7 | kuenm_feval_swd(path, occ.joint, occ.ind, replicates, out.eval, threshold = 5, 8 | rand.percent = 50, iterations = 500) 9 | } 10 | \arguments{ 11 | \item{path}{(character) directory in which folders containing final models 12 | were created.} 13 | 14 | \item{occ.joint}{(character) the csv file with training and testing 15 | occurrences combined, or the file containing occurrences used to create final 16 | models; columns must be: species, longitude, latitude, and two or more 17 | columns representing distinct variables.} 18 | 19 | \item{occ.ind}{(character) the name of the csv file with independent 20 | occurrences for model evaluation; these occurrences were not used when 21 | creating final models; columns as in \code{occ.joint}. 
Prepare this 22 | file with \code{\link{prep_independent_swd}}.} 23 | 24 | \item{replicates}{(logical) whether or not final models were created 25 | with replicates.} 26 | 27 | \item{out.eval}{(character) name of the folder where evaluation results will 28 | be written.} 29 | 30 | \item{threshold}{(numeric) the percentage of omission error allowed (E), 31 | default = 5.} 32 | 33 | \item{rand.percent}{(numeric) the percentage of data to be used for the 34 | bootstrapping process when calculating partial ROCs; default = 50.} 35 | 36 | \item{iterations}{(numeric) the number of times that the bootstrap is going 37 | to be repeated; default = 500.} 38 | } 39 | \value{ 40 | A list with two data.frame objects containing results from the 41 | evaluation process, and a folder, in the working directory, containing a 42 | csv file with the results from final model evaluation. 43 | } 44 | \description{ 45 | kuenm_feval_swd evaluates final Maxent models in terms of 46 | statistical significance (partial ROC) and omission rates with a user-defined 47 | threshold (E). This function works for models created in SWD format. 48 | } 49 | \details{ 50 | This function is used after the creation of final models. 
51 | } 52 | -------------------------------------------------------------------------------- /man/kuenm_varcomb.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/kuenm_varcomb.R 3 | \name{kuenm_varcomb} 4 | \alias{kuenm_varcomb} 5 | \title{All potential combinations of a group of variables} 6 | \usage{ 7 | kuenm_varcomb(var.dir, out.dir, min.number = 2, in.format = "ascii", 8 | out.format = "ascii") 9 | } 10 | \arguments{ 11 | \item{var.dir}{(character) the name of the folder containing variables that will be combined.} 12 | 13 | \item{out.dir}{(character) the name of the folder in which subfolders with distinct combinations of 14 | variables will be written.} 15 | 16 | \item{min.number}{(integer) the minimum number of variables per combination. This number must be > 1. 17 | Default = 2.} 18 | 19 | \item{in.format}{(character) format of variables in \code{var.dir}. Options are "ascii", "GTiff", and "EHdr" = bil. 20 | Default = "ascii".} 21 | 22 | \item{out.format}{(character) format of variables to be written in distinct sets inside \code{out.dir}. 23 | Options are "ascii", "GTiff", and "EHdr" = bil. Default = "ascii".} 24 | } 25 | \value{ 26 | A list containing vectors of all the potential combinations of variables. In addition, a folder 27 | named \code{out.dir} with subfolders in which distinct combinations of variables produced are written. 28 | } 29 | \description{ 30 | kuenm_varcomb creates multiple sets of variables by grouping them in all their potential combinations. 31 | } 32 | \details{ 33 | Sets of variables are written in the working directory and not retained as RasterStacks to avoid 34 | problems related to RAM limitations. 35 | 36 | Time of processing will be reduced considerably if \code{in.format} and \code{out.format} coincide 37 | because files will be copied and not loaded and written.
38 | } 39 | \examples{ 40 | # This example depends on data stored in your directory 41 | var_dir <- "Variables" # your directory with variables to be combined 42 | out_dir <- "M_variables" # output directory to be created 43 | min_n <- 2 44 | in_format <- "ascii" 45 | out_format <- "GTiff" 46 | 47 | comb <- kuenm_varcomb(var.dir = var_dir, out.dir = out_dir, min.number = min_n, 48 | in.format = in_format, out.format = out_format) 49 | } 50 | -------------------------------------------------------------------------------- /man/kuenm_proc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/kuenm_proc.R 3 | \name{kuenm_proc} 4 | \alias{kuenm_proc} 5 | \title{Partial ROC calculation for ecological niche models} 6 | \usage{ 7 | kuenm_proc(occ.test, model, threshold = 5, rand.percent = 50, 8 | iterations = 500, parallel = FALSE) 9 | } 10 | \arguments{ 11 | \item{occ.test}{matrix, data.frame, or numeric vector containing coordinates 12 | of occurrences to test model predictions to be evaluated. If matrix or 13 | data.frame, columns must include longitude and latitude in that order. 14 | If numeric, values of suitability in such occurrences. If a matrix or a 15 | data.frame is provided, \code{model} must be a RasterLayer.} 16 | 17 | \item{model}{RasterLayer or numeric vector of ecological niche model 18 | predictions to be evaluated. If RasterLayer, layer of predicted suitability. 19 | If numeric vector, predicted suitability values.} 20 | 21 | \item{threshold}{(numeric) value from 0 to 100 to represent the percentage of 22 | potential error (E) that the data could have due to any source of uncertainty. 23 | Default = 5.} 24 | 25 | \item{rand.percent}{(numeric) percentage of testing data to be used in each 26 | bootstrapped process for calculating the partial ROC. 
Default = 50.} 27 | 28 | \item{iterations}{(numeric) number of bootstrap iterations to be performed; 29 | default = 500.} 30 | 31 | \item{parallel}{(logical) argument deprecated. Default = NULL.} 32 | } 33 | \value{ 34 | A list with the summary of the results and a data.frame containing 35 | the AUC values and AUC ratios calculated for all iterations. 36 | } 37 | \description{ 38 | kuenm_proc applies partial ROC tests to model predictions. 39 | } 40 | \details{ 41 | Partial ROC is calculated following Peterson et al. (2008; 42 | \url{http://dx.doi.org/10.1016/j.ecolmodel.2007.11.008}). 43 | } 44 | \examples{ 45 | data("sp_test", package = "kuenm") 46 | model <- raster::raster(system.file("extdata/sp_model.tif", 47 | package = "kuenm")) 48 | thres <- 5 49 | rand_perc <- 50 50 | iterac <- 500 51 | 52 | p_roc <- kuenm_proc(occ.test = sp_test, model = model, threshold = thres, 53 | rand.percent = rand_perc, iterations = iterac) 54 | } 55 | -------------------------------------------------------------------------------- /man/model_var_contrib.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/model_var_contrib.R 3 | \name{model_var_contrib} 4 | \alias{model_var_contrib} 5 | \title{Evaluation of variable contribution to Maxent final models} 6 | \usage{ 7 | model_var_contrib(fmod.dir, model_name = NULL, project, ext.type) 8 | } 9 | \arguments{ 10 | \item{fmod.dir}{(character) the name of the folder in which final models are 11 | (e.g., the output folder after using the \code{\link{kuenm_mod}}) function. 12 | It is important to have only the folders containing the models in this 13 | directory. It can be only one folder or multiple subfolders containing models 14 | for the same species, created with distinct parameter settings. 
If models were 15 | projected, and the distinct types of extrapolation were used, the name of the 16 | folders contained in this directory should include a pattern describing the 17 | type of extrapolation used (e.g., "EC" for extrapolation and clamping in 18 | Maxent).} 19 | 20 | \item{model_name}{(character) pattern to be searched when finding the model of 21 | interest. This pattern does not include the pattern of \code{ext.type}. By 22 | default, NULL, all models are considered.} 23 | 24 | \item{project}{(logical) if TRUE, it is assumed that models were projected to 25 | other scenarios (this must be always true if models were produced in SWD 26 | format).} 27 | 28 | \item{ext.type}{(character) vector of pattern(s) to be searched in the 29 | folders inside \code{fmod.dir} that identify the extrapolation type(s) of 30 | model projections of interest (e.g., "E", "EC", "NE", or a vector of more 31 | than one of them). Ignored if \code{project} = FALSE.} 32 | } 33 | \value{ 34 | A list with results of variable contribution, permutation importance, and 35 | jackknife results. If multiple models are evaluated, a nested list with results 36 | for all models is returned. 37 | } 38 | \description{ 39 | model_var_contrib helps to explore variable contribution of 40 | Maxent models created as final models with the functions \code{\link{kuenm_mod}} 41 | or \code{\link{kuenm_mod_swd}}. Variable contribution is measured based on 42 | metrics of contribution percentage, permutation importance, and, if existent, 43 | a jackknife analysis. 44 | } 45 | \details{ 46 | When models are created with replicates, the values returned correspond to the 47 | average of such replicates. 
48 | } 49 | -------------------------------------------------------------------------------- /man/kuenm_toclosest.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/to_closest.R 3 | \name{kuenm_toclosest} 4 | \alias{kuenm_toclosest} 5 | \title{Move occurrences to closest pixel with environmental data} 6 | \usage{ 7 | kuenm_toclosest(data, longitude, latitude, raster.layer, limit.distance) 8 | } 9 | \arguments{ 10 | \item{data}{data.frame or matrix of occurrence records. Columns must include 11 | longitude and latitude. Other columns are optional and won't be changed.} 12 | 13 | \item{longitude}{(character) name of the column with longitude data.} 14 | 15 | \item{latitude}{(character) name of the column with latitude data.} 16 | 17 | \item{raster.layer}{RasterLayer to be used as a reference.} 18 | 19 | \item{limit.distance}{(numeric) maximum distance in km at which an occurrence 20 | can be from the closest pixel with data for it to be moved. Records farther than this distance won't be moved.} 21 | } 22 | \value{ 23 | A data.frame with the corrected coordinates and four additional columns. 24 | The first of the new columns indicates the condition of the coordinates: 25 | Correct, if it was not moved because it was on a pixel with data; Moved, if 26 | it was moved to the nearest pixel; and Not_moved, if it was not moved because 27 | the occurrence was farther than the \code{limit.distance} to the closest pixel. 28 | The second new column indicates the distance to the closest pixel with data. 29 | The other two additional columns will contain the initial longitudes and 30 | latitudes. 31 | } 32 | \description{ 33 | kuenm_toclosest helps in changing the longitude and latitude values 34 | of occurrences with no environmental data, so they move to the closest pixel 35 | of a raster layer that contains relevant information. This process prevents 36 | NAs in future analyses. 
37 | } 38 | \examples{ 39 | data("sp_test", package = "kuenm") 40 | 41 | var <- raster::raster(list.files(system.file("extdata", package = "kuenm"), 42 | pattern = "Mbio_", full.names = TRUE)[1]) 43 | 44 | raster::plot(var) 45 | 46 | out <- rbind(c(-103, 27), c(-90, 26.5), c(-109, 40), c(-70, 41)) 47 | colnames(out) <- colnames(sp_test) 48 | 49 | data <- rbind.data.frame(sp_test, out) 50 | 51 | points(data) 52 | 53 | data1 <- kuenm_toclosest(data, longitude = "Longitude", latitude = "Latitude", 54 | raster.layer = var, limit.distance = 200) 55 | 56 | points(data1[, 1:2], col = "red") 57 | } 58 | -------------------------------------------------------------------------------- /man/kuenm_feval.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/kuenm_feval.R 3 | \name{kuenm_feval} 4 | \alias{kuenm_feval} 5 | \title{Evaluation of final Maxent models with independent data} 6 | \usage{ 7 | kuenm_feval(path, occ.joint, occ.ind, replicates, out.eval, threshold = 5, 8 | rand.percent = 50, iterations = 500, parallel.proc = FALSE) 9 | } 10 | \arguments{ 11 | \item{path}{(character) directory in which folders containing final models 12 | were created.} 13 | 14 | \item{occ.joint}{(character) the csv file with training and testing 15 | occurrences combined, or the file containing occurrences used to create final 16 | models; columns must be: species, longitude, latitude.} 17 | 18 | \item{occ.ind}{(character) the name of the csv file with independent 19 | occurrences for model evaluation; these occurrences were not used when 20 | creating final models; columns as in \code{occ.joint}.} 21 | 22 | \item{replicates}{(logical) whether or not final models were created 23 | performing replicates.} 24 | 25 | \item{out.eval}{(character) name of the folder where evaluation results will 26 | be written.} 27 | 28 | \item{threshold}{(numeric) the percentage of omission error 
allowed (E), 29 | default = 5.} 30 | 31 | \item{rand.percent}{(numeric) the percentage of data to be used for the 32 | bootstrapping process when calculating partial ROCs; default = 50.} 33 | 34 | \item{iterations}{(numeric) the number of times that the bootstrap is going 35 | to be repeated; default = 500.} 36 | 37 | \item{parallel.proc}{(logical) if TRUE, pROC calculations will be performed 38 | in parallel using the available cores of the computer. This will demand more 39 | RAM and almost full use of the CPU; hence, its use is more recommended in 40 | high-performance computers. Using this option will speed up the analyses 41 | only if models are large RasterLayers or if \code{iterations} are more than 42 | 5000. Default = FALSE.} 43 | } 44 | \value{ 45 | A list with two data.frame objects containing results from the 46 | evaluation process, and a folder, in the working directory, containing a 47 | csv file with the results from final model evaluation. 48 | } 49 | \description{ 50 | kuenm_feval evaluates final Maxent models in terms of statistical 51 | significance (partial ROC) and omission rates with a user-defined threshold (E). 52 | } 53 | \details{ 54 | This function is used after the creation of final models. 55 | } 56 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: kuenm 2 | Type: Package 3 | Title: Detailed development of ecological niche models using Maxent 4 | Version: 1.1.10 5 | Authors@R: c( 6 | person("Marlon E.", "Cobos", email = "manubio13@gmail.com", role = c("aut","cre"), 7 | comment = c(ORCID = "0000-0002-2611-1767")), 8 | person("A. 
Townsend", "Peterson", email = "town@ku.edu", role = "aut", 9 | comment = c(ORCID = "0000-0003-0243-2379")), 10 | person("Luis", "Osorio-Olvera", email = "luismurao@gmail.com", role = "aut", 11 | comment = c(ORCID = "0000-0003-0701-5398")), 12 | person("Narayani", "Barve", email = "narayani.ku@gmail.com", role = "aut", 13 | comment = c(ORCID = "0000-0002-7893-8774"))) 14 | Maintainer: Marlon E. Cobos 15 | Date: 2023-06-26 16 | Description: A set of tools to help with the development of critical phases of the 17 | ecological niche modeling process in Maxent. Pre-modeling analyses and 18 | explorations can be done to prepare data. Model calibration (model selection) 19 | can be done by creating and testing several candidate models. Handy options 20 | for producing final models, evaluating such models, and assessing extrapolation 21 | risks are also included. Tools for post-modeling analyses are implemented to 22 | allow for further exploration of results. Methodological and theoretical 23 | bases for the methods implemented here can be found in: 24 | Peterson et al. (2011) , 25 | Barve et al. (2011) , 26 | Owens et al. (2013) , 27 | Radosavljevic and Anderson (2014) , 28 | Peterson et al. (2008) , 29 | Cobos et al. (2019a) , 30 | Cobos et al. (2019b) , 31 | Cobos et al. (2019c) . 
32 | Imports: 33 | doSNOW (>= 1.0), 34 | dplyr (>= 0.7), 35 | fields (>= 9.6), 36 | foreach (>= 1.4), 37 | future (>= 1.8), 38 | Kendall (>= 2.2), 39 | knitr (>= 1.20), 40 | purrr (>= 0.2), 41 | raster (>= 2.6), 42 | Rcpp (>= 0.12), 43 | rmarkdown (>= 1.9), 44 | sp (>= 1.2) 45 | Suggests: 46 | yaml 47 | Depends: 48 | R (>= 3.5.0) 49 | VignetteBuilder: knitr 50 | License: GPL-2 51 | Encoding: UTF-8 52 | LazyData: true 53 | RoxygenNote: 7.2.3 54 | Roxygen: list(markdown = TRUE) 55 | LinkingTo: Rcpp 56 | -------------------------------------------------------------------------------- /man/kuenm_mop.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/kuenm_mop.R 3 | \name{kuenm_mop} 4 | \alias{kuenm_mop} 5 | \title{Extrapolation risk analysis for single comparisons} 6 | \usage{ 7 | kuenm_mop(M.variables, G.stack, percent = 10, comp.each = 2000, 8 | parallel = FALSE, n.cores = NULL) 9 | } 10 | \arguments{ 11 | \item{M.variables}{a RasterStack of variables or a matrix with variables as columns 12 | representing the calibration area. If matrix, columns must contain only 13 | information for the variables to be used.} 14 | 15 | \item{G.stack}{a RasterStack of variables representing the full area of interest, and areas 16 | or scenarios to which models are transferred.} 17 | 18 | \item{percent}{(numeric) percent of values sampled from te calibration region to calculate the MOP.} 19 | 20 | \item{comp.each}{(numeric) compute distance matrix for a each fixed number of rows (default = 2000).} 21 | 22 | \item{parallel}{(logical) if TRUE, calculations will be performed in parallel using \code{n.cores} 23 | of the computer. This will demand more RAM and almost full use of the CPU; hence, its use 24 | is more recommended in high-performance computers. Using this option will speed up the analyses. 
25 | Default = FALSE.} 26 | 27 | \item{n.cores}{(numeric) number of cores to be used in parallel processing. 28 | Default = NULL, in which case all CPU cores on current host - 1 will be used.} 29 | } 30 | \value{ 31 | A mobility-oriented parity RasterLayer where values of 0 represent strict extrapolation, 32 | which means complete dissimilarity of environments between the calibration (M) or the background, 33 | and the projection area (G). 34 | } 35 | \description{ 36 | kuenm_mop calculates a mobility-oriented parity layer by 37 | comparing environmental values between the calibration area and the area or 38 | scenario to which an ecological niche model is transferred. 39 | } 40 | \details{ 41 | The MOP is calculated following Owens et al. 42 | (2013; \url{https://doi.org/10.1016/j.ecolmodel.2013.04.011}). This function is a modification 43 | of the \code{\link[ENMGadgets]{MOP}} funcion, available at \url{https://github.com/narayanibarve/ENMGadgets}. 44 | } 45 | \examples{ 46 | mvars <- raster::stack(list.files(system.file("extdata", package = "kuenm"), 47 | pattern = "Mbio_", full.names = TRUE)) 48 | gvars <- raster::stack(list.files(system.file("extdata", package = "kuenm"), 49 | pattern = "Gbio_", full.names = TRUE)) 50 | names(mvars) <- gsub("M", "", names(mvars)) 51 | names(gvars) <- names(mvars) 52 | 53 | perc <- 5 54 | 55 | mop <- kuenm_mop(M.variables = mvars, G.stack = gvars, percent = perc) 56 | } 57 | -------------------------------------------------------------------------------- /R/kuenm_omrat.R: -------------------------------------------------------------------------------- 1 | #' Omission rates calculation for single models 2 | #' 3 | #' @description kuenm_omrat calculates omission rates of geographic projections 4 | #' of ecological niche models based on one or multiple user-specified thresholds. 5 | #' 6 | #' @param model a RasterLayer of the model to be evaluated. 
#' @param threshold (numeric vector) value(s) from 0 to 100 that will be used as thresholds,
#' default = 5.
#' @param occ.tra a numerical matrix containing coordinates of the occurrence data used to create
#' the ecological niche model to be evaluated; columns must be: longitude and latitude.
#' @param occ.test a numerical matrix containing coordinates of the occurrences used to test
#' the ecological niche model to be evaluated; columns must be: longitude and latitude.
#'
#' @return A named numeric value or numeric vector with the result(s); names
#' have the form \code{om_rate_<threshold>\%}. If all non-NA values of
#' \code{model} are identical, a warning is issued and NA is returned for every
#' threshold.
#'
#' @export
#'
#' @examples
#' # single threshold
#' model <- raster::raster(system.file("extdata/sp_model.tif",
#'                                     package = "kuenm"))
#' thres <- 5
#' data("sp_train", package = "kuenm")
#' data("sp_test", package = "kuenm")
#'
#' om_rate <- kuenm_omrat(model, threshold = thres,
#'                        occ.tra = sp_train, occ.test = sp_test)
#'
#' # multiple thresholds
#' thres1 <- c(5, 10, 20)
#'
#' om_rate <- kuenm_omrat(model, threshold = thres1,
#'                        occ.tra = sp_train, occ.test = sp_test)

kuenm_omrat <- function(model, threshold = 5, occ.tra, occ.test) {

  if (missing(model)) {
    stop("Argument model is not defined.")
  }
  if (missing(occ.tra)) {
    stop("Argument occ.tra is not defined.")
  }
  if (missing(occ.test)) {
    stop("Argument occ.test is not defined.")
  }
  # An empty or non-numeric threshold cannot produce a meaningful (or even
  # correctly named) result, so fail early with a clear message.
  if (!is.numeric(threshold) || length(threshold) == 0) {
    stop("Argument threshold must be a numeric vector with at least one value.")
  }

  ran_mod <- range(na.omit(model[]))

  if (ran_mod[1] == ran_mod[2]) {
    warning("\nModel input has no variability, omission rate will return NA.\n")

    om_rate <- rep(NA, length(threshold))
  } else {
    # Suitability at training records, sorted once (invariant across thresholds)
    suit_val_cal <- sort(na.omit(raster::extract(model, occ.tra)))
    suit_val_eval <- na.omit(raster::extract(model, occ.test))

    # Number of training records; nrow() is used instead of
    # length(occ.tra[, 1]) so that tibble-like inputs are counted by rows.
    # NOTE(review): the count includes records whose extracted value is NA,
    # as in the original implementation -- confirm this is intended.
    n_train <- nrow(occ.tra)

    om_rate <- vector("numeric", length = length(threshold))

    for (i in seq_along(threshold)) {
      # Position, among sorted training suitabilities, of the value that
      # leaves out threshold[i] percent of the training records
      val <- ceiling(n_train * threshold[i] / 100) + 1
      omi_val_suit <- suit_val_cal[val]

      # Proportion of testing records with suitability below that value
      om_rate[i] <- as.numeric(
        length(suit_val_eval[suit_val_eval < omi_val_suit]) /
          length(suit_val_eval)
      )
    }
  }

  names(om_rate) <- paste("om_rate_", threshold, "%", sep = "")
  return(om_rate)
}
70 | -------------------------------------------------------------------------------- /man/kuenm_mopagree.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/kuenm_mopagree.R 3 | \name{kuenm_mopagree} 4 | \alias{kuenm_mopagree} 5 | \title{Agreement of extrapolative areas of MOP layers} 6 | \usage{ 7 | kuenm_mopagree(mop.dir, in.format, out.format = "GTiff", current, 8 | time.periods, emi.scenarios, out.dir = "MOP_agremment") 9 | } 10 | \arguments{ 11 | \item{mop.dir}{(character) name of the folder in which MOP results are (e.g., the output 12 | folder after using the \code{\link{kuenm_mmop}}) function.} 13 | 14 | \item{in.format}{(character) format of model raster files. Options are "ascii", "GTiff", and "EHdr" = bil.} 15 | 16 | \item{out.format}{(character) format of layers to be written in \code{out.dir}. Options are "ascii", "GTiff", 17 | and "EHdr" = bil. Default = "GTiff".} 18 | 19 | \item{current}{(character) if exist, pattern to look for when defining which is the scenario of current 20 | projection to be excluded from calculations. If not defined, no current projection is assumed.} 21 | 22 | \item{time.periods}{(character or numeric) pattern to be searched when identifying MOP layers for 23 | distinct time projections. If not defined it is assumed that only one time period was considered.} 24 | 25 | \item{emi.scenarios}{(character) pattern to be searched for identifying distinct emission 26 | scenarios (e.g., RCP). 
If not defined it is assumed that only one emission scenario was used.} 27 | 28 | \item{out.dir}{(character) name of the output directory to be created in which subdirectories 29 | containing raster layers of strict extrapolative areas agreement will be written. Default = "MOP_agremment".} 30 | } 31 | \value{ 32 | Folders named as the set or sets of variables used to perform the MOP, containing raster layers in format 33 | \code{out.format} that represent agreement of strict extrapolative areas for each emission scenario 34 | in each time period. Folders will be written inside \code{out.dir}. 35 | } 36 | \description{ 37 | kuenm_mopagree calculates raster layers that represent the agreement of strict 38 | extrapolative areas among two or more climate models of an emission scenario in a 39 | given time period. Various emission scenarios and time periods can be processed. 40 | } 41 | \details{ 42 | Users must be specific when defining the patterns that the function will search for. These patterns 43 | must be part of the mop layer names so the function can locate each file without problems. 44 | This function uses this system of work to avoid high demands of RAM while performing these analyses. 45 | } 46 | \examples{ 47 | # MOP layers must be already created before using this function. 
48 | 49 | # Arguments 50 | mop_dir <- "MOP_results" 51 | format <- "GTiff" 52 | curr <- "current" 53 | time_periods <- 2050 54 | emi_scenarios <- c("RCP4.5", "RCP8.5") 55 | out_dir <- "MOP_agremment" 56 | 57 | kuenm_mopagree(mop.dir = mop_dir, in.format = format, out.format = format, 58 | current = curr, time.periods = time_periods, 59 | emi.scenarios = emi_scenarios, out.dir = out_dir) 60 | } 61 | -------------------------------------------------------------------------------- /man/kuenm_mmop.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/kuenm_mmop.R 3 | \name{kuenm_mmop} 4 | \alias{kuenm_mmop} 5 | \title{Extrapolation risk analysis for multiple comparisons} 6 | \usage{ 7 | kuenm_mmop(G.var.dir, M.var.dir, is.swd, sets.var, out.mop, percent = 10, 8 | comp.each = 2000, parallel = FALSE) 9 | } 10 | \arguments{ 11 | \item{G.var.dir}{(character) if project is TRUE, name of the directory containing 12 | folders names as the sets to be compared (\code{sets.var}), which contain 13 | subfolders with variables representing the scenarios of projection.} 14 | 15 | \item{M.var.dir}{(character) name of the folder containing either folders with 16 | ascii raster layers or csv files (SWD format) representing representing sets 17 | of variables used to calibrate the models.} 18 | 19 | \item{is.swd}{(logical) whether model calibration and final models were produced 20 | using SWD format.} 21 | 22 | \item{sets.var}{(character) value or vector with the name(s) of the sets of 23 | variables from G.var.dir and M.var.dir that are going to be compared to create 24 | the MOP(s).} 25 | 26 | \item{out.mop}{(character) name of the folder to which MOP results will be 27 | written.} 28 | 29 | \item{percent}{(numeric) percentage of values sampled from the calibration 30 | region to calculate the MOP.} 31 | 32 | \item{comp.each}{(numeric) compute distance matrix for a 
each fixed number 33 | of rows (default 2000).} 34 | 35 | \item{parallel}{(logical) option to be passed to the \code{\link{kuenm_mop}} 36 | function (for each independent MOP analyses). If TRUE, calculations will be 37 | performed in parallel using \code{n.cores} of the computer. This will demand 38 | more RAM and almost full use of the CPU; hence, its use is more recommended 39 | in high-performance computers. Using this option will speed up the analyses. 40 | Default = FALSE.} 41 | 42 | \item{n.cores}{(numeric) number of cores to be used in parallel processing. 43 | Default = NULL, in which case all CPU cores on current host - 1 will be used.} 44 | } 45 | \value{ 46 | A folder containing one or multiple mobility-oriented parity raster layers 47 | depending on how many projection areas or scenarios are considered. This 48 | results will be organized by the different sets of variables chosen for 49 | creating final models. Values of 0 in resultant raster layers represent strict 50 | extrapolation. 51 | } 52 | \description{ 53 | kuenm_mmop calculates mobility-oriented parity (MOP) layers by 54 | comparing environmental values between the calibration area and multiple areas 55 | or scenarios to which ecological niche models are transferred. 56 | } 57 | \details{ 58 | This function can be used after selection of parameters that produce the best 59 | models (when chosen sets of variables are known), or after producing final 60 | models with the function \code{\link{kuenm_mod}}. In a MOP layer, areas of 61 | strict extrapolation are excluded and other values represent how similar areas 62 | or scenarios are to environmental conditions in the calibration area. MOP is 63 | calculated following Owens et al. (2013; \url{https://doi.org/10.1016/j.ecolmodel.2013.04.011}). 
64 | } 65 | -------------------------------------------------------------------------------- /man/kuenm_rpca.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/kuenm_rpca.R 3 | \name{kuenm_rpca} 4 | \alias{kuenm_rpca} 5 | \title{Principal componens for raster layers and projections} 6 | \usage{ 7 | kuenm_rpca(variables, in.format, var.scale = TRUE, write.result = TRUE, 8 | out.format = "GTiff", out.dir = "PCA_results", project = FALSE, 9 | proj.vars, n.pcs) 10 | } 11 | \arguments{ 12 | \item{variables}{(character or RasterStack) if character, name of the folder where raster layers are located. 13 | If RasterStack, stack of raster layers to be used in principal component analyses.} 14 | 15 | \item{in.format}{(character) valid only if \code{variables} is character. Format of variables in the directory. 16 | Options are "ascii", "GTiff", and "EHdr" = bil.} 17 | 18 | \item{var.scale}{(logical) wheter or not to scale variables before performing principal component 19 | analyses. Default = TRUE.} 20 | 21 | \item{write.result}{(logical) whether or not to write PCA results and raster layers (PCs) in \code{out.dir}.} 22 | 23 | \item{out.format}{(character) if \code{write.result} = TRUE, format of variables to be written in distinct 24 | sets inside \code{out.dir}. Options are "ascii", "GTiff", and "EHdr" = bil. Default = "GTiff".} 25 | 26 | \item{out.dir}{(character) valid if \code{write.result} = TRUE. Name of the folder to be created to save the 27 | results of the analyses. Default = "PCA_results".} 28 | 29 | \item{project}{(logical) whether or not to project the species niche to other scenario(s). 30 | If TRUE, argument \code{proj.variables} needs to be defined. 
Default = FALSE.} 31 | 32 | \item{proj.vars}{(character or RasterStack) if character, name of the folder where subfolders with environmental 33 | variables of scenarios for projections are (useful if multiple projections are needed). If RasterStack, object 34 | containing stacked variables of only one projection scenario. Variables must correspond with variables in \code{vars.folder} 35 | (i.e., their names must correspond but they should represent conditions in other scenario).} 36 | 37 | \item{n.pcs}{(numeric) number of principal components to be returned as rasters. By default all principal 38 | components are returned as RasterLayers.} 39 | } 40 | \value{ 41 | A list containing PCA loadings and PCA summary as matrices, as well as one or multiple (if projected) RasterStacks 42 | of principal components. 43 | 44 | If \code{write.result} = TRUE, all results are written in \code{out.dir}. 45 | } 46 | \description{ 47 | kuenm_rpca performs a principal component analysis with a set of variables and 48 | produces raster layers of them. If needed the pricipal components are projected to other 49 | scenarios. 50 | } 51 | \details{ 52 | If \code{var.scale} = TRUE, variables are centered to cero and scaled using \code{\link[base]{scale}}. 
53 | } 54 | \examples{ 55 | # Data 56 | variab <- raster::stack(list.files(system.file("extdata", package = "kuenm"), 57 | pattern = "Mbio_", full.names = TRUE)) 58 | names(variab) <- paste0("bio_", c(1, 12, 15, 17)) 59 | 60 | proj_var <- raster::stack(list.files(system.file("extdata", package = "kuenm"), 61 | pattern = "Gbio_", full.names = TRUE)) 62 | names(proj_var) <- paste0("bio_", c(1, 12, 15, 17)) 63 | 64 | # Example with no projection 65 | npcs <- 3 66 | 67 | rpca <- kuenm_rpca(variables = variab, var.scale = TRUE, write.result = FALSE, 68 | n.pcs = npcs) 69 | 70 | # Example with projection 71 | project <- TRUE 72 | 73 | rpca1 <- kuenm_rpca(variables = variab, var.scale = TRUE, write.result = FALSE, 74 | project = project, proj.vars = proj_var, n.pcs = npcs) 75 | } 76 | -------------------------------------------------------------------------------- /R/kuenm_aicc.R: -------------------------------------------------------------------------------- 1 | #' AICc calculation for Maxent models 2 | #' 3 | #' @description kuenm_aicc calculates the Akaike information criterion corrected 4 | #' for small sample sizes (AICc) for single or multiple models produced with 5 | #' Maxent. 6 | #' 7 | #' @param occ matrix or data.frame with coordinates of the occurrences used to 8 | #' create the model (raster) to be evaluated; columns must be: longitude and 9 | #' latitude. 10 | #' @param model a RasterLayer or RasterStack of model projections created using 11 | #' Maxent with raw outputs. 12 | #' @param npar (numeric) number of parameters for \code{model}. Length must 13 | #' correspond with number of layers in \code{model}. See function 14 | #' \code{\link{n_par}}. 15 | #' 16 | #' @return 17 | #' A data.frame containing values of AICc, delta AICc, weight of AICc, and 18 | #' number of parameters. The number of rows of the data.frame corresponds to 19 | #' the number of models evaluated. 
#'
#' @details
#' Calculations are done following
#' [Warren and Seifert (2011)](https://doi.org/10.1890/10-1171.1).
#'
#' When \code{model} has several layers, each layer is standardized by the sum
#' of its own cell values before the log-likelihood is computed.
#'
#' @usage
#' kuenm_aicc(occ, model, npar)
#'
#' @export
#' @importFrom raster extract
#'
#' @seealso
#' \code{\link{aicc}} for results obtained using the SWD format.
#'
#' @examples
#' data("sp_joint", package = "kuenm")
#' model <- raster::raster(system.file("extdata/sp_model_joint.tif",
#'                                     package = "kuenm"))
#'
#' lbds <- readLines(system.file("extdata/lambdas_model_joint.lambdas",
#'                               package = "kuenm"))
#' npar <- n_par(lbds) # counting number of parameters
#'
#' aicc <- kuenm_aicc(occ = sp_joint, model = model, npar = npar)

kuenm_aicc <- function (occ, model, npar) {
  if (missing(occ)) {
    stop("Argument 'occ' must be defined, see function's help.")
  }
  if (missing(model)) {
    stop("Argument 'model' must be defined, see function's help.")
  }
  if (!inherits(model, c("RasterLayer", "RasterStack", "RasterBrick"))) {
    stop("'model' must be a RasterLayer or RasterStack object. See function's help.")
  }
  if (missing(npar)) {
    stop("Argument 'npar' must be defined, see function's help.")
  }
  if (dim(model)[3] != length(npar)) {
    stop("Number of 'models' to evaluate must correspond with length of 'npar'.")
  }

  n_occ <- nrow(occ)

  # AICc is only defined when there are fewer parameters than occurrences
  AIC.valid <- npar < n_occ

  if (dim(model)[3] == 0) {
    res <- data.frame(cbind(AICc = NA, delta.AICc = NA,
                            w.AIC = NA, parameters = npar))
    warning("Cannot calculate AICc when model = FALSE. Returning NA's.")
  } else {
    vals <- na.omit(raster::extract(model, occ))

    # Standardizing constant: one sum per layer. Summing all cells of a
    # multi-layer object at once would divide every layer by the total of all
    # layers and bias the log-likelihood of each model.
    cell_vals <- model[]
    probsum <- if (is.matrix(cell_vals)) {
      colSums(cell_vals, na.rm = TRUE)
    } else {
      sum(cell_vals, na.rm = TRUE)
    }

    # t(t(vals) / probsum) divides each column (layer) of vals by its own
    # sum; double.eps avoids log(0)
    LL <- colSums(log(.Machine$double.eps + t(t(vals) / probsum)))

    # AICc = 2k - 2LL + 2k(k + 1) / (n - k - 1)
    AICc <- (2 * npar - 2 * LL) + (2 * (npar) * (npar + 1) / (n_occ - npar - 1))
    AICc[AIC.valid == FALSE] <- NA
    AICc[is.infinite(AICc)] <- NA

    if (sum(is.na(AICc)) == length(AICc)) {
      warning("AICc not valid: too many parameters, or likelihood = Inf. Returning NA.")
      res <- data.frame(cbind(AICc, delta.AICc = NA, w.AIC = NA,
                              parameters = npar))
    } else {
      delta.AICc <- (AICc - min(AICc, na.rm = TRUE))
      w.AIC <- (exp(-0.5 * delta.AICc)) / (sum(exp(-0.5 * delta.AICc),
                                                na.rm = TRUE))
      res <- data.frame(AICc, delta.AICc, w.AIC, parameters = npar)
    }
  }

  rownames(res) <- NULL
  return(res)
}
89 | 90 | -------------------------------------------------------------------------------- /R/kuenm_occsplit.R: -------------------------------------------------------------------------------- 1 | #' Split occurrence files in training and testing data 2 | #' 3 | #' @description kuenm_occsplit splits occurrences contained in a data.frame to obtain training 4 | #' and testing data based on distinct methods for calibrating models. 5 | #' 6 | #' @param occ data.frame of occurrence records containing at least species, 7 | #' longitude, and latitude columns. 8 | #' @param train.proportion (numeric) proportion (from 0 to 1) of data to be used as training 9 | #' occurrences. The remaining data will be used for testing. 10 | #' @param method (character) method for selecting training and testing occurrences. Current 11 | #' option is "random". 12 | #' @param save (logical) whether or not to save the results in the working 13 | #' directory. Default = FALSE. 14 | #' @param name (character) common name for csv files to be written. 
#' A suffix will be added
#' depending on if the data is the complete set, training set, or testing set of occurrences.
#'
#' @return
#' List with all, training, and testing occurrences. If \code{save} = TRUE, three csv
#' files will be written in the working directory according to the name defined in
#' \code{name} plus the suffix _joint for all records, _train for the training set,
#' and _test for the testing set.
#'
#' @usage
#' kuenm_occsplit(occ, train.proportion = 0.5, method = "random",
#'                save = FALSE, name = "occ")
#'
#' @export
#'
#' @examples
#' # arguments
#' data("sp_joint", package = "kuenm")
#'
#' occs <- data.frame(Species = "A_americanum", sp_joint)
#' train_prop <- 0.5
#' method = "random"
#'
#' # running
#' data_split <- kuenm_occsplit(occ = occs, train.proportion = train_prop,
#'                              method = method)

kuenm_occsplit <- function(occ, train.proportion = 0.5, method = "random",
                           save = FALSE, name = "occ") {

  if (missing(occ)) {stop("Argument 'occ' needs to be defined.")}

  # fail early on unsupported methods instead of reaching an undefined result
  if (!is.character(method) || length(method) != 1 || !method %in% "random") {
    stop("Argument 'method' is not valid, the only option currently available is \"random\".")
  }

  # records with missing values are excluded before splitting
  occ <- na.omit(occ)

  # "random" is the only method implemented so far
  files <- occ_randsplit(occ, train.proportion = train.proportion)

  if (isTRUE(save)) {
    names <- paste0(name, c("_joint", "_train", "_test"), ".csv")
    for (i in seq_along(files)) {
      write.csv(files[[i]], file = names[i], row.names = FALSE)
    }
  }

  return(files)
}


#' Split occurrences randomly in training and testing data
#'
#' @description occ_randsplit splits a set of occurrences to obtain training and testing
#' data randomly.
#'
#' @param occ matrix or data.frame with the occurrences to be split. Columns may vary but
#' species, longitude, and latitude are recommended.
#' @param train.proportion (numeric) proportion (from 0 to 1) of data to be used as training
#' occurrences.
#' The remaining data will be used for testing.
#'
#' @return
#' List with all occurrences (joint), training occurrences (train), and testing (test)
#' occurrences. Elements keep the structure (matrix or data.frame) of \code{occ}.
#'
#' @usage
#' occ_randsplit(occ, train.proportion = 0.5)
#'
#' @export
#'
#' @examples
#' # arguments
#' data("sp_test", package = "kuenm")
#' occs <- data.frame(Species = "Species_1", sp_test)
#' train_prop <- 0.5
#'
#' # running
#' occ_rsplit <- occ_randsplit(occ = occs, train.proportion = train_prop)

occ_randsplit <- function(occ, train.proportion = 0.5) {
  if (!is.numeric(train.proportion) || length(train.proportion) != 1 ||
      is.na(train.proportion) || train.proportion < 0 || train.proportion > 1) {
    stop("Argument 'train.proportion' must be a single number from 0 to 1.")
  }

  ndata <- nrow(occ)
  ids <- sample(ndata, size = round(train.proportion * ndata))

  # testing rows are selected explicitly: a negative index built from an empty
  # 'ids' would select no rows at all instead of all of them
  test_ids <- setdiff(seq_len(ndata), ids)

  # drop = FALSE keeps matrix/data.frame structure for one row or one column
  data <- list(joint = occ,
               train = occ[ids, , drop = FALSE],
               test = occ[test_ids, , drop = FALSE])

  return(data)
}
Other columns will 15 | be ignored.} 16 | 17 | \item{species}{(character) name of column containing species name.} 18 | 19 | \item{longitude}{(character) name of column containing longitude values.} 20 | 21 | \item{latitude}{(character) name of column containing latitude values.} 22 | 23 | \item{data.split.method}{(character) name of the method to split training and 24 | testing records. Default and only option for now = "random".} 25 | 26 | \item{train.proportion}{(numeric) proportion of records to be used for training 27 | models. Default = 0.5} 28 | 29 | \item{raster.layers}{RasterStack of predictor variables masked to the area 30 | where the model will be calibrated.} 31 | 32 | \item{sample.size}{(numeric) number of points to represent the background for 33 | the model. Default = 10000} 34 | 35 | \item{var.sets}{(character or list) if character the only option is "all_comb", 36 | which will prepare the background to obtain all potential combinations of 37 | variables considering the ones in \code{raster.layers}. The minimum number of 38 | variables per set is defied by \code{min.number}. If list, a list 39 | of character vectors with the names of the variables per each set. Names of 40 | variables in sets must match names of layers in \code{raster.layers}. 41 | The default (NULL) produces only one set of variables for the background.} 42 | 43 | \item{min.number}{(numeric) minimum number of variables per set when option 44 | "all_comb" is used in \code{var.sets}. Default = 2.} 45 | 46 | \item{save}{(logical) whether or not to write csv files containing all, train, 47 | and test occurrences, as well as the background. All files will contain 48 | additional columns with the values of the variables for each coordinate. 49 | Default = FALSE.} 50 | 51 | \item{name.occ}{(character) name to be used for files with occurrence records. 
52 | Only one name is needed, a sufix will be added to represent all (_join), 53 | _train, and _test records (e.g., "occurrences").} 54 | 55 | \item{back.folder}{name for the csv file containing background coordinates 56 | (e.g., "background").} 57 | 58 | \item{set.seed}{seed to be used when sampling background and splitting records. 59 | Default = 1} 60 | } 61 | \description{ 62 | prepare_swd helps to create csv files containing occurrence 63 | records (all, train, and test records) and background coordinates, together 64 | with values of predictor variables, that later can be used to run model 65 | calibration in Maxent using the SWD format. 66 | } 67 | \examples{ 68 | # data 69 | data("sp_joint", package = "kuenm") 70 | occ <- data.frame(Species = "A_americanum", sp_joint) 71 | 72 | mvars <- raster::stack(list.files(system.file("extdata", package = "kuenm"), 73 | pattern = "Mbio_", full.names = TRUE)) 74 | 75 | # preparing swd data one set of variables 76 | prep <- prepare_swd(occ, species = "Species", longitude = "Longitude", 77 | latitude = "Latitude", raster.layers = mvars, 78 | sample.size = 5000) 79 | 80 | # various sets of variables 81 | preps <- prepare_swd(occ, species = "Species", longitude = "Longitude", 82 | latitude = "Latitude", raster.layers = mvars, 83 | var.sets = "all_comb", min.number = 3, 84 | sample.size = 5000) 85 | } 86 | -------------------------------------------------------------------------------- /man/kuenm_projchanges.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/kuenm_projchanges.R 3 | \name{kuenm_projchanges} 4 | \alias{kuenm_projchanges} 5 | \title{Detection of changes in models projected in time} 6 | \usage{ 7 | kuenm_projchanges(occ, fmod.stats, threshold = 5, current, time.periods, 8 | emi.scenarios, clim.models, ext.type, 9 | out.dir = "Projection_changes") 10 | } 11 | \arguments{ 12 | 
\item{occ}{(character) name of the csv file with all the occurrences used to create final models; 13 | columns must be: species, longitude, latitude. In the case of the kuenm package, this must be the 14 | name of the file that was used to create final models.} 15 | 16 | \item{fmod.stats}{(character) the name of the folder in which final models are (i.e., the output 17 | folder after using the \code{\link{kuenm_modstats}}) function.} 18 | 19 | \item{threshold}{(numeric) value from 0 to 100 that will be used as threshold, default = 5.} 20 | 21 | \item{current}{(character) pattern to look for when defining which is the scenario of current 22 | projection. If not defined coparisons will be performed between the calibration area and time 23 | projections defined by the three following arguments.} 24 | 25 | \item{time.periods}{(character or numeric) pattern to be searched when identifying models from 26 | distinct time projections. If not defined it is assumed that one time period was considered.} 27 | 28 | \item{emi.scenarios}{(character) pattern to be searched for identifying distinct emission 29 | scenarios (e.g., RCP numbers). If not defined it is asumed that only one emission scenario 30 | was used.} 31 | 32 | \item{clim.models}{(character) names of that identify climatic models used for project ENMs. 33 | If not defined it is assumed that only one climate model was used.} 34 | 35 | \item{ext.type}{(character) vector of pattern(s) to be searched in the folders inside 36 | \code{fmod.dir} that identify the extrapolation type(s) of model projections. This pattern(s) 37 | need to be clearly distinguishable from the rest of the name of the model folder name. 
For instance, 38 | capital letters can be used to separate this pattern from the rest of the folder name (e.g., "EC" will 39 | be the pattern that denotes extrapolation and clamping in the folder named "M_0.1_F_l_set1_EC").} 40 | 41 | \item{out.dir}{(character) name of the output directory to be created, in which subdirectories 42 | containing the results of analyses of changes will be written. Default = "Projection_changes".} 43 | } 44 | \value{ 45 | Folders named Changes_("pattern" depending on the ext.type) containing raster layers 46 | of the results, which include: changes in suitability, changes in suitable areas, and binary 47 | raster layers of models for all scenarios. All results will be written inside \code{out.dir}. 48 | } 49 | \description{ 50 | kuenm_projchanges performs map algebra operations to represent how and where 51 | models projected in time change compared to the current one. If more than one climate model 52 | (GCM) was used, it gives the degree of agreement among all the GCMs per emission scenario. 53 | } 54 | \details{ 55 | If any of the potential sources of variation is equal to one (e.g., only one parameter, or 56 | only one climate model), this source of variation will not be considered. 57 | 58 | Users must be specific when defining the patterns that the function will search for. These patterns 59 | must be part of the model (raster layer) names so the function can locate each file without problems. 60 | This function uses this system of work to avoid high demands of the RAM while performing these analyses. 61 | } 62 | \examples{ 63 | # Models statistics should have been calculated before starting. This can be done using the 64 | # kuenm_modstats function. 
65 | 66 | # Arguments 67 | occ <- "Sp_occ.csv" 68 | fmod_stats <- "Final_Model_Stats" 69 | thres <- 5 70 | curr <- "current" 71 | emi_scenarios <- c("RCP4.5", "RCP8.5") 72 | c_mods <- c("GCM1", "GCM2") 73 | ext_type <- c("E", "EC", "NE") 74 | out_dir1 <- "Projection_Changes" 75 | 76 | kuenm_projchanges(occ = occ, fmod.stats = fmod_stats, threshold = thres, 77 | current = curr, emi.scenarios = emi_scenarios, 78 | clim.models = c_mods, ext.type = ext_type, out.dir = out_dir1) 79 | } 80 | -------------------------------------------------------------------------------- /man/kuenm_modstats_swd.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/kuenm_modstats_swd.R 3 | \name{kuenm_modstats_swd} 4 | \alias{kuenm_modstats_swd} 5 | \title{Calculation of descriptive statistics of models created in SWD format} 6 | \usage{ 7 | kuenm_modstats_swd(sp.name, fmod.dir, format = "asc", statistics, 8 | proj.scenarios, ext.type, out.dir = "Final_Model_Stats") 9 | } 10 | \arguments{ 11 | \item{sp.name}{(character) name of the species. This name must be the one that 12 | appears as part of the raster file of each model. If results are from Maxent, 13 | this is the name that is in the first column of the csv containing species 14 | occurrence data (species) but changing spaces (if there is any) by underscore.} 15 | 16 | \item{fmod.dir}{(character) the name of the folder in which final models are 17 | (e.g., the output folder after using the \code{\link{kuenm_mod_swd}}) function. 18 | It is important to have only the folders containing the models in this directory. 19 | It can be only one folder or multiple subfolders containing models 20 | for the same species, created with distinct parameter settings. 
If models were 21 | projected, and distinct types of extrapolation were used, the name of the 22 | folders contained in this directory should include a pattern describing the 23 | type of extrapolation used (e.g., "EC" for extrapolation and clamping in Maxent).} 24 | 25 | \item{format}{(character) format of model raster files. Options are: "asc" or 26 | "tif"; default = "asc".} 27 | 28 | \item{statistics}{(character) vector of descriptive statistics to be calculated. 29 | Options include med = median, mean, min = minimum, max = maximum, range, 30 | sd = standard deviation, and se = standard error. By default c("med", "min", 31 | "max", "range") are calculated, unless a character vector with the desired 32 | statistics is provided.} 33 | 34 | \item{proj.scenarios}{(character) vector of pattern(s) that identify each 35 | projection area (scenario) to which models were projected.} 36 | 37 | \item{ext.type}{(character) vector of pattern(s) to be searched in the folders 38 | inside \code{fmod.dir} that identify the extrapolation type(s) of model 39 | projections. This pattern(s) need to be clearly distinguishable from the rest 40 | of the name of the folder. For instance, capital letter can be used to separate 41 | this pattern from the rest of the folder name (e.g., "EC" will be the patter 42 | that denotes extrapolation and clamping in the folder named "M_0.1_F_l_set1_EC").} 43 | 44 | \item{out.dir}{(character) name of the output directory to be created in which 45 | resulting raster layers of model statistics will be written. 46 | Default = "Final_Model_Stats".} 47 | } 48 | \value{ 49 | Folders named Statistics or Statistics_("pattern" depending on the ext.type) 50 | with all the raster layers of the descriptive statistics for models in 51 | \code{fmod.dir}. Folders will be written inside \code{out.dir}. 
52 | } 53 | \description{ 54 | kuenm_modstats_swd calculates raster layers with some descriptive 55 | statistics of all model replicates across multiple parameter settings, for 56 | models created in SWD format and projected to one or multiple projection areas 57 | (scenarios). 58 | } 59 | \details{ 60 | Users must be specific when defining the patterns that the function will search 61 | for. These patterns must be part of the model (raster layer) names so the 62 | function can locate each file without problems. This function uses this system 63 | of work to avoid high demands of the RAM while performing these analyses. 64 | } 65 | \examples{ 66 | # Models should be ready before starting these analyses, for an example of how 67 | # to create them see https://github.com/marlonecobos/kuenm 68 | 69 | # Arguments 70 | sp_name <- "sp1" 71 | fmod_dir <- "Final_Models" 72 | format <- "asc" 73 | project <- TRUE 74 | stats <- c("med", "range") 75 | rep <- TRUE 76 | scenarios <- c("current", "GCM1_RCP4.5", "GCM1_RCP8.5", "GCM2_RCP4.5", "GCM2_RCP8.5") 77 | ext_type <- c("E", "EC", "NE") # you can select only one type of extrapolation if needed 78 | out_dir <- "Final_Model_Stats" 79 | 80 | kuenm_modstats_swd(sp.name = sp_name, fmod.dir = fmod_dir, 81 | statistics = stats, proj.scenarios = scenarios, 82 | ext.type = ext_type, out.dir = out_dir) 83 | } 84 | -------------------------------------------------------------------------------- /man/kuenm_modstats.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/kuenm_modstats.R 3 | \name{kuenm_modstats} 4 | \alias{kuenm_modstats} 5 | \title{Calculation of descriptive statistics of models} 6 | \usage{ 7 | kuenm_modstats(sp.name, fmod.dir, format = "asc", project, statistics, replicated, 8 | proj.scenarios, ext.type, out.dir = "Final_Model_Stats") 9 | } 10 | \arguments{ 11 | \item{sp.name}{(character) name of the 
species. This name must be the one that appears as part 12 | of the raster file of each model. If results are from Maxent, this is the name that 13 | is in the first column of the csv containing species occurrence data (species) but changing spaces 14 | (if there is any) by underscore.} 15 | 16 | \item{fmod.dir}{(character) the name of the folder in which final models are (e.g., the output 17 | folder after using the \code{\link{kuenm_mod}}) function. It is important to have only the folders 18 | containing the models in this directory. It can be only one folder or multiple subfolders containing models 19 | for the same species, created with distinct parameter settings. If models were projected, and the 20 | distinct types of extrapolation were used, the name of the folders contained in this directory should 21 | include a pattern describing the type of extrapolation used (e.g., "EC" for extrapolation and 22 | clamping in Maxent).} 23 | 24 | \item{format}{(character) format of model raster files. Options are: "asc" or "tif"; default = "asc".} 25 | 26 | \item{project}{(logical) if TRUE, it is assumed that models were projected to other scenarios.} 27 | 28 | \item{statistics}{(character) vector of descriptive statistics to be calculated. Options include 29 | med = median, mean, min = minimum, max = maximum, range, sd = standard deviation, and se = standard error. 
30 | By default c("med", "min", "max", "range") are calculated, unless a character vector with the desired 31 | statistics is provided.} 32 | 33 | \item{replicated}{(logical) whether or not final models were created performing replicates.} 34 | 35 | \item{proj.scenarios}{(character) valid if \code{project} = TRUE, vector of pattern(s) that identify 36 | each projection area (scenario) to which models were projected.} 37 | 38 | \item{ext.type}{(character) valid if \code{project} = TRUE, vector of pattern(s) to be searched in the 39 | folders inside \code{fmod.dir} that identify the extrapolation type(s) of model projections. This pattern(s) 40 | need to be clearly distinguishable from the rest of the name of the folder. For instance, 41 | capital letter can be used to separate this pattern from the rest of the folder name (e.g., "EC" will be 42 | the patter that denotes extrapolation and clamping in the folder named "M_0.1_F_l_set1_EC").} 43 | 44 | \item{out.dir}{(character) name of the output directory to be created in which 45 | resulting raster layers of model statistics will be written. Default = "Final_Model_Stats".} 46 | } 47 | \value{ 48 | Folders named Statistics or Statistics_("pattern" depending on the ext.type) 49 | with all the raster layers of the descriptive statistics for models in \code{fmod.dir}. 50 | Folders will be written inside \code{out.dir}. 51 | } 52 | \description{ 53 | kuenm_modstats calculates raster layers with some descriptive statistics of all 54 | model replicates across multiple parameter settings. All of this, discriminating among models 55 | transferred to distinct projection areas (scenarios). 56 | } 57 | \details{ 58 | Users must be specific when defining the patterns that the function will search for. These patterns 59 | must be part of the model (raster layer) names so the function can locate each file without problems. 60 | This function uses this system of work to avoid high demands of the RAM while performing these analyses. 
61 | } 62 | \examples{ 63 | # Models should be ready before starting these analyses, for an example of how to create them see 64 | # https://github.com/marlonecobos/kuenm 65 | 66 | # Arguments 67 | sp_name <- "sp1" 68 | fmod_dir <- "Final_Models" 69 | format <- "asc" 70 | project <- TRUE 71 | stats <- c("med", "range") 72 | rep <- TRUE 73 | scenarios <- c("current", "GCM1_RCP4.5", "GCM1_RCP8.5", "GCM2_RCP4.5", "GCM2_RCP8.5") 74 | ext_type <- c("E", "EC", "NE") # you can select only one type of extrapolation if needed 75 | out_dir <- "Final_Model_Stats" 76 | 77 | kuenm_modstats(sp.name = sp_name, fmod.dir = fmod_dir, format = format, project = project, 78 | statistics = stats, replicated = rep, proj.scenarios = scenarios, 79 | ext.type = ext_type, out.dir = out_dir) 80 | } 81 | -------------------------------------------------------------------------------- /inst/extdata/Rmd_calibration.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "kuenm: calibration results" 3 | output: 4 | html_document: 5 | toc: true 6 | toc_depth: 4 7 | --- 8 | 9 | ```{r setup, include=FALSE} 10 | knitr::opts_chunk$set(echo = TRUE) 11 | ``` 12 | 13 |
14 | 15 | ### Brief description of the model calibration and selection process 16 | 17 | ```{r, echo=FALSE} 18 | st4 <- read.csv("calibration_results.csv") 19 | sett <- as.character(st4[,1]) 20 | setts <- strsplit(sett, split = "_") 21 | rm <- vector() 22 | for (i in 1:length(setts)) { 23 | rm[i] <- setts[[i]][2] 24 | } 25 | f.clas <- vector() 26 | for (i in 1:length(setts)) { 27 | f.clas[i] <- setts[[i]][4] 28 | } 29 | var.di <- vector() 30 | for (i in 1:length(setts)) { 31 | var.di[i] <- paste(setts[[i]][5:length(setts[[i]])], collapse = "_") 32 | } 33 | rm1 <- paste(unique(rm), collapse = ", ") 34 | f.clas1 <- paste(unique(f.clas), collapse = ", ") 35 | var.di1 <- paste(unique(var.di), collapse = ", ") 36 | para <- rbind(rm1, f.clas1, var.di1) 37 | ``` 38 | 39 | This is the final report of the evaluation of candidate models during calibration implemented in kuenm. 40 | 41 | In all, `r length(st4[,1])` candidate models, with parameters reflecting all combinations of `r length(unique(rm))` regularization multiplier settings, `r length(unique(f.clas))` feature class combinations, and `r length(unique(var.di))` distinct sets of environmental variables, have been evaluated. Model performance was evaluated based on statistical significance (Partial ROC), omission rates (OR), and the Akaike information criterion corrected for small sample sizes (AICc). 42 | 43 | ```{r para, echo=FALSE} 44 | colnames(para) <- "Parameters" 45 | row.names(para) <- c("Regularization multipliers", "Feature classes", "Sets of predictors") 46 | knitr::kable(para, digits=c(0,0), row.names = TRUE, caption = "Table 1. Parameters used to produce candidate models.") 47 | ``` 48 | 49 |
50 | 51 | All the results presented below can be found in the folder where outputs from model calibration were written. 52 | 53 |
54 |
55 | 56 | ### Model calibration statistics 57 | 58 | In the following table, information about how many models met the three selection criteria is presented. 59 | 60 | ```{r, echo=FALSE} 61 | st <- read.csv("calibration_stats.csv") 62 | colnames(st) <- c("Criteria", "Number_of_models") 63 | knitr::kable(st, digits=c(0,0), caption = "Table 2. General statistics of models that met distinct criteria.") 64 | ``` 65 | 66 |
67 |
68 | 69 | ### Models selected according to user-defined criteria 70 | 71 | The following table contains the models selected according to the user's pre-defined criteria. 72 | 73 | Note that if the selection criteria was "OR_AICc" (statistically significant models with omission rates below a predefined *E*, and among them those with lower AICc values), delta AICc values were recalculated only among models meeting the significance and omission rate criteria. 74 | 75 | ```{r, echo=FALSE} 76 | st1 <- read.csv("selected_models.csv") 77 | colnames(st1) <- c("Model", "Mean_AUC_ratio", "Partial_ROC", gsub("[.]", "%", colnames(st1)[4]), "AICc", "Delta_AICc", "W_AICc", "N_parameters") 78 | knitr::kable(st1, digits = c(0, 3, 3, 3, 3, 3, 3, 0), 79 | caption = "Table 3. Performance statistics for models selected based on the user's pre-defined critera.") 80 | ``` 81 | 82 |
83 |
84 | 85 | ### Model performance plot 86 | 87 | The figure below shows the position of the selected models in the distribution of all candidate models in terms of omission rates and AICc values. 88 | 89 | ![Figure 1. Distribution of all models, non-statistically significant models, and selected models in terms of AICc and omission rate values.](calibration_figure.png){width=60%} 90 | 91 |
92 |
93 | 94 | ### Performance statistics for all models 95 | 96 | The performance statistics for all candidate models are presented below (only the first 500 if there are more than 500 models). See the file calibration_results.csv for an editable table with the results for all candidate models. 97 | 98 | ```{r, echo=FALSE} 99 | st4 <- read.csv("calibration_results.csv") 100 | if (dim(st4)[1] > 500) { 101 | st4 <- st4[1:500, ] 102 | } 103 | colnames(st4) <- c("Model", "Mean_AUC_ratio", "Partial_ROC", gsub("[.]", "%", colnames(st4)[4]), "AICc", "Delta_AICc", "W_AICc", "N_parameters") 104 | knitr::kable(st4, digits = c(0, 3, 3, 3, 3, 3, 3, 0), 105 | caption = "Table 4. Performance statistics for candidate models.") 106 | ``` 107 | -------------------------------------------------------------------------------- /man/kuenm_ceval.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/kuenm_ceval.R 3 | \name{kuenm_ceval} 4 | \alias{kuenm_ceval} 5 | \title{Evaluation of candidate Maxent models during calibration} 6 | \usage{ 7 | kuenm_ceval(path, occ.joint, occ.tra, occ.test, batch, out.eval, 8 | threshold = 5, rand.percent = 50, iterations = 500, 9 | kept = TRUE, selection = "OR_AICc", parallel.proc = FALSE) 10 | } 11 | \arguments{ 12 | \item{path}{(character) directory in which folders containing calibration models are being created 13 | or were created.} 14 | 15 | \item{occ.joint}{(character) the name of csv file with training and testing occurrences combined; 16 | columns must be: species, longitude, latitude.} 17 | 18 | \item{occ.tra}{(character) the name of the csv file with the training occurrences; 19 | columns as in occ.joint.} 20 | 21 | \item{occ.test}{(character) the name of the csv file with the evaluation occurrences; 22 | columns as in occ.joint.} 23 | 24 | \item{batch}{(character) name of the batch file (bash for Unix) with the code to create all candidate models 25 | 
for calibration.} 26 | 27 | \item{out.eval}{(character) name of the folder where evaluation results will be written.} 28 | 29 | \item{threshold}{(numeric) the percentage of training data omission error allowed (E); default = 5.} 30 | 31 | \item{rand.percent}{(numeric) the percentage of data to be used for the bootstraping process 32 | when calculating partial ROCs; default = 50.} 33 | 34 | \item{iterations}{(numeric) the number of times that the bootstrap is going to be repeated; 35 | default = 500.} 36 | 37 | \item{kept}{(logical) if FALSE, all candidate models will be erased after evaluation, default = TRUE.} 38 | 39 | \item{selection}{(character) model selection criterion, can be "OR_AICc", "AICc", or "OR"; 40 | OR = omission rates. Default = "OR_AICc", which means that among models that are statistically significant 41 | and that present omission rates below the \code{threshold}, those with delta AICc up to 2 will be 42 | selected. See details for other selection criteria.} 43 | 44 | \item{parallel.proc}{(logical) if TRUE, pROC calculations will be performed in parallel using the available 45 | cores of the computer. This will demand more RAM and almost full use of the CPU; hence, its use 46 | is more recommended in high-performance computers. Using this option will speed up the analyses 47 | only if models are large RasterLayers or if \code{iterations} are more than 5000. Default = FALSE.} 48 | } 49 | \value{ 50 | A list with three dataframes containing results from the calibration process and a scatterplot 51 | of all models based on the AICc values and omission rates. 
In addition, a folder will be written in the 52 | working directory, containing a csv file with information about models meeting the user-defined 53 | selection criterion, another csv file with a summary of the evaluation and selection process, 54 | an extra csv file containing all the statistics of model performance (pROC, AICc, and omission 55 | rates) for all candidate models, a png scatterplot of all models based on the AICc values and omission 56 | rates, and an HTML file summarizing all the information produced after evaluation for helping with 57 | further interpretation. 58 | } 59 | \description{ 60 | kuenm_ceval evaluates candidate models in terms of statistical 61 | significance (partial ROC), prediction ability (omission rates), and model complexity (AICc). 62 | After evaluation, this function selects the best models based on user-defined criteria. 63 | } 64 | \details{ 65 | This function is used after or during the creation of Maxent candidate models for calibration. 66 | 67 | Other selection criteria are described below: 68 | If "AICc" criterion is chosen, all significant models with delta AICc up to 2 will be selected. 69 | If "OR" is chosen, the 10 first significant models with the lowest omission rates will be selected. 70 | } 71 | \examples{ 72 | # To run this function the kuenm_cal function needs to be used first. This previous function will 73 | # create the models that kuenm_ceval evaluates. 74 | 75 | # Variables with information to be used as arguments. 
76 | occ_joint <- "aame_joint.csv" 77 | occ_tra <- "aame_train.csv" 78 | batch_cal <- "Candidate_models" 79 | out_dir <- "Candidate_Models" 80 | occ_test <- "aame_test.csv" 81 | out_eval <- "Calibration_results" 82 | threshold <- 5 83 | rand_percent <- 50 84 | iterations <- 100 85 | kept <- TRUE 86 | selection <- "OR_AICc" 87 | paral_proc <- FALSE # make this true to perform pROC calculations in parallel 88 | 89 | cal_eval <- kuenm_ceval(path = out_dir, occ.joint = occ_joint, occ.tra = occ_tra, occ.test = occ_test, batch = batch_cal, 90 | out.eval = out_eval, threshold = threshold, rand.percent = rand_percent, iterations = iterations, 91 | kept = kept, selection = selection, parallel.proc = paral_proc) 92 | } 93 | -------------------------------------------------------------------------------- /R/to_closest.R: -------------------------------------------------------------------------------- 1 | #' Move occurrences to closest pixel with environmental data 2 | #' 3 | #' @description kuenm_toclosest helps in changing the longitude and latitude values 4 | #' of occurrences with no environmental data, so they move to the closest pixel 5 | #' of a raster layer that contains relevant information. This process prevents 6 | #' NAs in future analyses. 7 | #' 8 | #' @param data data.frame or matrix of occurrence records. Columns must include 9 | #' longitude and latitude. Other columns are optional and will not be changed. 10 | #' @param longitude (character) name of the column with longitude data. 11 | #' @param latitude (character) name of the column with latitude data. 12 | #' @param raster.layer RasterLayer to be used as a reference. 13 | #' @param limit.distance (numeric) maximum distance in km at which an occurrence 14 | #' can be from the closest pixel with data to be moved. Records farther than this distance will not be moved. 15 | #' 16 | #' @return 17 | #' A data.frame with the corrected coordinates and four additional columns. 
#' The first of the new columns indicates the condition of the coordinates:
#' Correct, if it was not moved because it was on a pixel with data; Moved, if
#' it was moved to the nearest pixel; and Not_moved, if it was not moved because
#' the occurrence was farther than the \code{limit.distance} to the closest pixel.
#' The second new column indicates the distance to the closest pixel with data.
#' The other two additional columns will contain the initial longitudes and
#' latitudes. The additional columns are returned even if no record is moved.
#'
#' @export
#'
#' @examples
#' data("sp_test", package = "kuenm")
#'
#' var <- raster::raster(list.files(system.file("extdata", package = "kuenm"),
#'                                  pattern = "Mbio_", full.names = TRUE)[1])
#'
#' raster::plot(var)
#'
#' out <- rbind(c(-103, 27), c(-90, 26.5), c(-109, 40), c(-70, 41))
#' colnames(out) <- colnames(sp_test)
#'
#' data <- rbind.data.frame(sp_test, out)
#'
#' points(data)
#'
#' data1 <- kuenm_toclosest(data, longitude = "Longitude", latitude = "Latitude",
#'                          raster.layer = var, limit.distance = 200)
#'
#' points(data1[, 1:2], col = "red")

kuenm_toclosest <- function(data, longitude, latitude, raster.layer, limit.distance) {
  # detecting potential errors
  if (missing(data)) {
    stop("Argument data is necessary to perform the analysis")
  }
  if (missing(longitude)) {
    stop("Argument longitude is not defined.")
  }
  if (missing(latitude)) {
    stop("Argument latitude is not defined.")
  }
  if (missing(raster.layer)) {
    stop("Argument raster.layer is not defined.")
  }
  if (missing(limit.distance)) {
    stop("Argument limit.distance is not defined.")
  }

  # preparing data
  # drop = FALSE keeps a two-column table even when 'data' is a matrix
  xy <- data[, c(longitude, latitude), drop = FALSE]
  vals <- raster::extract(raster.layer, xy)

  # records on cells without values are the candidates to be moved
  tomove <- which(is.na(vals))
  n_out <- length(tomove)

  condition <- rep("Correct", nrow(data))
  distss <- rep(0, nrow(data))

  if (n_out > 0) {
    xyout <- xy[tomove, , drop = FALSE]

    # coordinates of all cells with values; drop = FALSE covers rasters with
    # a single cell with data
    xyras <- raster::rasterToPoints(raster.layer)[, 1:2, drop = FALSE]
    if (nrow(xyras) == 0) {
      stop("raster.layer does not contain any pixel with values.")
    }

    # one row per record to move, one column per cell with data (meters)
    dists <- raster::pointDistance(xyout, xyras, lonlat = TRUE, allpairs = TRUE)
    if (!is.matrix(dists)) {
      dists <- matrix(dists, nrow = n_out)
    }

    limdist <- limit.distance * 1000

    # running process
    cat("\nMoving occurrences to closest pixels:\n")

    for (i in seq_len(n_out)) {
      # which.min returns the first closest cell, avoiding float comparisons
      closest <- which.min(dists[i, ])
      mindis <- dists[i, closest]
      distss[tomove[i]] <- mindis / 1000

      if (mindis <= limdist) {
        data[tomove[i], longitude] <- xyras[closest, 1]
        data[tomove[i], latitude] <- xyras[closest, 2]
        condition[tomove[i]] <- "Moved"
        cat("\tOccurrence", i, "of", n_out, "moved\n")
      } else {
        condition[tomove[i]] <- "Not_moved"
        cat(paste0("\tOccurrence ", i," of ", n_out, " was not moved because it is more than\n\t",
                   limit.distance, " km apart from the closest pixel with environmental values\n"))
      }
    }
  } else {
    cat("No occurrence is out of the area of the raster layer of reference with values.")
  }

  # the documented additional columns are added in all cases
  data <- data.frame(data, condition = condition, distance_km = distss,
                     initial_lon = xy[, 1], initial_lat = xy[, 2],
                     stringsAsFactors = FALSE)

  return(data)
}
do not edit by hand 2 | % Please edit documentation in R/kuenm_hierpart.R 3 | \name{kuenm_hierpart} 4 | \alias{kuenm_hierpart} 5 | \title{Hierarchical partition of the variance coming from distinct sources in ENMs} 6 | \usage{ 7 | kuenm_hierpart(sp.name, fmod.dir, is.swd, format = "ascii", replicated, project, 8 | current = NULL, time.periods = NULL, emi.scenarios = NULL, 9 | clim.models = NULL, ext.type, iterations = 100, 10 | sample.size = 1000, set.seed = 1, keep.tables = FALSE, 11 | factors.col = NULL, out.dir, verbose = TRUE) 12 | } 13 | \arguments{ 14 | \item{sp.name}{(character) name of the species. This name must be the one 15 | that appears as part of the raster file of each model replicate. If results 16 | are from Maxent, this is the name that is in the first column of the csv 17 | containing species occurrence data (species) but spaces replaced by "_".} 18 | 19 | \item{fmod.dir}{(character) name of the folder where all models are (e.g., 20 | the output folder after using the \code{\link{kuenm_mod}}) function.} 21 | 22 | \item{is.swd}{(logical) whether model calibration and final models were 23 | produced using SWD format.} 24 | 25 | \item{format}{(character) format of model raster files. Options are "ascii", 26 | "GTiff", and "EHdr" = bil. Default = "ascii".} 27 | 28 | \item{replicated}{(logical) whether or not models were created with 29 | replicates.} 30 | 31 | \item{project}{(logical) if TRUE, it is assumed that models were projected 32 | to other scenarios. These scenarios can be current (projections in space), 33 | and/or future or past (projections in time).} 34 | 35 | \item{current}{(character) pattern to look for when defining which is the 36 | raster file representing current projections. 
If NULL, results will be 37 | produced for the area of calibration, and if any of \code{time.periods}, 38 | \code{clim.models}, or \code{emi.scenarios} is defined, results will be 39 | be produced for these variance sources as well.} 40 | 41 | \item{time.periods}{(character) pattern to be searched to identify model 42 | projections to distinct time periods. If NULL, the default, it is assumed 43 | that only one time period was considered.} 44 | 45 | \item{emi.scenarios}{(character) pattern to be searched to identify 46 | distinct emission scenarios (e.g., "recp45"). If NULL, the default, it is 47 | assumed that only one emission scenario was used. Therefore, this source of 48 | variation will not be considered.} 49 | 50 | \item{clim.models}{(character) names that identify climatic models used for 51 | project ENMs. If NULL, the default, it is assumed that only one climate model 52 | was used. Therefore, this source of variation will not be considered.} 53 | 54 | \item{ext.type}{(character) pattern(s) to be searched in the folders inside 55 | \code{fmod.dir} that identify the extrapolation type(s) used in model 56 | projections. This pattern(s) needs to be clearly distinguishable from the 57 | other parts of the name of the folder name containing the model. For instance, 58 | "EC" will be the patter that denotes extrapolation and clamping in the folder 59 | named "M_0.1_F_l_set1_EC".} 60 | 61 | \item{iterations}{(numeric) number of iterations to be performed in the 62 | hierarchical partitioning analysis. Default = 100.} 63 | 64 | \item{sample.size}{(numeric) number of pixels to be sampled per each model. 65 | Default = 1000. 
Increasing this number is recommended when the number of 66 | models and the computer features allow it.} 67 | 68 | \item{set.seed}{(numeric) initial seed to be set before running analysis.} 69 | 70 | \item{keep.tables}{(logical) if TRUE, tables that are written in 71 | \code{out.dir} for each iteration of the hierarchical partitioning analyses 72 | are kept. Default = FALSE.} 73 | 74 | \item{factors.col}{a vector of colors for the bars to be plotted; if not 75 | defined, a gray color palette is used.} 76 | 77 | \item{out.dir}{(character) name of the output directory to be created 78 | where results of the hierarchical partitioning analysis will be written.} 79 | 80 | \item{verbose}{(logical) whether to print messages; default = TRUE.} 81 | } 82 | \value{ 83 | The function returns a list containing the summary of total effects of 84 | factors on variance contained in the models (mean and confidence intervals 85 | of total effects). A plot of these values is also returned. 86 | 87 | Other results are written in \code{out.dir}. Folders named Variation or 88 | HP_results_(EC, NE, and/or E, depending on \code{ext.type}) containing 89 | csv files with the results of the hierarchical partitioning analyses an a 90 | plot summarizing the total effects of the sources of variation on the 91 | variance in the models. 92 | } 93 | \description{ 94 | kuenm_hierpart has been deprecated for the moment. A future, 95 | more complete version will be available soon. 96 | } 97 | \details{ 98 | If the length of any of the potential sources of variation is equal to one 99 | (e.g., only one parameter, or only one climate model), this source of 100 | variation will not be considered. 101 | 102 | Users must be specific when defining the patterns that the function will 103 | search for. These patterns must be part of the raster file names of the 104 | models so the function can locate each file without problems. 
105 | 106 | Error whiskers in resulting plots represent the 95\% Confidence Interval of 107 | the mean. This interval is calculated using a bootstrap approach. 108 | } 109 | -------------------------------------------------------------------------------- /man/kuenm_modvar.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/kuenm_modvar.R 3 | \name{kuenm_modvar} 4 | \alias{kuenm_modvar} 5 | \title{Prediction variance coming from distinct sources in ENMs} 6 | \usage{ 7 | kuenm_modvar(sp.name, fmod.dir, is.swd, replicated, format = "asc", project, 8 | current, time.periods, emi.scenarios, clim.models, ext.type, 9 | split.length = 100, out.dir = "Variation_from_sources") 10 | } 11 | \arguments{ 12 | \item{sp.name}{(character) name of the species. This name must be the one 13 | that appears as part of the raster file of each model replicate. If results 14 | are from Maxent, this is the name that is in the first column of the csv 15 | containing species occurrence data (species) but excluding spaces.} 16 | 17 | \item{fmod.dir}{(character) the name of the folder in which final models 18 | are (i.e., the output folder after using the \code{\link{kuenm_mod}}) function.} 19 | 20 | \item{is.swd}{(logical) whether model calibration and final models were 21 | produced using SWD format.} 22 | 23 | \item{replicated}{(logical) whether or not final models were created performing 24 | replicates.} 25 | 26 | \item{format}{(character) format of model raster files. Options are: "asc" 27 | or "tif"; default = "asc".} 28 | 29 | \item{project}{(logical) if TRUE, assumes that models were projected to other 30 | scenarios. These scenarios can be current (projections in space), and/or 31 | future or past (projections in time).} 32 | 33 | \item{current}{(character) pattern to look for when defining which is the 34 | scenario of current projection. 
If not defined variance maps will be produced 35 | for the area of calibration, and if any of \code{time.periods}, 36 | \code{clim.models}, or \code{emi.scenarios} exist, variance maps will be 37 | produced for these layers as well.} 38 | 39 | \item{time.periods}{(character or numeric) pattern to be searched when 40 | identifying models from distinct time projections. If not defined it is 41 | assumed that one time period was considered.} 42 | 43 | \item{emi.scenarios}{(character) pattern to be searched for identifying 44 | distinct emission scenarios (e.g., RCP numbers). If not defined, it is 45 | assumed that only one emission scenario was used. Therefore, this source of 46 | variation will not be considered.} 47 | 48 | \item{clim.models}{(character) names that identify climatic models used to 49 | project ENMs. If not defined it is assumed that only one climate model was 50 | used. Therefore, this source of variation will not be considered.} 51 | 52 | \item{ext.type}{(character) valid if \code{project} = TRUE, vector of 53 | pattern(s) to be searched in the folders inside \code{fmod.dir} that identify 54 | the extrapolation type(s) of model projections. This pattern(s) need to be 55 | clearly distinguishable from the rest of the name of the model folder name. 56 | For instance, capital letters can be used to separate this pattern from the 57 | rest of the folder name (e.g., "EC" will be the pattern that denotes 58 | extrapolation and clamping in the folder named "M_0.1_F_l_set1_EC").} 59 | 60 | \item{split.length}{(numeric) limit number of models to be processed at the 61 | time. Bigger numbers would demand more from the RAM. Default = 100.} 62 | 63 | \item{out.dir}{(character) name of the output directory to be created in 64 | which subdirectories containing raster layers of model variance will be 65 | written. 
Default = "Variation_from_sources".} 66 | } 67 | \value{ 68 | Folders named Variation or Variation_("pattern" depending on the ext.type) 69 | containing subdirectories named according to where/when models were projected. 70 | Inside this folder, raster layers of variance coming from distinct sources. 71 | All results will be written inside \code{out.dir}. 72 | } 73 | \description{ 74 | kuenm_modvar calculates the variance in model predictions 75 | distinguishing the source from which this is coming. In this version 76 | potential sources of variation are: replicates, parameterizations, general 77 | circulation models (GCMs), and emission scenarios. The last two are considered 78 | only when projections in time are performed. 79 | } 80 | \details{ 81 | If any of the potential sources of variation is equal to one (e.g., only one 82 | parameter, or only one climate model), this source of variation will not be 83 | considered. 84 | 85 | Users must be specific when defining the patterns that the function will 86 | search for. These patterns must be part of the model (raster layer) names so 87 | the function can locate each file without problems. This function uses this 88 | system of work to avoid high demands of the RAM while performing these 89 | analyses. 
90 | } 91 | \examples{ 92 | # Models should be ready before starting these analyses, for an example of 93 | # how to create them see https://github.com/marlonecobos/kuenm 94 | 95 | # Arguments 96 | sp_name <- "sp1" 97 | fmod_dir <- "Final_Models" 98 | is_swd <- FALSE 99 | rep <- TRUE 100 | format <- "asc" 101 | project <- TRUE 102 | curr <- "current" 103 | emi_scenarios <- c("RCP4.5", "RCP8.5") 104 | c_mods <- c("GCM1", "GCM2") 105 | ext_type <- c("E", "EC", "NE") 106 | split <- 100 107 | out_dir2 <- "Variation_from_sources" 108 | 109 | kuenm_modvar(sp.name = sp_name, fmod.dir = fmod_dir, is.swd = is_swd, 110 | replicated = rep, format = format, project = project, 111 | current = curr, emi.scenarios = emi_scenarios, 112 | clim.models = c_mods, ext.type = ext_type, split.length = split, 113 | out.dir = out_dir2) 114 | } 115 | -------------------------------------------------------------------------------- /R/kuenm_hierpart.R: -------------------------------------------------------------------------------- 1 | #' Hierarchical partition of the variance coming from distinct sources in ENMs 2 | #' 3 | #' @description kuenm_hierpart has been deprecated for the moment. A future, 4 | #' more complete version will be available soon. 5 | #' 6 | #' @param sp.name (character) name of the species. This name must be the one 7 | #' that appears as part of the raster file of each model replicate. If results 8 | #' are from Maxent, this is the name that is in the first column of the csv 9 | #' containing species occurrence data (species) but spaces replaced by "_". 10 | #' @param fmod.dir (character) name of the folder where all models are (e.g., 11 | #' the output folder after using the \code{\link{kuenm_mod}}) function. 12 | #' @param is.swd (logical) whether model calibration and final models were 13 | #' produced using SWD format. 14 | #' @param format (character) format of model raster files. Options are "ascii", 15 | #' "GTiff", and "EHdr" = bil. Default = "ascii". 
16 | #' @param replicated (logical) whether or not models were created with 17 | #' replicates. 18 | #' @param project (logical) if TRUE, it is assumed that models were projected 19 | #' to other scenarios. These scenarios can be current (projections in space), 20 | #' and/or future or past (projections in time). 21 | #' @param current (character) pattern to look for when defining which is the 22 | #' raster file representing current projections. If NULL, results will be 23 | #' produced for the area of calibration, and if any of \code{time.periods}, 24 | #' \code{clim.models}, or \code{emi.scenarios} is defined, results will 25 | #' be produced for these variance sources as well. 26 | #' @param time.periods (character) pattern to be searched to identify model 27 | #' projections to distinct time periods. If NULL, the default, it is assumed 28 | #' that only one time period was considered. 29 | #' @param emi.scenarios (character) pattern to be searched to identify 30 | #' distinct emission scenarios (e.g., "rcp45"). If NULL, the default, it is 31 | #' assumed that only one emission scenario was used. Therefore, this source of 32 | #' variation will not be considered. 33 | #' @param clim.models (character) names that identify climatic models used to 34 | #' project ENMs. If NULL, the default, it is assumed that only one climate model 35 | #' was used. Therefore, this source of variation will not be considered. 36 | #' @param ext.type (character) pattern(s) to be searched in the folders inside 37 | #' \code{fmod.dir} that identify the extrapolation type(s) used in model 38 | #' projections. This pattern(s) needs to be clearly distinguishable from the 39 | #' other parts of the name of the folder name containing the model. For instance, 40 | #' "EC" will be the pattern that denotes extrapolation and clamping in the folder 41 | #' named "M_0.1_F_l_set1_EC". 
42 | #' @param iterations (numeric) number of iterations to be performed in the 43 | #' hierarchical partitioning analysis. Default = 100. 44 | #' @param sample.size (numeric) number of pixels to be sampled per model. 45 | #' Default = 1000. Increasing this number is recommended when the number of 46 | #' models and the computer features allow it. 47 | #' @param set.seed (numeric) initial seed to be set before running analysis. 48 | #' @param keep.tables (logical) if TRUE, tables that are written in 49 | #' \code{out.dir} for each iteration of the hierarchical partitioning analyses 50 | #' are kept. Default = FALSE. 51 | #' @param factors.col a vector of colors for the bars to be plotted; if not 52 | #' defined, a gray color palette is used. 53 | #' @param out.dir (character) name of the output directory to be created 54 | #' where results of the hierarchical partitioning analysis will be written. 55 | #' @param verbose (logical) whether to print messages; default = TRUE. 56 | #' 57 | #' @return 58 | #' The function returns a list containing the summary of total effects of 59 | #' factors on variance contained in the models (mean and confidence intervals 60 | #' of total effects). A plot of these values is also returned. 61 | #' 62 | #' Other results are written in \code{out.dir}. Folders named Variation or 63 | #' HP_results_(EC, NE, and/or E, depending on \code{ext.type}) containing 64 | #' csv files with the results of the hierarchical partitioning analyses and a 65 | #' plot summarizing the total effects of the sources of variation on the 66 | #' variance in the models. 67 | #' 68 | #' @details 69 | #' If the length of any of the potential sources of variation is equal to one 70 | #' (e.g., only one parameter, or only one climate model), this source of 71 | #' variation will not be considered. 72 | #' 73 | #' Users must be specific when defining the patterns that the function will 74 | #' search for. 
#' These patterns must be part of the raster file names of the
#' models so the function can locate each file without problems.
#'
#' Error whiskers in resulting plots represent the 95% Confidence Interval of
#' the mean. This interval is calculated using a bootstrap approach.
#'
#' @usage
#' kuenm_hierpart(sp.name, fmod.dir, is.swd, format = "ascii", replicated, project,
#'                current = NULL, time.periods = NULL, emi.scenarios = NULL,
#'                clim.models = NULL, ext.type, iterations = 100,
#'                sample.size = 1000, set.seed = 1, keep.tables = FALSE,
#'                factors.col = NULL, out.dir, verbose = TRUE)
#'
#' @export


kuenm_hierpart <- function(
  sp.name,
  fmod.dir,
  is.swd,
  format = "ascii",
  replicated,
  project,
  current = NULL,
  time.periods = NULL,
  emi.scenarios = NULL,
  clim.models = NULL,
  ext.type,
  iterations = 100,
  sample.size = 1000,
  set.seed = 1,
  keep.tables = FALSE,
  factors.col = NULL,
  out.dir,
  verbose = TRUE
) {
  # The full signature is kept, but no argument is evaluated: every call
  # ends in the error below.
  excluded_msg <- "The function 'kuenm_hierpart' has been excluded from 'kuenm' for the moment."
  stop(excluded_msg)
}

# --------------------------------------------------------------------------------
# /R/model_var_contrib.R:
# --------------------------------------------------------------------------------
#' Evaluation of variable contribution to Maxent final models
#'
#' @description model_var_contrib helps to explore variable contribution of
#' Maxent models created as final models with the functions \code{\link{kuenm_mod}}
#' or \code{\link{kuenm_mod_swd}}. Variable contribution is measured based on
#' metrics of contribution percentage, permutation importance, and, if existent,
#' a jackknife analysis.
#'
#' @param fmod.dir (character) the name of the folder in which final models are
#' (e.g., the output folder after using the \code{\link{kuenm_mod}}) function.
11 | #' It is important to have only the folders containing the models in this 12 | #' directory. It can be only one folder or multiple subfolders containing models 13 | #' for the same species, created with distinct parameter settings. If models were 14 | #' projected, and the distinct types of extrapolation were used, the name of the 15 | #' folders contained in this directory should include a pattern describing the 16 | #' type of extrapolation used (e.g., "EC" for extrapolation and clamping in 17 | #' Maxent). 18 | #' @param model_name (character) pattern to be searched when finding the model of 19 | #' interest. This pattern does not include the pattern of \code{ext.type}. By 20 | #' default, NULL, all models are considered. 21 | #' @param project (logical) if TRUE, it is assumed that models were projected to 22 | #' other scenarios (this must be always true if models were produced in SWD 23 | #' format). 24 | #' @param ext.type (character) vector of pattern(s) to be searched in the 25 | #' folders inside \code{fmod.dir} that identify the extrapolation type(s) of 26 | #' model projections of interest (e.g., "E", "EC", "NE", or a vector of more 27 | #' than one of them). Ignored if \code{project} = FALSE. 28 | #' 29 | #' @return 30 | #' A list with results of variable contribution, permutation importance, and 31 | #' jackknife results. If multiple models are evaluated, a nested list with results 32 | #' for all models is returned. 33 | #' 34 | #' @details 35 | #' When models are created with replicates, the values returned correspond to the 36 | #' average of such replicates. 
#'
#' @usage
#' model_var_contrib(fmod.dir, model_name = NULL, project, ext.type)
#'
#' @export


model_var_contrib <- function(fmod.dir, model_name = NULL, project, ext.type) {
  # tests
  if (missing(fmod.dir)) {
    stop("Argument fmod.dir needs to be defined.")
  }
  if (!dir.exists(fmod.dir)) {
    stop(paste(fmod.dir, "does not exist in the working directory, check folder name",
               "\nor its existence."))
  }
  if (missing(project)) {
    stop("Argument project needs to be defined.")
  }
  if (project == TRUE && missing(ext.type)) {
    stop("Argument ext.type needs to be defined.")
  }

  # Only the first pattern is used in every branch; dir() takes a single
  # regular expression.
  if (!is.null(model_name)) {
    model_name <- model_name[1]
  }

  # Folders
  # `parameters` (full paths) and `models` (folder names) are parallel lists
  # with one element per extrapolation type, or a single element when models
  # were not projected.
  if (project == FALSE) {
    if (!is.null(model_name)) {
      parameters <- list(dir(fmod.dir, pattern = model_name, full.names = TRUE))
      models <- list(dir(fmod.dir, pattern = model_name))
    } else {
      parameters <- list(list.dirs(fmod.dir, recursive = FALSE))
      # wrapped in a list so that models[[1]] holds all folder names, not
      # only the first one
      models <- list(list.dirs(fmod.dir, full.names = FALSE, recursive = FALSE))
    }
  } else {
    # folder names are expected to end in "_<ext.type>"
    prefix <- if (is.null(model_name)) "" else model_name
    patterns <- paste0(prefix, "_", ext.type, "$")

    parameters <- lapply(patterns, function(p) {
      dir(fmod.dir, pattern = p, full.names = TRUE, recursive = FALSE)
    })
    models <- lapply(patterns, function(p) {
      dir(fmod.dir, pattern = p, full.names = FALSE, recursive = FALSE)
    })
  }

  if (sum(lengths(parameters)) == 0) {
    stop(paste("No model folders matching the arguments defined were found in",
               fmod.dir))
  }

  # preparing results
  var_cont_res <- lapply(seq_along(parameters), function(x) {
    re <- lapply(parameters[[x]], function(folder) {
      allres <- read.csv(file.path(folder, "maxentResults.csv"))
      cols <- colnames(allres)
      # values are taken from the last row of the table
      nro <- nrow(allres)

      ## relevant columns
      colcont <- grep("contribution", cols)
      colperm <- grep("permutation.importance", cols)
      colgain <- grep("Regularized.training.gain", cols)
      # anchored to "Training" so that columns such as "Test.gain.with.only.*"
      # are not mixed with the training jackknife values
      colwith <- grep("^Training\\.gain\\.with\\.only", cols)
      colwout <- grep("^Training\\.gain\\.without", cols)

      ## relevant values
      varnamesmx <- gsub(".contribution", "", cols[colcont])
      contrib <- data.frame(Variable = varnamesmx,
                            Contribution = unlist(allres[nro, colcont]))
      rownames(contrib) <- NULL

      permimp <- data.frame(Variable = varnamesmx,
                            Permutation_importance = unlist(allres[nro, colperm]))
      rownames(permimp) <- NULL

      if (length(colwith) > 0) {
        jackkni <- data.frame(Variable = varnamesmx,
                              Training_gain_with = unlist(allres[nro, colwith]),
                              Training_gain_without = unlist(allres[nro, colwout]))
        rownames(jackkni) <- NULL

        rtg <- allres[nro, colgain]
      } else {
        # no jackknife columns in the results table
        jackkni <- NULL
        rtg <- NULL
      }

      list(Contribution = contrib, Permutation_importance = permimp,
           Jackknife_results = list(Regularized_training_gain_model = c(rtg),
                                    Training_gain_with_without = jackkni))
    })

    names(re) <- models[[x]]
    re
  })

  if (project == FALSE) {
    # a single group of models: return it without the outer list level
    var_cont_res <- var_cont_res[[1]]
  } else {
    names(var_cont_res) <- ext.type
  }

  # results
  var_cont_res
}

# --------------------------------------------------------------------------------
# /R/data_documentation.R:
# --------------------------------------------------------------------------------
#' A set of occurrence records for ecological niche models
#'
#' A data.frame containing occurrence records of a tick (*Amblyomma americanum*)
#' across North America. The data combines records for training and testing.
5 | #' 6 | #' @name sp_joint 7 | #' 8 | #' @format A data frame with 178 rows and 2 columns. 9 | #' \describe{ 10 | #' \item{Longitude}{longitude, in decimal degrees.} 11 | #' \item{Latitude}{latitude, in decimal degrees.} 12 | #' } 13 | #' 14 | #' @source \url{https://kuscholarworks.ku.edu/handle/1808/26376} 15 | #' 16 | #' @examples 17 | #' data("sp_joint", package = "kuenm") 18 | #' 19 | #' head(sp_joint) 20 | NULL 21 | 22 | 23 | #' A set of occurrence records to test candidate ecological niche models 24 | #' 25 | #' A data.frame containing occurrence records of a tick (*Amblyomma americanum*) 26 | #' in North America, used to test candidate models during calibration. 27 | #' 28 | #' @name sp_test 29 | #' 30 | #' @format A data frame with 89 rows and 2 columns. 31 | #' \describe{ 32 | #' \item{Longitude}{longitude, in decimal degrees.} 33 | #' \item{Latitude}{latitude, in decimal degrees.} 34 | #' } 35 | #' 36 | #' @source \url{https://kuscholarworks.ku.edu/handle/1808/26376} 37 | #' 38 | #' @examples 39 | #' data("sp_test", package = "kuenm") 40 | #' 41 | #' head(sp_test) 42 | NULL 43 | 44 | 45 | 46 | #' A set of occurrence records for training candidate ecological niche models 47 | #' 48 | #' A data.frame containing occurrence records of a tick (*Amblyomma americanum*) 49 | #' across North America, used to train candidate models during calibration. 50 | #' 51 | #' @name sp_train 52 | #' 53 | #' @format A data frame with 89 rows and 2 columns. 
54 | #' \describe{ 55 | #' \item{Longitude}{longitude, in decimal degrees.} 56 | #' \item{Latitude}{latitude, in decimal degrees.} 57 | #' } 58 | #' 59 | #' @source \url{https://kuscholarworks.ku.edu/handle/1808/26376} 60 | #' 61 | #' @examples 62 | #' data("sp_train", package = "kuenm") 63 | #' 64 | #' head(sp_train) 65 | NULL 66 | 67 | 68 | 69 | 70 | #' A lambdas file resulted from a modeling process in Maxent 71 | #' 72 | #' A lambdas file resulted from a model created in Maxent with raw output for 73 | #' *Amblyomma americanum* in North America. This file is used to calculate number 74 | #' of parameters in the model, which is needed while calculating AICc values. 75 | #' 76 | #' @name sp_lambdas 77 | #' 78 | #' @format A lambdas file. 79 | #' \describe{ 80 | #' \item{parameters}{number of parameters in the Maxent model.} 81 | #' } 82 | #' 83 | #' @source \url{https://kuscholarworks.ku.edu/handle/1808/26376} 84 | #' 85 | #' @examples 86 | #' lbds <- readLines(system.file("extdata/lambdas_model_joint.lambdas", 87 | #' package = "kuenm")) 88 | #' 89 | #' head(lbds) 90 | NULL 91 | 92 | 93 | 94 | 95 | #' Raster variables masked to the area where a model is calibrated 96 | #' 97 | #' A RasterStack of predictor variables masked to the calibration area where 98 | #' a model is calibrated. Variables represent four current bioclimatic variables 99 | #' downloaded from the WorldClim database (\url{http://www.worldclim.org/}). 
100 | #' 101 | #' @name mvars 102 | #' 103 | #' @format A RasterStack with 150 rows, 249 columns, 37350 cells, and 4 layers: 104 | #' \describe{ 105 | #' \item{Temperature}{temperature, in Celsius degrees times 10.} 106 | #' \item{Precipitation}{precipitation, in millimeters.} 107 | #' } 108 | #' 109 | #' @source \url{https://kuscholarworks.ku.edu/handle/1808/26376} 110 | #' 111 | #' @examples 112 | #' mvars <- raster::stack(list.files(system.file("extdata", package = "kuenm"), 113 | #' pattern = "Mbio_", full.names = TRUE)) 114 | #' 115 | #' summary(mvars) 116 | NULL 117 | 118 | 119 | 120 | 121 | 122 | #' Variables masked to the area where a model is transferred 123 | #' 124 | #' A RasterStack containing predictor variables masked to the area where a model 125 | #' is projected. Variables represent four future bioclimatic variables (2050) of 126 | #' the NCAR-CCSM4 general circulation model under the RCP 8.5 emission scenario. 127 | #' 128 | #' @name gvars 129 | #' 130 | #' @format A RasterStack with 900 rows, 2160 columns, 1944000 cells, and 4 layers: 131 | #' \describe{ 132 | #' \item{Temperature}{temperature, in Celsius degrees times 10.} 133 | #' \item{Precipitation}{precipitation, in millimeters.} 134 | #' } 135 | #' 136 | #' @source \url{https://kuscholarworks.ku.edu/handle/1808/26376} 137 | #' 138 | #' @examples 139 | #' gvars <- raster::stack(list.files(system.file("extdata", package = "kuenm"), 140 | #' pattern = "Gbio_", full.names = TRUE)) 141 | #' 142 | #' summary(gvars) 143 | NULL 144 | 145 | 146 | 147 | 148 | 149 | #' A raster output of an ecological niche model created with Maxent (logistic) 150 | #' 151 | #' A RasterLayer containing an ecological niche model for the tick 152 | #' (*Amblyomma americanum*) that was created as part of the candidate models 153 | #' during a calibration process. 
154 | #' 155 | #' @name sp_model 156 | #' 157 | #' @format A RasterLayer with 150 rows, 249 columns, and 37350 cells: 158 | #' \describe{ 159 | #' \item{Suitability}{suitability values.} 160 | #' } 161 | #' 162 | #' @source \url{https://kuscholarworks.ku.edu/handle/1808/26376} 163 | #' 164 | #' @examples 165 | #' sp_model <- raster::raster(system.file("extdata/sp_model.tif", 166 | #' package = "kuenm")) 167 | #' 168 | #' summary(sp_model) 169 | NULL 170 | 171 | 172 | 173 | 174 | 175 | 176 | #' A raster output of an ecological niche model created with Maxent (raw) 177 | #' 178 | #' A RasterLayer containing an ecological niche model for the a tick 179 | #' (*Amblyomma americanum*) that was created with all occurrences. 180 | #' 181 | #' @name sp_mod_joint 182 | #' 183 | #' @format A RasterLayer with 150 rows, 249 columns, and 37350 cells: 184 | #' \describe{ 185 | #' \item{Suitability}{suitability values.} 186 | #' } 187 | #' 188 | #' @source \url{https://kuscholarworks.ku.edu/handle/1808/26376} 189 | #' 190 | #' @examples 191 | #' sp_model_joint <- raster::raster(system.file("extdata/sp_model_joint.tif", 192 | #' package = "kuenm")) 193 | #' 194 | #' summary(sp_model_joint) 195 | NULL 196 | -------------------------------------------------------------------------------- /R/kuenm_varcomb.R: -------------------------------------------------------------------------------- 1 | #' All potential combinations of a group of variables 2 | #' 3 | #' @description kuenm_varcomb creates multiple sets of variables by grouping them in all their potential combinations. 4 | #' 5 | #' @param var.dir (character) the name of the folder containing variables that will be combined. 6 | #' @param out.dir (character) the name of the folder in which subfolders with distinct combinations of 7 | #' variables will be written. 8 | #' @param min.number (integer) the minimum number of variables per combination. This number must be > 1. 9 | #' Default = 2. 
#' @param in.format (character) format of variables in \code{var.dir}. Options are "ascii", "GTiff", and "EHdr" = bil.
#' Default = "ascii".
#' @param out.format (character) format of variables to be written in distinct sets inside \code{out.dir}.
#' Options are "ascii", "GTiff", and "EHdr" = bil. Default = "ascii".
#'
#' @return A list containing vectors of all the potential combinations of variables (file names
#' carry the extension that corresponds to \code{out.format}). In addition, a folder
#' named \code{out.dir} with subfolders (Set_1, Set_2, ...) in which distinct combinations of
#' variables produced are written.
#'
#' @details
#' Sets of variables are written in the working directory and not retained as RasterStacks to avoid
#' problems related to RAM limitations.
#'
#' Time of processing will be reduced considerably if \code{in.format} and \code{out.format} coincide
#' because files will be copied and not loaded and written.
#'
#' The function stops with an error if \code{min.number} is smaller than 2, if \code{in.format} or
#' \code{out.format} is not one of the supported options, if no variables with format
#' \code{in.format} are found in \code{var.dir}, or if \code{min.number} is larger than the
#' number of variables found.
#'
#' @usage
#' kuenm_varcomb(var.dir, out.dir, min.number = 2, in.format = "ascii",
#'               out.format = "ascii")
#'
#' @export
#'
#' @examples
#' \dontrun{
#' # This example depends on data stored in your directory
#' var_dir <- "Variables" # your directory with variables to be combined
#' out_dir <- "M_variables" # output directory to be created
#' min_n <- 2
#' in_format <- "ascii"
#' out_format <- "GTiff"
#'
#' comb <- kuenm_varcomb(var.dir = var_dir, out.dir = out_dir, min.number = min_n,
#'                       in.format = in_format, out.format = out_format)
#' }


kuenm_varcomb <- function(var.dir, out.dir, min.number = 2, in.format = "ascii",
                          out.format = "ascii") {

  # Setting things up
  # File extension that corresponds to each supported raster format
  extensions <- c(ascii = "asc", GTiff = "tif", EHdr = "bil")

  valid_format <- function(x) {
    is.character(x) && length(x) == 1 && x %in% names(extensions)
  }

  if (!is.numeric(min.number) || length(min.number) != 1 ||
      is.na(min.number) || min.number < 2) {
    stop("min.number must be an integer > 1.")
  }
  # Fail early with a clear message instead of an "object not found" error later
  if (!valid_format(in.format)) {
    stop("in.format must be one of: ",
         paste(names(extensions), collapse = ", "), ".")
  }
  if (!valid_format(out.format)) {
    stop("out.format must be one of: ",
         paste(names(extensions), collapse = ", "), ".")
  }

  # Escaped and anchored so that only files ending in ".<ext>" are matched
  patt <- paste0("\\.", extensions[[in.format]], "$")

  # List variable names
  variables <- list.files(path = var.dir, pattern = patt)

  if (length(variables) == 0) {
    stop(paste("No variables with format", in.format, "were found in the directory", var.dir))
  }

  if (min.number > length(variables)) {
    stop("min.number must be <= the total number of variables.")
  }

  # Generating all combinations of variable names
  var_combinations <- all_var_comb(variables, min.number)

  # Output directory (General)
  dir.create(out.dir)

  # Subdirectories for variable sets
  sub_paths <- paste(out.dir, paste("Set", seq_along(var_combinations), sep = "_"), sep = "/")
  n_sets <- length(sub_paths)

  # Telling users how many sets they will create
  cat("\nA total of", n_sets, "sets of variables resulted from combinations of",
      length(variables), "variables will be written.\n")

  same_format <- in.format == out.format

  if (!same_format) {
    # Position of every variable of each set in 'variables' (and therefore in
    # the stack below); layers are selected by position because layer names
    # assigned when stacking are not guaranteed to equal the file names
    layer_ids <- lapply(var_combinations, match, table = variables)

    # change format names
    out_ext <- paste0(".", extensions[[out.format]])
    var_combinations <- lapply(var_combinations, function(x) {sub(patt, out_ext, x)})

    # All variables are stacked once and subsets are written set by set
    vars_all <- raster::stack(file.path(var.dir, variables))
  }

  # Progress bar (text on unix, window on other systems); closed on exit even
  # if an error interrupts the process
  is_unix <- .Platform$OS.type == "unix"
  if (is_unix) {
    pb <- txtProgressBar(min = 0, max = n_sets, style = 3)
  } else {
    pb <- winProgressBar(title = "Progress bar", min = 0, max = n_sets, width = 300)
  }
  on.exit(suppressMessages(close(pb)), add = TRUE)

  # Copying or writing variables in new sets
  for (k in seq_along(sub_paths)) {
    if (is_unix) {
      setTxtProgressBar(pb, k)
    } else {
      setWinProgressBar(pb, k, title = paste(round(k / n_sets * 100, 2),
                                             paste("% of the process has finished")))
    }

    dir.create(sub_paths[k])
    set_files <- file.path(sub_paths[k], var_combinations[[k]])

    if (same_format) {
      # Copying variables
      copied <- file.copy(from = file.path(var.dir, var_combinations[[k]]),
                          to = set_files)
      if (!all(copied)) {
        warning("Some variables could not be copied to ", sub_paths[k],
                call. = FALSE)
      }
    } else {
      # Writing variables in the new format
      for (l in seq_along(set_files)) {
        raster::writeRaster(vars_all[[layer_ids[[k]][l]]], filename = set_files[l],
                            format = out.format)
      }
    }
  }

  return(var_combinations)
}



--------------------------------------------------------------------------------
/R/kuenm_mop.R:
--------------------------------------------------------------------------------
#' Extrapolation risk analysis for single comparisons
#'
#' @description kuenm_mop calculates a mobility-oriented parity layer by
#' comparing environmental values between the calibration area and the area or
#' scenario to which an ecological niche model is transferred.
#'
#' @param M.variables a RasterStack of variables or a matrix with variables as columns
#' representing the calibration area.
#' If matrix, columns must contain only
#' information for the variables to be used. RasterBrick and data.frame
#' objects are also accepted.
#' @param G.stack a RasterStack of variables representing the full area of interest, and areas
#' or scenarios to which models are transferred.
#' @param percent (numeric) percent of values sampled from the calibration region to calculate the MOP.
#' Must be a single value between 0 and 100. Default = 10.
#' @param comp.each (numeric) compute distance matrix for each fixed number of rows (default = 2000).
#' @param parallel (logical) if TRUE, calculations will be performed in parallel using \code{n.cores}
#' of the computer. This will demand more RAM and almost full use of the CPU; hence, its use
#' is more recommended in high-performance computers. Using this option will speed up the analyses.
#' Default = FALSE.
#' @param n.cores (numeric) number of cores to be used in parallel processing.
#' Default = NULL, in which case all CPU cores on current host - 1 will be used.
#'
#' @return A mobility-oriented parity RasterLayer where values of 0 represent strict extrapolation,
#' which means complete dissimilarity of environments between the calibration (M) or the background,
#' and the projection area (G). Cells with missing values in any of the layers of
#' \code{G.stack} are NA in the result.
#'
#' @details The MOP is calculated following Owens et al.
#' (2013; \url{https://doi.org/10.1016/j.ecolmodel.2013.04.011}). This function is a modification
#' of the \code{\link[ENMGadgets]{MOP}} function, available at \url{https://github.com/narayanibarve/ENMGadgets}.
#'
#' Variables in \code{M.variables} and \code{G.stack} must have the same names; otherwise, the
#' function stops with an error.
#'
#' @usage
#' kuenm_mop(M.variables, G.stack, percent = 10, comp.each = 2000,
#'           parallel = FALSE, n.cores = NULL)
#'
#' @export
#'
#' @examples
#' mvars <- raster::stack(list.files(system.file("extdata", package = "kuenm"),
#'                                   pattern = "Mbio_", full.names = TRUE))
#' gvars <- raster::stack(list.files(system.file("extdata", package = "kuenm"),
#'                                   pattern = "Gbio_", full.names = TRUE))
#' names(mvars) <- gsub("M", "", names(mvars))
#' names(gvars) <- names(mvars)
#'
#' perc <- 5
#'
#' mop <- kuenm_mop(M.variables = mvars, G.stack = gvars, percent = perc)


kuenm_mop <- function(M.variables, G.stack, percent = 10, comp.each = 2000,
                      parallel = FALSE, n.cores = NULL) {

  # Values of the calibration area as a matrix with variables as columns
  if (inherits(M.variables, c("RasterStack", "RasterBrick"))) {
    mValues <- raster::getValues(M.variables)
  } else if (is.data.frame(M.variables)) {
    mValues <- as.matrix(M.variables)
  } else if (is.matrix(M.variables)) {
    mValues <- M.variables
  } else {
    stop("Argument 'M.variables' is not valid.")
  }

  if (!is.numeric(percent) || length(percent) != 1 || is.na(percent) ||
      percent < 0 || percent > 100) {
    stop("Argument 'percent' must be a single number between 0 and 100.")
  }
  if (!is.numeric(comp.each) || length(comp.each) != 1 || is.na(comp.each) ||
      comp.each < 1) {
    stop("Argument 'comp.each' must be a single number >= 1.")
  }

  mop_raster <- G.stack[[1]]
  gValues <- raster::getValues(G.stack)

  # Columns of G are put in the order of the columns of M before comparing
  mnames <- colnames(mValues)
  gnames <- colnames(gValues)
  gnames <- gnames[order(match(gnames, mnames))]

  if (!identical(mnames, gnames)) {
    stop("Variables in M and G must be the same.")
  }

  # drop = FALSE keeps a matrix even if only one variable is used
  gValues <- gValues[, gnames, drop = FALSE]

  g_noNA <- stats::na.omit(gValues)
  g_naIDs <- attr(g_noNA, "na.action")
  m_noNA <- stats::na.omit(mValues)

  if (nrow(g_noNA) == 0 || nrow(m_noNA) == 0) {
    stop("M.variables and G.stack must contain at least one row without NA values.")
  }

  # Cells of G that have complete data; MOP values are assigned only to them
  ids_raster <- seq_len(nrow(gValues))
  if (!is.null(g_naIDs)) {
    ids_raster <- ids_raster[-g_naIDs]
  }

  out_index <- plot_out(mValues, gValues)

  # Rows of G are processed in chunks of comp.each rows; kkk holds the first
  # row of every chunk plus a final sentinel (last row + 1)
  n_g <- nrow(g_noNA)
  kkk <- c(seq(1, n_g, comp.each), n_g + 1)
  n_chunks <- length(kkk) - 1

  if (!parallel) {
    # min = 0 because txtProgressBar requires max > min, which also needs to
    # hold when all rows fit in a single chunk
    pb <- txtProgressBar(min = 0, max = n_chunks, style = 3)
    on.exit(close(pb), add = TRUE)

    mop1 <- lapply(seq_len(n_chunks), function(x) {
      seq_rdist <- kkk[x]:(kkk[x + 1] - 1)
      # drop = FALSE keeps single-row chunks as one-row matrices
      eudist <- fields::rdist(g_noNA[seq_rdist, , drop = FALSE], m_noNA)

      # Mean of the distances to the closest 'percent' of M, row by row
      mean_quantile <- vapply(seq_len(nrow(eudist)), function(y) {
        di <- eudist[y, ]
        qdi <- stats::quantile(di, probs = percent / 100, na.rm = TRUE)
        mean(di[which(di <= qdi)])
      }, numeric(1))

      setTxtProgressBar(pb, x)
      mean_quantile
    })

    mop_vals <- unlist(mop1)

  } else {
    if (is.null(n.cores)) {
      # At least one worker, also if the number of cores cannot be detected
      n.cores <- max(1, parallel::detectCores() - 1, na.rm = TRUE)
    }

    # Namespace-qualified calls avoid attaching packages as a side effect
    cl <- snow::makeSOCKcluster(n.cores)
    on.exit(snow::stopCluster(cl), add = TRUE)
    doSNOW::registerDoSNOW(cl)
    `%dopar%` <- foreach::`%dopar%`

    pb <- txtProgressBar(min = 0, max = n_chunks, style = 3)
    on.exit(close(pb), add = TRUE)
    progress <- function(n) setTxtProgressBar(pb, n)
    opts <- list(progress = progress)

    # The body is kept inline (no local helper) so that only the objects it
    # references are sent to the workers
    mop_vals <- foreach::foreach(i = seq_len(n_chunks), .inorder = TRUE,
                                 .options.snow = opts, .combine = "c") %dopar% {
      seq_rdist <- kkk[i]:(kkk[i + 1] - 1)
      eudist <- fields::rdist(g_noNA[seq_rdist, , drop = FALSE], m_noNA)
      vapply(seq_len(nrow(eudist)), function(y) {
        di <- eudist[y, ]
        qdi <- stats::quantile(di, probs = percent / 100, na.rm = TRUE)
        mean(di[which(di <= qdi)])
      }, numeric(1))
    }
  }

  # Start from an all-NA layer so that cells without complete data in G do not
  # keep the values of the first variable
  mop_all <- rep(NA_real_, nrow(gValues))
  mop_all[ids_raster] <- mop_vals
  mop_raster <- raster::setValues(mop_raster, mop_all)

  # Cells flagged by plot_out get the maximum (scaled to 0 below)
  mop_max <- raster::cellStats(mop_raster, "max") * 1.05
  mop_raster[out_index] <- mop_max
  mop_raster <- 1 - (mop_raster / mop_max)
  return(mop_raster)
}

-------------------------------------------------------------------------------- /R/kuenm_mmop.R: -------------------------------------------------------------------------------- 1 | #' Extrapolation risk analysis for multiple comparisons 2 | #' 3 | #' @description kuenm_mmop calculates mobility-oriented parity (MOP) layers by 4 | #' comparing environmental values between the calibration area and multiple areas 5 | #' or scenarios to which ecological niche models are transferred. 6 | #' 7 | #' @param G.var.dir (character) if project is TRUE, name of the directory containing 8 | #' folders names as the sets to be compared (\code{sets.var}), which contain 9 | #' subfolders with variables representing the scenarios of projection. 10 | #' @param M.var.dir (character) name of the folder containing either folders with 11 | #' ascii raster layers or csv files (SWD format) representing representing sets 12 | #' of variables used to calibrate the models. 13 | #' @param is.swd (logical) whether model calibration and final models were produced 14 | #' using SWD format. 15 | #' @param sets.var (character) value or vector with the name(s) of the sets of 16 | #' variables from G.var.dir and M.var.dir that are going to be compared to create 17 | #' the MOP(s). 18 | #' @param out.mop (character) name of the folder to which MOP results will be 19 | #' written. 20 | #' @param percent (numeric) percentage of values sampled from the calibration 21 | #' region to calculate the MOP. 22 | #' @param comp.each (numeric) compute distance matrix for a each fixed number 23 | #' of rows (default 2000). 24 | #' @param parallel (logical) option to be passed to the \code{\link{kuenm_mop}} 25 | #' function (for each independent MOP analyses). If TRUE, calculations will be 26 | #' performed in parallel using \code{n.cores} of the computer. This will demand 27 | #' more RAM and almost full use of the CPU; hence, its use is more recommended 28 | #' in high-performance computers. 
#' Using this option will speed up the analyses.
#' Default = FALSE.
#' @param n.cores (numeric) number of cores to be used in parallel processing.
#' Default = NULL, in which case all CPU cores on current host - 1 will be used.
#'
#' @return
#' A folder containing one or multiple mobility-oriented parity raster layers
#' depending on how many projection areas or scenarios are considered. These
#' results will be organized by the different sets of variables chosen for
#' creating final models. Values of 0 in resultant raster layers represent strict
#' extrapolation.
#'
#' @details
#' This function can be used after selection of parameters that produce the best
#' models (when chosen sets of variables are known), or after producing final
#' models with the function \code{\link{kuenm_mod}}. In a MOP layer, areas of
#' strict extrapolation are excluded and other values represent how similar areas
#' or scenarios are to environmental conditions in the calibration area. MOP is
#' calculated following Owens et al. (2013; \url{https://doi.org/10.1016/j.ecolmodel.2013.04.011}).
#'
#' Results are written as GeoTIFF files named "MOP_<percent>%_<scenario>.tif"
#' inside a subfolder of \code{out.mop} named as each set of variables. The
#' function stops with an error if a set of variables is not found in
#' \code{M.var.dir} or \code{G.var.dir}, or if no ascii layers are found for a set
#' or scenario.
#'
#' @usage
#' kuenm_mmop(G.var.dir, M.var.dir, is.swd, sets.var, out.mop, percent = 10,
#'            comp.each = 2000, parallel = FALSE, n.cores = NULL)
#'
#' @export

kuenm_mmop <- function(G.var.dir, M.var.dir, is.swd, sets.var, out.mop, percent = 10,
                       comp.each = 2000, parallel = FALSE, n.cores = NULL) {
  if (missing(G.var.dir)) {
    stop("Argument G.var.dir is not defined.")
  }
  if (!dir.exists(G.var.dir)) {
    stop(paste(G.var.dir, "does not exist in the working directory, check folder name",
               "\nor its existence."))
  }
  if (length(list.dirs(G.var.dir, recursive = FALSE)) == 0) {
    stop(paste(G.var.dir, "does not contain any subdirectory with sets of projection variables;",
               "\neach subdirectory inside", G.var.dir, "must containg at least one subdirectory",
               "\nwith the projection variables"))
  }
  if (missing(M.var.dir)) {
    stop("Argument M.var.dir is not defined.")
  }
  if (!dir.exists(M.var.dir)) {
    stop(paste(M.var.dir, "does not exist in the working directory, check folder name",
               "\nor its existence."))
  }
  if (length(dir(M.var.dir)) == 0) {
    stop(paste(M.var.dir, "is empty. Check function's help for details."))
  }
  if (missing(is.swd)) {
    stop("Argument is.swd is not defined.")
  }
  if (missing(sets.var)) {
    stop("Argument sets.var is not defined.")
  }
  if (missing(out.mop)) {
    stop("Argument out.mop is not defined.")
  }

  # MOP directory
  dir.create(out.mop)

  is_windows <- .Platform$OS.type != "unix"

  # Calculating MOP for each comparison set by set
  for (h in seq_along(sets.var)) {

    # Variables of the calibration area (M)
    if (is.swd) {
      dirsm <- dir(M.var.dir, pattern = paste0("^", sets.var[h], "\\.csv$"),
                   full.names = TRUE)
      if (length(dirsm) == 0) {
        stop(paste0("File ", sets.var[h], ".csv was not found in ", M.var.dir, "."))
      }
      # The first three columns are excluded; drop = FALSE keeps a data.frame
      # even if a single variable remains
      m_vars <- read.csv(dirsm)[, -(1:3), drop = FALSE]
    } else {
      dirsm <- dir(M.var.dir, pattern = paste0("^", sets.var[h], "$"),
                   full.names = TRUE)
      if (length(dirsm) == 0) {
        stop(paste0("Set ", sets.var[h], " was not found in ", M.var.dir, "."))
      }
      # Only files ending in .asc (not, e.g., .asc.aux.xml) are listed
      m_var <- list.files(dirsm, pattern = "\\.asc$", full.names = TRUE) #listing vars in M
      if (length(m_var) == 0) {
        stop(paste0("No ascii layers were found for set ", sets.var[h], " in ",
                    M.var.dir, "."))
      }
      m_vars <- raster::stack(m_var) #stacking the variables
    }

    # Scenarios of projection (G) for this set
    dirsg <- dir(G.var.dir, pattern = paste0("^", sets.var[h], "$"),
                 full.names = TRUE)
    dirsg_in <- dir(dirsg, full.names = TRUE)
    namesg <- dir(dirsg)

    if (length(dirsg_in) == 0) {
      stop(paste0("No projection scenarios were found for set ", sets.var[h],
                  " in ", G.var.dir, "."))
    }

    dir.create(paste(out.mop, sets.var[h], sep = "/"))

    dirs_mop <- paste(paste(out.mop, sets.var[h], "MOP", sep = "/"),
                      paste(percent, "%", sep = ""), namesg, sep = "_")

    if (is_windows) {
      pb <- winProgressBar(title = "Progress bar", min = 0, max = length(dirsg_in),
                           width = 300) #progress bar
    }

    # finally closes the progress window also when a MOP calculation fails
    tryCatch({
      for (i in seq_along(dirsg_in)) {
        if (is_windows) {
          setWinProgressBar(pb, i, title = paste(round(i / length(dirsg_in) * 100, 2),
                                                 paste("% of the process for", sets.var[h], "has finished")))
        }

        g_var <- list.files(dirsg_in[i], pattern = "\\.asc$",
                            full.names = TRUE) #listing var of different Gs
        if (length(g_var) == 0) {
          stop(paste0("No ascii layers were found in ", dirsg_in[i], "."))
        }
        g_vars <- raster::stack(g_var)

        #MOP calculation
        mop_res <- kuenm_mop(M.variables = m_vars, G.stack = g_vars, percent = percent,
                             comp.each = comp.each, parallel = parallel, n.cores = n.cores)

        #Writing results
        raster::writeRaster(mop_res, filename = paste(dirs_mop[i], ".tif", sep = ""),
                            format = "GTiff")

        if (!is_windows) {
          cat("\n\t", paste(i, "of", length(dirsg_in), "MOPs", sep = " "), "\n")
        }
      }
    }, finally = {
      if (is_windows) {
        suppressMessages(close(pb))
      }
    })

    cat("\n", paste(h, "of", length(sets.var), "processes", sep = " "), "\n")
  }

  # writing description
  result_description(process = "kuenm_mmop", out.dir = out.mop)
}

--------------------------------------------------------------------------------
/replicate_examples/pemp_complete_process.md:
--------------------------------------------------------------------------------
*P. empusa*: modeling process
================

- [Getting started](#getting-started)
- [Candidate models](#candidate-models)
- [Evaluation and selection of best models](#evaluation-and-selection-of-best-models)
- [Final model creation](#final-model-creation)
- [Final model evaluation](#final-model-evaluation)
- [MOP analysis](#mop-analysis)

A brief tutorial for using functions of the kuenm R package can be found in the package vignette. Additionally, function help can be checked to change arguments according to specific needs.

14 | 15 | ### Getting started 16 | 17 | Package loading and working directory setting. 18 | 19 | ``` r 20 | if(!require(devtools)){ 21 | install.packages("devtools") 22 | } 23 | 24 | if(!require(kuenm)){ 25 | devtools::install_github("marlonecobos/kuenm") 26 | } 27 | 28 | library(kuenm) 29 | 30 | # working directory 31 | setwd("YOUR/DIRECTORY/P_empusa") 32 | ``` 33 | 34 |
35 | 36 | ### Candidate models 37 | 38 | Candidate models are a large set of candidate models created to respond to the need to test broad suites of parameter combinations, such as, distinct regularization multiplier values, various feature classes, and different sets of environmental variables. The following code calls the help page of the function kuenm\_cal. 39 | 40 | ``` r 41 | help(kuenm_cal) 42 | ``` 43 | 44 |
45 | 46 | The next chunk of code is for preparing the arguments for using the function following the modularity principle. These variables can be changed according to each case. 47 | 48 | ``` r 49 | # Variables with information to be used as arguments. Change "YOUR/DIRECTORY" by your actual directory. 50 | occ_joint <- "pemp_joint.csv" 51 | occ_tra <- "pemp_train.csv" 52 | M_var_dir <- "M_variables" 53 | batch_cal <- "Candidate_models" 54 | out_dir <- "Candidate_Models" 55 | reg_mult <- c(seq(0.1, 1, 0.1), seq(2, 6, 1), 8, 10) 56 | f_clas <- "all" 57 | background <- 10000 58 | maxent_path <- getwd() 59 | wait <- FALSE 60 | run <- TRUE 61 | ``` 62 | 63 |
64 | 65 | The following is the code for using the function. 66 | 67 | ``` r 68 | kuenm_cal(occ.joint = occ_joint, occ.tra = occ_tra, M.var.dir = M_var_dir, batch = batch_cal, 69 | out.dir = out_dir, reg.mult = reg_mult, f.clas = f_clas, background = background, 70 | maxent.path = maxent_path, wait = wait, run = run) 71 | ``` 72 | 73 |
74 | 75 | ### Evaluation and selection of best models 76 | 77 | Evaluation is a crucial step in model calibration. This step centers on selecting candidate models and their associated parameters to identify the best models for the purposes of the study. The kuenm\_ceval function evaluates candidate models based on three distinct criteria: statistical significance (based on partial ROC analyses), prediction ability (we use omission rates, but other metrics, such as overall correct classification rate, can also be used), and model complexity (here evaluated using AICc). The following code chunk calls the function help window. 78 | 79 | ``` r 80 | help(kuenm_ceval) 81 | ``` 82 | 83 |
84 | 85 | Below, arguments for this function are defined. 86 | 87 | ``` r 88 | occ_test <- "pemp_test.csv" 89 | out_eval <- "Calibration_results" 90 | threshold <- 5 91 | rand_percent <- 50 92 | iterations <- 500 93 | kept <- TRUE 94 | selection <- "OR_AICc" 95 | paral_proc <- FALSE 96 | # Notice, some of the variables used here as arguments were already created for the previous function 97 | ``` 98 | 99 |
100 | 101 | This code also allows evaluating candidate models that were created previously, selecting those with best performance based on the three criteria. 102 | 103 | ``` r 104 | cal_eval <- kuenm_ceval(path = out_dir, occ.joint = occ_joint, occ.tra = occ_tra, occ.test = occ_test, batch = batch_cal, 105 | out.eval = out_eval, threshold = threshold, rand.percent = rand_percent, iterations = iterations, 106 | kept = kept, selection = selection, parallel.proc = paral_proc) 107 | ``` 108 | 109 |
110 | 111 | ### Final model creation 112 | 113 | After selecting parametrizations that produce best models, the next step is to create the final models, and if needed transfer them to other environmental data sets (e.g., to other time periods or other geographic regions). The function help is called via this code: 114 | 115 | ``` r 116 | help(kuenm_mod) 117 | ``` 118 | 119 |
120 | 121 | For preparing the arguments for this function use the following chunk of code. 122 | 123 | ``` r 124 | batch_fin <- "Final_models" 125 | mod_dir <- "Final_Models" 126 | rep_n <- 10 127 | rep_type <- "Bootstrap" 128 | jackknife <- FALSE 129 | G_var_dir <- "G_variables" 130 | out_format <- "logistic" 131 | project <- TRUE 132 | ext_type <- "all" 133 | write_mess <- FALSE 134 | write_clamp <- FALSE 135 | wait1 <- FALSE 136 | run1 <- TRUE 137 | args <- "outputgrids=false" 138 | # Again, some of the variables used here as arguments were already created for the previous functions 139 | ``` 140 | 141 |
142 | 143 | The kuenm\_mod function has the following syntax: 144 | 145 | ``` r 146 | kuenm_mod(occ.joint = occ_joint, M.var.dir = M_var_dir, out.eval = out_eval, batch = batch_fin, rep.n = rep_n, 147 | rep.type = rep_type, jackknife = jackknife, out.dir = mod_dir, out.format = out_format, project = project, 148 | G.var.dir = G_var_dir, ext.type = ext_type, write.mess = write_mess, write.clamp = write_clamp, 149 | maxent.path = maxent_path, args = args, wait = wait1, run = run1) 150 | ``` 151 | 152 |
153 | 154 | ### Final model evaluation 155 | 156 | Final models can be evaluated after being created; for this step, independent data are needed (data not used in the calibration process, ideally coming from different sources). The function help is called via this code: 157 | 158 | ``` r 159 | help(kuenm_feval) 160 | ``` 161 | 162 |
163 | 164 | For preparing the arguments for this function, use the following chunk of code. 165 | 166 | ``` r 167 | occ_ind <- "pemp_ind.csv" 168 | replicates <- TRUE 169 | out_feval <- "Final_Models_evaluation" 170 | # Again, some of the variables used here as arguments were already created for the previous functions 171 | ``` 172 | 173 |
174 | 175 | The following is the code for using the function. 176 | 177 | ``` r 178 | fin_eval <- kuenm_feval(path = mod_dir, occ.joint = occ_joint, occ.ind = occ_ind, replicates = replicates, 179 | out.eval = out_feval, threshold = threshold, rand.percent = rand_percent, 180 | iterations = iterations, parallel.proc = paral_proc) 181 | ``` 182 | 183 |
184 | 185 | ### MOP analysis 186 | 187 | If transfers were performed when creating final models, the MOP analysis will help to identify areas of strict extrapolation and levels of similarity between the calibration area and the region or scenario of projection. The code below will help to see the function's documentation: 188 | 189 | ``` r 190 | help(kuenm_mmop) 191 | ``` 192 | 193 |
194 | 195 | Below, arguments for this function are defined. 196 | 197 | ``` r 198 | sets_var <- "Set3" 199 | out_mop <- "MOP_results" 200 | percent <- 10 201 | paral <- TRUE 202 | # Again, some of the variables used here as arguments were already created for the previous functions 203 | ``` 204 | 205 |
206 | 207 | The kuenm\_mmop function has the following syntax (`is.swd = FALSE` because the calibration variables are raster layers, not SWD csv files): 208 | 209 | ``` r 210 | kuenm_mmop(G.var.dir = G_var_dir, M.var.dir = M_var_dir, is.swd = FALSE, sets.var = sets_var, 211 | out.mop = out_mop, percent = percent, parallel = paral) 212 | ``` 213 | -------------------------------------------------------------------------------- /replicate_examples/aame_complete_process.md: -------------------------------------------------------------------------------- 1 | *A. americanum*: modeling process 2 | ================ 3 | 4 | - [Getting started](#getting-started) 5 | - [Candidate models](#candidate-models) 6 | - [Evaluation and selection of best models](#evaluation-and-selection-of-best-models) 7 | - [Final model creation](#final-model-creation) 8 | - [Final model evaluation](#final-model-evaluation) 9 | - [MOP analysis](#mop-analysis) 10 | 11 | A brief tutorial for using functions of the kuenm R package can be found in the package vignette. Additionally, function help can be checked to change arguments according to specific needs. 12 | 13 |
14 | 15 | ### Getting started 16 | 17 | Package loading and working directory setting. 18 | 19 | ``` r 20 | if(!require(devtools)){ 21 | install.packages("devtools") 22 | } 23 | 24 | if(!require(kuenm)){ 25 | devtools::install_github("marlonecobos/kuenm") 26 | } 27 | 28 | library(kuenm) 29 | 30 | # working directory 31 | setwd("YOUR/DIRECTORY/A_americanum") 32 | ``` 33 | 34 |
35 | 36 | ### Candidate models 37 | 38 | Candidate models are a large set of candidate models created to respond to the need to test broad suites of parameter combinations, such as, distinct regularization multiplier values, various feature classes, and different sets of environmental variables. The following code calls the help page of the function kuenm\_cal. 39 | 40 | ``` r 41 | help(kuenm_cal) 42 | ``` 43 | 44 |
45 | 46 | The next chunk of code is for preparing the arguments for using the function following the modularity principle. These variables can be changed according to each case. 47 | 48 | ``` r 49 | # Variables with information to be used as arguments. Change "YOUR/DIRECTORY" by your actual directory. 50 | occ_joint <- "aame_joint.csv" 51 | occ_tra <- "aame_train.csv" 52 | M_var_dir <- "M_variables" 53 | batch_cal <- "Candidate_models" 54 | out_dir <- "Candidate_Models" 55 | reg_mult <- c(seq(0.1, 1, 0.1), seq(2, 6, 1), 8, 10) 56 | f_clas <- "all" 57 | background <- 10000 58 | maxent_path <- getwd() 59 | wait <- FALSE 60 | run <- TRUE 61 | ``` 62 | 63 |
64 | 65 | The following is the code for using the function. 66 | 67 | ``` r 68 | kuenm_cal(occ.joint = occ_joint, occ.tra = occ_tra, M.var.dir = M_var_dir, batch = batch_cal, 69 | out.dir = out_dir, reg.mult = reg_mult, f.clas = f_clas, background = background, 70 | maxent.path = maxent_path, wait = wait, run = run) 71 | ``` 72 | 73 |
74 | 75 | ### Evaluation and selection of best models 76 | 77 | Evaluation is a crucial step in model calibration. This step centers on selecting candidate models and their associated parameters to identify the best models for the purposes of the study. The kuenm\_ceval function evaluates candidate models based on three distinct criteria: statistical significance (based on partial ROC analyses), prediction ability (we use omission rates, but other metrics, such as overall correct classification rate, can also be used), and model complexity (here evaluated using AICc). The following code chunk calls the function help window. 78 | 79 | ``` r 80 | help(kuenm_ceval) 81 | ``` 82 | 83 |
84 | 85 | Below, arguments for this function are defined. 86 | 87 | ``` r 88 | occ_test <- "aame_test.csv" 89 | out_eval <- "Calibration_results" 90 | threshold <- 5 91 | rand_percent <- 50 92 | iterations <- 500 93 | kept <- TRUE 94 | selection <- "OR_AICc" 95 | paral_proc <- FALSE 96 | # Notice, some of the variables used here as arguments were already created for the previous function 97 | ``` 98 | 99 |
100 | 101 | This code also allows evaluating candidate models that were created previously, selecting those with best performance based on the three criteria. 102 | 103 | ``` r 104 | cal_eval <- kuenm_ceval(path = out_dir, occ.joint = occ_joint, occ.tra = occ_tra, occ.test = occ_test, batch = batch_cal, 105 | out.eval = out_eval, threshold = threshold, rand.percent = rand_percent, iterations = iterations, 106 | kept = kept, selection = selection, parallel.proc = paral_proc) 107 | ``` 108 | 109 |
110 | 111 | ### Final model creation 112 | 113 | After selecting parametrizations that produce best models, the next step is to create the final models, and if needed transfer them to other environmental data sets (e.g., to other time periods or other geographic regions). The function help is called via this code: 114 | 115 | ``` r 116 | help(kuenm_mod) 117 | ``` 118 | 119 |
120 | 121 | For preparing the arguments for this function use the following chunk of code. 122 | 123 | ``` r 124 | batch_fin <- "Final_models" 125 | mod_dir <- "Final_Models" 126 | rep_n <- 10 127 | rep_type <- "Bootstrap" 128 | jackknife <- FALSE 129 | G_var_dir <- "G_variables" 130 | out_format <- "logistic" 131 | project <- TRUE 132 | ext_type <- "all" 133 | write_mess <- FALSE 134 | write_clamp <- FALSE 135 | wait1 <- FALSE 136 | run1 <- TRUE 137 | args <- "outputgrids=false" 138 | # Again, some of the variables used here as arguments were already created for the previous functions 139 | ``` 140 | 141 |
142 | 143 | The kuenm\_mod function has the following syntax: 144 | 145 | ``` r 146 | kuenm_mod(occ.joint = occ_joint, M.var.dir = M_var_dir, out.eval = out_eval, batch = batch_fin, rep.n = rep_n, 147 | rep.type = rep_type, jackknife = jackknife, out.dir = mod_dir, out.format = out_format, project = project, 148 | G.var.dir = G_var_dir, ext.type = ext_type, write.mess = write_mess, write.clamp = write_clamp, 149 | maxent.path = maxent_path, args = args, wait = wait1, run = run1) 150 | ``` 151 | 152 |
153 | 154 | ### Final model evaluation 155 | 156 | Final models can be evaluated after being created; for this step, independent data are needed (data not used in the calibration process, ideally coming from different sources). The function help is called via this code: 157 | 158 | ``` r 159 | help(kuenm_feval) 160 | ``` 161 | 162 |
163 | 164 | For preparing the arguments for this function, use the following chunk of code. 165 | 166 | ``` r 167 | occ_ind <- "aame_ind.csv" 168 | replicates <- TRUE 169 | out_feval <- "Final_Models_evaluation" 170 | # Again, some of the variables used here as arguments were already created for the previous functions 171 | ``` 172 | 173 |
174 | 175 | The following is the code for using the function. 176 | 177 | ``` r 178 | fin_eval <- kuenm_feval(path = mod_dir, occ.joint = occ_joint, occ.ind = occ_ind, replicates = replicates, 179 | out.eval = out_feval, threshold = threshold, rand.percent = rand_percent, 180 | iterations = iterations, parallel.proc = paral_proc) 181 | ``` 182 | 183 |
184 | 185 | ### MOP analysis 186 | 187 | If transfers were performed when creating final models, the MOP analysis will help to identify areas of strict extrapolation and levels of similarity between the calibration area and the region or scenario of projection. The code below will help to see the function's documentation: 188 | 189 | ``` r 190 | help(kuenm_mmop) 191 | ``` 192 | 193 |
194 | 195 | Below, arguments for this function will be defined. 196 | 197 | ``` r 198 | sets_var <- "Set3" 199 | out_mop <- "MOP_results" 200 | percent <- 10 201 | paral <- TRUE 202 | # Again, some of the variables used here as arguments were already created for the previous functions 203 | ``` 204 | 205 |
206 | 207 | The kuenm\_mmop function has the following syntax: 208 | 209 | ``` r 210 | kuenm_mmop(G.var.dir = G_var_dir, M.var.dir = M_var_dir, sets.var = sets_var, out.mop = out_mop, 211 | percent = percent, parallel = paral) 212 | ``` 213 | -------------------------------------------------------------------------------- /extra_vignettes/post-modeling.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Post-modeling analyses with kuenm" 3 | author: "Marlon E. Cobos, Luis Osorio-Olvera, A. Townsend Peterson" 4 | output: 5 | github_document: 6 | toc: yes 7 | toc_depth: 4 8 | --- 9 | 10 | 11 | ```{r setup, include = FALSE} 12 | knitr::opts_chunk$set( 13 | collapse = TRUE, 14 | comment = "#>" 15 | ) 16 | ``` 17 | 18 |
19 | 20 | ## Description 21 | 22 | The set of tools presented here were created to help users in creating consensus and assessing levels of agreement among ecological niche models, as well as in evaluating variation in their outputs. Four main analyses can be performed: (1) calculation of model statistics across models (results from distinct parameter settings are allowed); (2) identification of changes of suitable areas and suitability in projections (considering multiple climate models, emission scenarios, and times); (3) creation of maps showing the variance contributed per each source of variation; (4) and hierarchical partitioning of the variance in models that comes from distinct sources of variation. The four potential sources of variation considered are: model replicates, parameter settings, GCMs, and RCPs. 23 | 24 |
25 | 26 | ## Data organization needed 27 | 28 | All the analyses performed by the following functions use several raster outputs of ecological niche modeling analyses. Loading all those layers to the R environment won't be possible in most computers because of RAM limitations. That is why a specific organization of the data is needed. The structure of the directory containing the models is produced automatically when models are created with the kuenm R package. See the figure below for details on the organization required. 29 | 30 | ```{r Fig.1, echo=FALSE, message=FALSE, warning=FALSE, fig.width=4, fig.cap="Figure 1. Directory structure and data for starting using the R functions for the assessment of variation in ecological niche model outputs. This figure represents models that were created using two distinct parameters settings and were projected to a bigger area in the current time and to future scenarios. Future conditions are represented by two climate models (GCM) in two emission scenarios (RCP). E, EC, and NE, represent three distinct options of extrapolation, free extrapolation, extrapolation and clamping, and no extrapolation, respectively."} 31 | 32 | knitr::include_graphics("Structure_variation.png") 33 | ``` 34 | 35 | It is important to highlight that the functions in this repository can handle distinct situations of final models. That is, models could be created with no projections; however, if projected, models could be created with distinct options of extrapolation (Figure 1), or with only one of them. In addition, models can be created with only one set of parameters, but, if more parameter settings exist, functions will recognize them. To handle all of these situations the user will be asked for details about: species name, existence of replicates, extrapolation types, general scenarios, climate models (GCM), emission scenarios (RCPs), and times of projection, the last five, only if model projections were performed. 36 | 37 |
38 | 39 | ## How to use these functions 40 | 41 | The following lines will help users to load, prepare arguments, and use the functions to perform the analyses. After using each function check the working directory to see the outputs. 42 | 43 | ### Model statistics across multiple parameter settings 44 | 45 | Function's help: 46 | 47 | ```{r, eval=FALSE, include=TRUE} 48 | help(kuenm_modstats) 49 | ``` 50 | 51 | Preparing arguments: 52 | 53 | ```{r, eval=FALSE, include=TRUE} 54 | sp_name <- "sp1" 55 | fmod_dir <- "Final_Models" 56 | format <- "asc" 57 | project <- TRUE 58 | stats <- c("med", "range") 59 | rep <- TRUE 60 | scenarios <- c("current", "GCM1_RCP4.5", "GCM1_RCP8.5", "GCM2_RCP4.5", "GCM2_RCP8.5") 61 | ext_type <- c("E", "EC", "NE") # the type of extrapolation can be selected according to user requirements 62 | out_dir <- "Final_Model_Stats" 63 | 64 | # argument "time.periods" is not included in the example but it can be used when models 65 | # are projected to more than one time period, other than current. 66 | ``` 67 | 68 | The following lines will execute the function: 69 | 70 | ```{r, eval=FALSE, include=TRUE} 71 | kuenm_modstats(sp.name = sp_name, fmod.dir = fmod_dir, format = format, project = project, 72 | statistics = stats, replicated = rep, proj.scenarios = scenarios, 73 | ext.type = ext_type, out.dir = out_dir) 74 | ``` 75 | 76 | 77 | ### Changes of suitability and suitable areas between times 78 | 79 | Note: This function is useful only when models were projected in time. 
80 | 81 | Function's help: 82 | 83 | ```{r, eval=FALSE, include=TRUE} 84 | help(kuenm_projchanges) 85 | ``` 86 | 87 | Preparing arguments: 88 | 89 | ```{r, eval=FALSE, include=TRUE} 90 | # other arguments were defined before 91 | occ <- "Sp_occ.csv" 92 | fmod_stats <- "Final_Model_Stats" 93 | thres <- 5 94 | curr <- "current" 95 | emi_scenarios <- c("RCP4.5", "RCP8.5") 96 | c_mods <- c("GCM1", "GCM2") 97 | ext_type <- c("E", "EC", "NE") 98 | out_dir1 <- "Projection_Changes" 99 | ``` 100 | 101 | The following lines will execute the function: 102 | 103 | ```{r, eval=FALSE, include=TRUE} 104 | kuenm_projchanges(occ = occ, fmod.stats = fmod_stats, threshold = thres, current = curr, 105 | emi.scenarios = emi_scenarios, clim.models = c_mods, ext.type = ext_type, 106 | out.dir = out_dir1) 107 | ``` 108 | 109 | 110 | ### Raster layers of variance coming from distinct sources 111 | 112 | Function's help: 113 | 114 | ```{r, eval=FALSE, include=TRUE} 115 | help(kuenm_modvar) 116 | ``` 117 | 118 | Preparing arguments: 119 | 120 | ```{r, eval=FALSE, include=TRUE} 121 | sp_name <- "sp1" 122 | fmod_dir <- "Final_Models" 123 | rep <- TRUE 124 | format <- "asc" 125 | project <- TRUE 126 | curr <- "current" 127 | emi_scenarios <- c("RCP4.5", "RCP8.5") 128 | c_mods <- c("GCM1", "GCM2") 129 | ext_type <- c("E", "EC", "NE") 130 | split <- 100 131 | out_dir2 <- "Variation_from_sources" 132 | ``` 133 | 134 | The following lines will execute the function: 135 | 136 | ```{r, eval=FALSE, include=TRUE} 137 | kuenm_modvar(sp.name = sp_name, fmod.dir = fmod_dir, replicated = rep, format = format, 138 | project = project, current = curr, emi.scenarios = emi_scenarios, 139 | clim.models = c_mods, ext.type = ext_type, split.length = split, out.dir = out_dir2) 140 | ``` 141 | 142 | 143 | ### Hierarchical partitioning of the variance coming from distinct sources 144 | 145 | Function's help: 146 | 147 | ```{r, eval=FALSE, include=TRUE} 148 | help(kuenm_hierpart) 149 | ``` 150 | 151 | Preparing 
arguments: 152 | 153 | ```{r, eval=FALSE, include=TRUE} 154 | sp_name <- "sp1" 155 | fmod_dir <- "Final_Models" 156 | rep <- TRUE 157 | format <- "asc" 158 | project <- TRUE 159 | curr <- "current" 160 | emi_scenarios <- c("RCP4.5", "RCP8.5") 161 | c_mods <- c("GCM1", "GCM2") 162 | ext_type <- c("E", "EC", "NE") 163 | iter <- 100 164 | s_size <- 1000 165 | out_dir3 <- "Hierarchical_partitioning" 166 | # argument "factors_col" is not defined here, but if default colors (grey scale) need to be changed, 167 | # you can use this argument. 168 | ``` 169 | 170 | The following lines will execute the function: 171 | 172 | ```{r, eval=FALSE, include=TRUE} 173 | kuenm_hierpart(sp.name = sp_name, fmod.dir = fmod_dir, replicated = rep, format = format, 174 | project = project, current = curr, emi.scenarios = emi_scenarios, 175 | clim.models = c_mods, ext.type = ext_type, iterations = iter, 176 | sample.size = s_size, out.dir = out_dir3) 177 | ``` 178 | -------------------------------------------------------------------------------- /R/result_description.R: -------------------------------------------------------------------------------- 1 | #' Helper function to describe results from complex functions 2 | #' 3 | #' @param process (character) name of the function which results derive from. 4 | #' Options include: "kuenm_projchanges", "kuenm_modvar", "kuenm_hierpart", 5 | #' "kuenm_mmop", and "kuenm_mopagree". 6 | #' @param result.table (data.frame) data.frame with the description of results. 7 | #' Only used when \code{process} = "kuenm_projchanges" or "kuenm_mopagree". 8 | #' Default = NULL. 9 | #' @param out.dir (character) name of the output directory where description 10 | #' file will be written. 
#'
#' @return
#' \code{NULL}, invisibly. A text file named
#' "Result_description (<process>).txt" is written in \code{out.dir}; any
#' previous file with that name is replaced.
#'
#' @export
#'
#' @usage
#' result_description(process, result.table = NULL, out.dir)

result_description <- function(process, result.table = NULL, out.dir) {

  # ---------------------------------------------------------------------------
  # Argument checks. Everything is validated BEFORE touching the file system so
  # that a bad call never removes a description written by a previous run.
  if (missing(process)) {
    stop("Argument 'process' must be defined")
  }
  if (missing(out.dir)) {
    stop("Argument 'out.dir' must be defined")
  }
  if (!dir.exists(out.dir)) {
    stop("'out.dir' does not exist")
  }

  supported <- c("kuenm_projchanges", "kuenm_modvar", "kuenm_hierpart",
                 "kuenm_mmop", "kuenm_mopagree")
  if (length(process) != 1 || !process %in% supported) {
    # An unsupported value used to be accepted silently and nothing was written
    stop("'process' must be one of: ", paste(supported, collapse = ", "))
  }

  # These two processes append a table that explains the raster values
  needs_table <- process %in% c("kuenm_projchanges", "kuenm_mopagree")
  if (needs_table && is.null(result.table)) {
    stop("Argument 'result.table' must be defined")
  }

  filenam <- file.path(out.dir,
                       paste0("Result_description (", process, ").txt"))

  # ---------------------------------------------------------------------------
  # Description text. It is built line by line (empty strings are blank lines)
  # so the written file does not depend on how this source file is indented.
  description <- switch(
    process,

    # Description of results of detecting changes in projections
    kuenm_projchanges = c(
      "Description of results generated by the function kuenm_projchanges",
      "", "",
      "A folder or folders named starting with 'Changes' contain all results per extrapolation type.",
      "Each folder contains subfolders per each period of projection. Inside period-folders,",
      "subfolders per each scenario of projection can be found.",
      "", "",
      "Inside each scenario-folder you will find:",
      "",
      "- A folder (Binary) containing all binarized (thresholded) models.",
      "",
      "- A raster file with the comparison of continuous median models of current and projection",
      "periods. Name = continuous_comparison.",
      "",
      "- A raster file with the comparison of binary models considering agreement among distinct GCMs if",
      "more than one of them were used. Name: binary_comparison.",
      "", "",
      "Description of values in rasters resulted from comparisons:",
      "",
      "- Continuous comparison generally ranges from -1 to 1. Although the range may vary, negative",
      "values represent decrease in suitability and positive values represent increase; values",
      "close to zero represent more stability.",
      "",
      "- The table below indicates what each value in binary comparison represents.",
      "", ""
    ),

    # Description of results of creating maps of variance
    kuenm_modvar = c(
      "Description of results generated by the function kuenm_modvar",
      "", "",
      "A folder or folders named starting with 'Variation' contain all results per extrapolation",
      "type. Each folder contains subfolders per each area of projection. Inside these subfolders,",
      "a raster file for each source of variation will be found.",
      "",
      "Values in these raster files represent the variance coming from each source of",
      "variation. These values start from zero and are comparable among distinct sources",
      "of variation.",
      "", "",
      "Using a continuous color scale to plot maps is recommended."
    ),

    # Description of results of hierarchical partitioning analyses
    kuenm_hierpart = c(
      "Description of results generated by the function kuenm_hierpart",
      "", "",
      "A folder or folders named starting with 'HP_results' contain all results per extrapolation type.",
      "Each folder contains subfolders per each area/time of projection in which more than",
      "one source of variation is considered. Per each area of projection, a folder contains",
      "csv files with the numerical results of hierarchical partitioning analyses. If the",
      "argument keep.tables was defined as TRUE, an additional folder containing csv files",
      "with all sampled data per each iteration can be found. A bar-plot summarizing the",
      "total effects of each source of variation on the overall variance can also be found",
      "in each HP_results folder.",
      "", "",
      "Description of csv files resulted from hierarchical partitioning analyses:",
      "",
      "- hierpart_Goodness_fit.csv contains values that represent how well the statistical",
      "model fitted the data.",
      "",
      "- hierpart_Raw_effects.csv contains values of Independent, Joint, and Total effects",
      "for all iterations of the hierarchical partitioning analysis.",
      "",
      "- hierpart_Mean_effects.csv contains mean values of Independent, Joint, and Total",
      "effects from all iterations.",
      "",
      "- hierpart_Total_effects_percent.csv contains values of total effects for all",
      "iterations, expressed as percentages. These values are used to create the bar-plot.",
      ""
    ),

    # Description of results of MOP analyses for multiple layers
    kuenm_mmop = c(
      "Description of results generated by the function kuenm_mmop",
      "", "",
      "A folder or folders named as the sets of variables analyzed contain all results of",
      "extrapolation risk analyses using the MOP metric.",
      "", "",
      "Inside each set-folder, raster files resulted from MOP analyses can be found. The names",
      "of the raster layers will help to identify the specific time period, emission scenario,",
      "and/or general circulation model accordingly.",
      "", "",
      "Values in raster files range from zero to one, where zero represents strict extrapolative",
      "areas, and other values represent levels of similarity between the calibration area and",
      "the specific scenario of projection. Suitability in areas of strict extrapolation",
      "(values of zero), should be interpreted carefully.",
      "", "",
      "Representing strict extrapolative areas separate from areas with distinct levels of",
      "similarity is recommended. However, if multiple GCMs were used, users may find convenient to",
      "use the kuenm_mopagree function, which creates raster layers of strict extrapolative area",
      "agreement among distinct GCMs."
    ),

    # Description of results of agreement of extrapolative areas (MOP)
    kuenm_mopagree = c(
      "Description of results generated by the function kuenm_mopagree",
      "", "",
      "A folder or folders named as the sets of variables analyzed contain all results of agreement",
      "of strict extrapolative areas among distinct GCMs, per each emission scenario.",
      "", "",
      "Inside each set-folder, raster files resulted from exploring the agreement of strict",
      "extrapolative areas among GCMs can be found. The names of the raster layers will help",
      "to identify the specific time period and emission scenario accordingly.",
      "A raster layer of MOP results for the current period can also be found here.",
      "", "",
      "Values in raster files range from zero to a number equal to the number of GCMs",
      "used to perform model projection. The table below indicates what each value in raster",
      "layers of agreement represents. Values in raster layer for the current period should be",
      "interpreted as in layers obtained with MOP analysis.",
      "", ""
    )
  )

  # ---------------------------------------------------------------------------
  # Writing. A description left by a previous run is replaced.
  if (file.exists(filenam)) {
    unlink(filenam)
  }
  cat(paste(description, collapse = "\n"), file = filenam)

  if (needs_table) {
    # write.table() warns when column names are appended to an existing file;
    # that is exactly what is intended here, so the warning is silenced.
    suppressWarnings(
      write.table(result.table, filenam, sep = "\t", append = TRUE,
                  quote = FALSE, row.names = FALSE)
    )
  }

  invisible(NULL)
}

# ------------------------------------------------------------------------------
# /R/kuenm_proc.R:
# ------------------------------------------------------------------------------
#' Partial ROC calculation for ecological niche models
#'
#' @description kuenm_proc applies partial ROC tests to model predictions.
4 | #' 5 | #' @param model RasterLayer or numeric vector of ecological niche model 6 | #' predictions to be evaluated. If RasterLayer, layer of predicted suitability. 7 | #' If numeric vector, predicted suitability values. 8 | #' @param occ.test matrix, data.frame, or numeric vector containing coordinates 9 | #' of occurrences to test model predictions to be evaluated. If matrix or 10 | #' data.frame, columns must include longitude and latitude in that order. 11 | #' If numeric, values of suitability in such occurrences. If a matrix or a 12 | #' data.frame is provided, \code{model} must be a RasterLayer. 13 | #' @param threshold (numeric) value from 0 to 100 to represent the percentage of 14 | #' potential error (E) that the data could have due to any source of uncertainty. 15 | #' Default = 5. 16 | #' @param iterations (numeric) number of bootstrap iterations to be performed; 17 | #' default = 500. 18 | #' @param rand.percent (numeric) percentage of testing data to be used in each 19 | #' bootstrapped process for calculating the partial ROC. Default = 50. 20 | #' @param parallel (logical) argument deprecated. Default = NULL. 21 | #' 22 | #' @return A list with the summary of the results and a data.frame containing 23 | #' the AUC values and AUC ratios calculated for all iterations. 24 | #' 25 | #' @usage 26 | #' kuenm_proc(occ.test, model, threshold = 5, rand.percent = 50, 27 | #' iterations = 500, parallel = NULL) 28 | #' 29 | #' @details Partial ROC is calculated following Peterson et al. (2008; 30 | #' \url{http://dx.doi.org/10.1016/j.ecolmodel.2007.11.008}).
#'
#' @importFrom purrr map_df
#' @useDynLib kuenm
#' @export
#'
#' @examples
#' data("sp_test", package = "kuenm")
#' model <- raster::raster(system.file("extdata/sp_model.tif",
#'                                     package = "kuenm"))
#' thres <- 5
#' rand_perc <- 50
#' iterac <- 500
#'
#' p_roc <- kuenm_proc(occ.test = sp_test, model = model, threshold = thres,
#'                     rand.percent = rand_perc, iterations = iterac)

kuenm_proc <- function(occ.test, model, threshold = 5, rand.percent = 50,
                       iterations = 500, parallel = NULL) {

  # -----------
  # detecting potential errors, other potential problems tested in code
  if (missing(model)) {
    stop("Argument 'model' is necessary to perform the analysis.")
  }
  if (missing(occ.test)) {
    stop("Argument 'occ.test' is necessary to perform the analysis.")
  }
  c_pred <- class(model)[1]
  if (!c_pred %in% c("RasterLayer", "numeric")) {
    stop("'model' must be of class RasterLayer or numeric.")
  }
  c_tdat <- class(occ.test)[1]
  if (!c_tdat %in% c("matrix", "data.frame", "numeric")) {
    stop("'occ.test' must be of class matrix, data.frame, or numeric.")
  }
  if (c_pred == "numeric" && c_tdat != "numeric") {
    stop("'occ.test' must be of class numeric if model is a numeric vector.")
  }
  if (!is.null(parallel)) {
    message("'parallel' is a deprecated argument.")
  }

  # -----------
  # preparing data
  # (no package is attached here: the function must not change the search path)
  if (c_pred == "RasterLayer") {
    model <- raster::setMinMax(model)
    min_pred <- model@data@min
    max_pred <- model@data@max
  } else {
    min_pred <- min(model, na.rm = TRUE)
    max_pred <- max(model, na.rm = TRUE)
  }

  # suitability at testing occurrences: extracted from the layer when
  # coordinates were given, otherwise taken as provided
  if (c_pred == "RasterLayer" && c_tdat != "numeric") {
    test_data <- na.omit(raster::extract(model, occ.test[, 1:2]))
  } else {
    test_data <- na.omit(occ.test)
  }
  if (length(test_data) == 0) {
    stop("No valid testing data remain after excluding NA values.")
  }

  vals <- na.omit(model[])

  # Model and test values are binned together into 500 classes so both share
  # the same class limits; from here on they are class numbers (1 to 500).
  # (A disabled rescaling step relying on dec_places_proc() used to sit here.)
  nvals <- length(vals)
  binned <- as.numeric(cut(c(vals, test_data), 500))
  test_data <- binned[nvals + seq_along(test_data)]
  vals <- binned[seq_len(nvals)]

  # -----------
  # analysis
  if (min_pred == max_pred) {
    warning("\nmodel has no variability, pROC will return NA.\n")

    p_roc <- rep(NA, 2)
    names(p_roc) <- c(paste0("Mean_AUC_ratio_at_", threshold, "%"), "pval_pROC")

    auc_ratios <- rep(NA, 3)
    names(auc_ratios) <- c("Model_partial_AUC", "Random_curve_partial_AUC",
                           "AUC_ratio")

    p_roc_res <- list(pROC_summary = p_roc, pROC_results = auc_ratios)

  } else {
    # Pixels per class, classes in increasing order. The class labels of a
    # table are character; they MUST be converted back to numbers, otherwise
    # the comparison against test values in calc_aucDF() is done as text
    # (e.g. "100" > "20" is FALSE) and omission is computed wrongly.
    class_counts <- table(vals)
    prediction_errors <- as.numeric(names(class_counts))
    class_counts <- as.vector(class_counts)

    # proportion of pixels with a class equal to or higher than each class
    fractional_area <- rev(cumsum(rev(class_counts))) / sum(class_counts)

    error_sens <- 1 - (threshold / 100)
    n_data <- length(test_data)
    n_samp <- ceiling((rand.percent / 100) * n_data)

    # one column per class, value repeated n_samp times, so a whole bootstrap
    # sample can be compared against every class in a single operation
    big_classpixels <- matrix(rep(prediction_errors, each = n_samp),
                              ncol = length(prediction_errors))

    partial_AUC <- purrr::map_df(seq_len(iterations), function(i) {
      calc_aucDF(big_classpixels, fractional_area, test_data,
                 n_data, n_samp, error_sens)
    })

    # summary: mean ratio and proportion of iterations with ratio <= 1
    naID <- !is.na(partial_AUC$auc_ratio)
    nona_valproc <- partial_AUC$auc_ratio[naID]
    mauc <- mean(nona_valproc)
    proc <- sum(nona_valproc <= 1) / length(nona_valproc)

    p_roc <- c(mauc, proc)
    names(p_roc) <- c(paste0("Mean_AUC_ratio_at_", threshold, "%"), "pval_pROC")

    auc_ratios <- partial_AUC
    names(auc_ratios) <- c("Model_partial_AUC", "Random_curve_partial_AUC",
                           "AUC_ratio")

    p_roc_res <- list(pROC_summary = p_roc, pROC_results = auc_ratios)
  }

  return(p_roc_res)
}


# Helper: largest number of decimal places found among the non-integer values
# of model_vals and test_vals. It is not called by the active code of
# kuenm_proc(). NOTE(review): values that as.character() prints in scientific
# notation (no "." in the text) are not handled, and neither is input without
# any non-integer value.
dec_places_proc <- function(model_vals, test_vals) {

  x <- c(model_vals, test_vals)
  # keep only values that differ from their rounded version
  x <- x[abs(x - round(x)) > .Machine$double.eps^0.5]

  # text after the decimal point, trailing zeros removed
  x <- do.call(rbind, strsplit(sub('0+$', '', as.character(x)), ".",
                               fixed = TRUE))[, 2]

  return(max(nchar(x), na.rm = TRUE))
}


# Helper: one bootstrap iteration of the partial ROC.
#   big_classpixels  matrix with n_samp rows; each column holds one class value.
#   fractional_area  proportion of pixels at or above each class, in the same
#                    order as the columns of big_classpixels.
#   test_data        class of each testing occurrence.
#   n_data, n_samp   number of testing values and size of the bootstrap sample.
#   error_sens       sensitivity (1 - E) that a point must exceed to be kept.
# Returns a one-row data.frame with columns auc_pmodel, auc_prand, auc_ratio.
calc_aucDF <- function(big_classpixels, fractional_area, test_data, n_data,
                       n_samp, error_sens) {
  rowsID <- sample(x = n_data, size = n_samp, replace = TRUE)
  test_data1 <- test_data[rowsID]

  # a sampled occurrence is omitted at a class when the class is above its
  # value (test_data1 is recycled down the rows of each column)
  omssion_matrix <- big_classpixels > test_data1
  sensibility <- 1 - colSums(omssion_matrix) / n_samp

  xyTable <- data.frame(fractional_area, sensibility)
  # Keep the part of the curve above the allowed sensitivity. A logical filter
  # is used on purpose: negative indexing with an empty index (no point at or
  # below the limit) would drop every row instead of keeping all of them.
  xyTable <- xyTable[xyTable$sensibility > error_sens, ]
  xyTable <- xyTable[order(xyTable$fractional_area, decreasing = FALSE), ]

  # trap_roc() is the compiled routine of this package
  # (presumably trapezoidal integration of y over x -- see src/trap_roc.cpp)
  auc_pmodel <- kuenm:::trap_roc(xyTable$fractional_area, xyTable$sensibility)
  auc_prand <- kuenm:::trap_roc(xyTable$fractional_area, xyTable$fractional_area)
  auc_ratio <- auc_pmodel / auc_prand

  auc_table <- data.frame(auc_pmodel, auc_prand, auc_ratio = auc_ratio)
  return(auc_table)
}

# ------------------------------------------------------------------------------
# /R/kuenm_feval_swd.R:
-------------------------------------------------------------------------------- 1 | #' Evaluation of final Maxent models with independent data in SWD format 2 | #' 3 | #' @description kuenm_feval_swd evaluates final Maxent models in terms of 4 | #' statistical significance (partial ROC) and omission rates with a user-defined 5 | #' threshold (E). This function works for models created in SWD format. 6 | #' 7 | #' @param path (character) directory in which folders containing final models 8 | #' were created. 9 | #' @param occ.joint (character) the csv file with training and testing 10 | #' occurrences combined, or the file containing occurrences used to create final 11 | #' models; columns must be: species, longitude, latitude, and two or more 12 | #' columns representing distinct variables. 13 | #' @param occ.ind (character) the name of the csv file with independent 14 | #' occurrences for model evaluation; these occurrences were not used when 15 | #' creating final models; columns as in \code{occ.joint}. Prepare this 16 | #' file with \code{\link{prep_independent_swd}}. 17 | #' @param replicates (logical) whether or not final models were created 18 | #' with replicates. 19 | #' @param out.eval (character) name of the folder where evaluation results will 20 | #' be written. 21 | #' @param threshold (numeric) the percentage of omission error allowed (E), 22 | #' default = 5. 23 | #' @param rand.percent (numeric) the percentage of data to be used for the 24 | #' bootstrapping process when calculating partial ROCs; default = 50. 25 | #' @param iterations (numeric) the number of times that the bootstrap is going 26 | #' to be repeated; default = 500. 27 | #' 28 | #' @return A list with two data.frame objects containing results from the 29 | #' evaluation process, and a folder, in the working directory, containing a 30 | #' csv file with the results from final model evaluation. 
#'
#' @usage
#' kuenm_feval_swd(path, occ.joint, occ.ind, replicates, out.eval, threshold = 5,
#'                 rand.percent = 50, iterations = 500)
#'
#' @export
#'
#' @details This function is used after the creation of final models.

kuenm_feval_swd <- function(path, occ.joint, occ.ind, replicates, out.eval,
                            threshold = 5, rand.percent = 50, iterations = 500) {

  #Checking potential issues
  if (missing(path)) {
    stop(paste("Argument path is not defined, this is necessary for reading the",
               "\nfinal models created with the kuenm_mod function."))
  }
  if (!dir.exists(path)) {
    stop(paste(path, "does not exist in the working directory, check folder name",
               "\nor its existence."))
  }
  if (!file.exists(occ.joint)) {
    stop(paste(occ.joint, "does not exist in the working directory, check file name",
               "\nor extension, example: species_joint.csv"))
  }
  if (!file.exists(occ.ind)) {
    stop(paste(occ.ind, "does not exist in the working directory, check file name",
               "\nor extension, example: species_ind.csv"))
  }
  if (missing(out.eval)) {
    stop(paste("Argument out.eval is not defined, this is necessary for creating a folder",
               "\nwith the outputs of this function."))
  }
  if (missing(replicates)) {
    stop(paste("Logical argument replicates is not defined, this is necessary for",
               "\nselecting the layer that will be evaluated; it can be TRUE or FALSE."))
  }

  #####
  #Data
  ###Model(s) for evaluation: folder names without the extrapolation suffix
  u_fmodels <- dir(path)
  u_fmodels <- gsub("_E$", "", u_fmodels)
  u_fmodels <- gsub("_EC$", "", u_fmodels)
  u_fmodels <- unique(gsub("_NE$", "", u_fmodels))

  n.mod <- length(u_fmodels)
  if (n.mod == 0) {
    # without this check the progress bar and the loop fail with obscure errors
    stop(paste(path, "does not contain final models to be evaluated."))
  }

  ##Joint set and independent occurrences
  occ <- read.csv(occ.joint) #read joint occurrences (only once)
  sp <- gsub(" ", "_", as.character(occ[1, 1])) #species name
  occ <- occ[, -1] #erase species name column

  occ1 <- read.csv(occ.ind) #read test occurrences
  occ1 <- occ1[, -1] #erase species name column

  #####
  #pROCs and omission rates calculation
  cat("\nPartial ROCs and omission rates calculation, please wait...\n")

  proc_res <- vector("list", n.mod) #list of pROC summaries, one per model
  om_rates <- numeric(n.mod) #omission rates, one per model

  is_unix <- .Platform$OS.type == "unix"
  if (is_unix) {
    pb <- txtProgressBar(min = 0, max = n.mod, style = 3)
  } else {
    pb <- winProgressBar(title = "Progress bar", min = 0, max = n.mod,
                         width = 300) #progress bar
  }

  for (i in seq_along(u_fmodels)) {
    Sys.sleep(0.1)
    if (is_unix) {
      setTxtProgressBar(pb, i)
    } else {
      setWinProgressBar(pb, i, title = paste(round(i / n.mod * 100, 2),
                                             "% of the evaluation process has finished"))
    }

    # Path to model for evaluation: when several folders match (distinct
    # extrapolation options), the last one listed is used
    pathm <- dir(path = path, pattern = u_fmodels[i], full.names = TRUE)
    pathm <- pathm[length(pathm)]

    #Models to be evaluated
    if (isTRUE(replicates)) {
      mods1 <- list.files(pathm, pattern = paste(sp, "median.csv", sep = "_"),
                          full.names = TRUE) #median of replicates
    } else {
      mods1 <- list.files(pathm, pattern = paste0(sp, ".csv"),
                          full.names = TRUE) #single model
    }

    mod1 <- read.csv(mods1)

    # predictions at independent occurrences, matched by the first two columns
    # (presumably longitude and latitude -- TODO confirm) of both tables
    tval <- mod1[paste(mod1[, 1], mod1[, 2]) %in% paste(occ1[, 1], occ1[, 2]), 3]

    #partialROC calculation
    proc <- kuenm_proc(occ.test = tval, model = mod1[, 3], threshold = threshold,
                       rand.percent = rand.percent, iterations = iterations)

    #pROCs table
    proc_res[[i]] <- proc[[1]]

    #Omission rates calculation (or() is another function of this package)
    om_rates[i] <- or(mod1, occ, occ1, threshold)
  }
  suppressMessages(close(pb))

  ##Creating final tables
  ###From pROC analyses
  proc_res1 <- do.call(rbind, proc_res) #joining tables of the pROC results
  proc_res_m <- data.frame(u_fmodels, proc_res1) #model names + pROC summary

  #####
  #Joining the results
  ku_enm_eval <- data.frame(proc_res_m, om_rates)
  colnames(ku_enm_eval) <- c("Model", "Mean_AUC_ratio", "Partial_ROC",
                             paste0("Omission_rate_at_", threshold, "%"))

  #####
  #Statistics of the process
  ##Counting. NA values (e.g., pROC of a model without variability) never
  ##count as meeting a criterion
  limit <- threshold / 100
  sig_proc <- !is.na(ku_enm_eval[, 3]) & ku_enm_eval[, 3] <= limit
  low_or <- !is.na(ku_enm_eval[, 4]) & ku_enm_eval[, 4] <= limit

  ##Preparing the table
  r_names <- c("All final models", "Statistically significant models",
               "Models meeting omission rate criteria", "Models meeting pROC and omission rate critera")
  statis <- c(nrow(ku_enm_eval),
              sum(sig_proc),
              sum(low_or),
              sum(sig_proc & low_or))

  ku_enm_stats <- data.frame(r_names, statis)
  colnames(ku_enm_stats) <- c("Criteria", "Number of models")

  #####
  #Writing the results
  ##csv files
  cat("\nWriting kuenm_feval results...\n")
  if (!dir.exists(out.eval)) {
    dir.create(out.eval)
  }

  name <- file.path(out.eval, "fm_evaluation_results.csv")

  write.csv(ku_enm_eval, file = name, eol = "\n", na = "NA", row.names = FALSE)

  ##Retuning objects
  ###Dataframes in a list
  list_res <- list(ku_enm_stats, ku_enm_eval)
  names(list_res) <- c("Summary", "Evaluated models")

  #####
  #Finalizing the function
  cat("\nProcess finished\n")
  cat(paste("A folder containing results of the evaluation of", n.mod,
            "\nfinal models has been written: ", out.eval, "\n"))
  cat(paste("\nCheck your working directory!!!", getwd(), sep = " "))

  return(list_res)
}
--------------------------------------------------------------------------------