├── .ignore ├── man-roxygen ├── param_row_ids.R ├── param_cols.R ├── param_id.R ├── param_label.R ├── param_na_rm.R ├── param_target.R ├── field_param_set.R ├── param_param_set.R ├── param_view.R ├── param_measures.R ├── param_task_properties.R ├── field_id.R ├── field_packages.R ├── param_primary_key.R ├── field_iters.R ├── field_label.R ├── param_rows.R ├── param_range.R ├── param_extra_args.R ├── param_predict_types.R ├── field_man.R ├── param_allow_hotstart.R ├── param_feature_types.R ├── param_task_type.R ├── param_callbacks.R ├── param_man.R ├── param_predict_type.R ├── param_backend.R ├── param_store_models.R ├── param_convert_predictions.R ├── field_task_type.R ├── param_packages.R ├── param_minimize.R ├── resampling.R ├── param_unmarshal.R ├── param_clone.R ├── task.R ├── seealso_databackend.R ├── seealso_resample.R ├── seealso_benchmark.R ├── task_generator.R ├── param_store_backends.R ├── section_progress_bars.R ├── param_ties_method.R ├── learner.R ├── measure.R ├── field_col_hashes.R ├── param_predict_sets.R ├── seealso_prediction.R ├── seealso_task_generator.R ├── param_param_values.R ├── param_aggregator.R ├── seealso_resampling.R ├── section_logging.R ├── seealso_measure.R ├── param_encapsulate.R ├── section_predict_sets.R ├── section_parallelization.R ├── field_predict_sets.R ├── seealso_task.R ├── param_measure_properties.R ├── param_average.R ├── measure_regr.R ├── seealso_learner.R ├── measure_binary.R ├── measure_classif.R ├── measure_similarity.R └── param_learner_properties.R ├── man ├── figures │ ├── logo.png │ ├── block.png │ └── block.svg ├── assert_empty_ellipsis.Rd ├── as_benchmark_result.Rd ├── reexports.Rd ├── assert_resample_callback.Rd ├── print.roc_measures.Rd ├── mlr3.holdout_task.Rd ├── col_info.Rd ├── as_learner.Rd ├── default_fallback.Rd ├── as_resampling.Rd ├── auto_convert.Rd ├── default_measures.Rd ├── deprecated_binding.Rd ├── task_check_col_roles.Rd ├── warn_deprecated.Rd ├── as_prediction_regr.Rd ├── as_prediction_data.Rd 
├── convert_task.Rd ├── as_prediction_classif.Rd ├── partition.Rd ├── mlr3.model_extractor.Rd ├── as_task.Rd ├── as_measure.Rd ├── score_roc_measures.Rd ├── as_task_unsupervised.Rd ├── as_resample_result.Rd ├── as_prediction.Rd ├── predict.Learner.Rd ├── uhash.Rd ├── set_threads.Rd └── mlr_measures_sim.jaccard.Rd ├── inst ├── extdata │ ├── spam.rds │ ├── wine.rds │ ├── german_credit.rds │ ├── california_housing.rds │ ├── spam.R │ ├── german_credit.R │ ├── california_housing.R │ └── wine.R ├── testthat │ ├── helper_debugging.R │ └── helper_misc.R └── CITATION ├── pkgdown └── favicon │ ├── favicon.ico │ ├── favicon-16x16.png │ ├── favicon-32x32.png │ ├── apple-touch-icon.png │ ├── apple-touch-icon-60x60.png │ ├── apple-touch-icon-76x76.png │ ├── apple-touch-icon-120x120.png │ ├── apple-touch-icon-152x152.png │ └── apple-touch-icon-180x180.png ├── tests ├── testthat │ ├── teardown.R │ ├── _object_snapshots │ │ ├── bmr.rds │ │ ├── rr.rds │ │ ├── measure.rds │ │ ├── resampling.rds │ │ ├── task_regr.rds │ │ ├── learner_regr.rds │ │ ├── task_classif.rds │ │ ├── learner_classif.rds │ │ └── snapshot.R │ ├── helper.R │ ├── test_TaskSupervised.R │ ├── test_mlr_measures_classif.auc.R │ ├── test_expectations.R │ ├── test_mlr_measures_selected_features.R │ ├── setup.R │ ├── test_set_threads.R │ ├── test_mlr_sugar.R │ ├── test_assertions.R │ ├── test_score_roc_measures.R │ ├── test_ContextEvaluation.R │ ├── test_as_resampling.R │ ├── helper_resampling.R │ ├── test_as_measure.R │ ├── test_mlr_resampling_loo.R │ ├── test_MeasureClassif.R │ ├── test_resampling_insample.R │ ├── test_DataBackend.R │ ├── test_Dictionary.R │ ├── test_MeasureRegrPinball.R │ ├── test_partition.R │ ├── _snaps │ │ └── Task.md │ ├── test_MeasureRegr.R │ ├── test_mlr_tasks.R │ ├── test_as_learner.R │ ├── test_mlr_resampling_subsampling.R │ ├── test_DataBackendDataTable.R │ ├── test_mlr_learners.R │ ├── test_mlr_measures_similarity.R │ ├── test_install_pkgs.R │ ├── test_as_task.R │ ├── test_mlr_resamplings.R │ 
├── test_mlr_resampling_custom.R │ ├── test_mlr_task_generators.R │ ├── test_mlr_resampling_cv.R │ ├── test_hashes.R │ ├── test_default_fallback.R │ ├── test_MeasureRegrRQR.R │ ├── test_warn_deprecated.R │ ├── test_mlr_callbacks.R │ ├── test_mlr_measures.R │ ├── test_mlr_resampling_holdout.R │ ├── test_mlr_resampling_repeated_cv.R │ ├── test_mlr_resampling_bootstrap.R │ └── test_MeasureInternalValidScore.R └── testthat.R ├── .github ├── dependabot.yml └── workflows │ ├── pkgdown.yml │ ├── r-cmd-check.yml │ ├── no-suggest-cmd-check.yml │ └── dev-cmd-check.yml ├── R ├── reexports.R ├── fix_factor_levels.R ├── TaskClassif_sonar.R ├── TaskClassif_pima.R ├── TaskClassif_iris.R ├── as_benchmark_result.R ├── TaskClassif_zoo.R ├── as_resampling.R ├── TaskUnsupervised.R ├── default_measures.R ├── TaskRegr_mtcars.R ├── as_learner.R ├── set_validate.R ├── TaskRegr_california_housing.R ├── TaskClassif_wine.R ├── TaskClassif_breast_cancer.R ├── as_prediction_regr.R ├── TaskClassif_penguins.R ├── TaskClassif_spam.R ├── as_resample_result.R ├── TaskGeneratorFriedman1.R ├── MeasureOOBError.R ├── ResamplingInsample.R ├── as_measure.R ├── as_prediction.R ├── as_task.R ├── TaskSupervised.R ├── MeasureBIC.R ├── helper_data_table.R ├── MeasureRegr.R ├── MeasureClassif.R ├── TaskGeneratorPeak.R ├── MeasureDebug.R ├── mlr_task_generators.R ├── MeasureAIC.R ├── helper_hashes.R ├── mlr_resamplings.R ├── TaskGeneratorXor.R ├── TaskClassif_german_credit.R ├── as_prediction_data.R ├── as_prediction_classif.R ├── partition.R ├── TaskRegr.R ├── as_task_unsupervised.R └── TaskGenerator2DNormals.R ├── .editorconfig ├── mlr3.Rproj ├── .Rbuildignore ├── .lintr └── CONTRIBUTING.md /.ignore: -------------------------------------------------------------------------------- 1 | man/ 2 | attic/ 3 | pkgdown/ 4 | revdep/ 5 | docs/ 6 | -------------------------------------------------------------------------------- /man-roxygen/param_row_ids.R: 
-------------------------------------------------------------------------------- 1 | #' @param row_ids `integer()`\cr 2 | #' Row indices. 3 | -------------------------------------------------------------------------------- /man/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3/HEAD/man/figures/logo.png -------------------------------------------------------------------------------- /inst/extdata/spam.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3/HEAD/inst/extdata/spam.rds -------------------------------------------------------------------------------- /inst/extdata/wine.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3/HEAD/inst/extdata/wine.rds -------------------------------------------------------------------------------- /man/figures/block.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3/HEAD/man/figures/block.png -------------------------------------------------------------------------------- /man-roxygen/param_cols.R: -------------------------------------------------------------------------------- 1 | #' @param cols (`character()`)\cr 2 | #' Vector of column names. 3 | -------------------------------------------------------------------------------- /man-roxygen/param_id.R: -------------------------------------------------------------------------------- 1 | #' @param id (`character(1)`)\cr 2 | #' Identifier for the new instance. 3 | -------------------------------------------------------------------------------- /man-roxygen/param_label.R: -------------------------------------------------------------------------------- 1 | #' @param label (`character(1)`)\cr 2 | #' Label for the new instance. 
3 | -------------------------------------------------------------------------------- /man-roxygen/param_na_rm.R: -------------------------------------------------------------------------------- 1 | #' @param na_rm `logical(1)`\cr 2 | #' Whether to remove NAs or not. 3 | -------------------------------------------------------------------------------- /man-roxygen/param_target.R: -------------------------------------------------------------------------------- 1 | #' @param target (`character(1)`)\cr 2 | #' Name of the target column. 3 | -------------------------------------------------------------------------------- /pkgdown/favicon/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3/HEAD/pkgdown/favicon/favicon.ico -------------------------------------------------------------------------------- /inst/extdata/german_credit.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3/HEAD/inst/extdata/german_credit.rds -------------------------------------------------------------------------------- /man-roxygen/field_param_set.R: -------------------------------------------------------------------------------- 1 | #' @field param_set ([paradox::ParamSet])\cr 2 | #' Set of hyperparameters. 3 | -------------------------------------------------------------------------------- /man-roxygen/param_param_set.R: -------------------------------------------------------------------------------- 1 | #' @param param_set ([paradox::ParamSet])\cr 2 | #' Set of hyperparameters. 3 | -------------------------------------------------------------------------------- /man-roxygen/param_view.R: -------------------------------------------------------------------------------- 1 | #' @param view `character(1)`\cr 2 | #' Single `uhash` to restrict the results to. 
3 | -------------------------------------------------------------------------------- /tests/testthat/teardown.R: -------------------------------------------------------------------------------- 1 | options(old_opts) 2 | lg$set_threshold(old_threshold) 3 | future::plan(old_plan) 4 | -------------------------------------------------------------------------------- /inst/extdata/california_housing.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3/HEAD/inst/extdata/california_housing.rds -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3/HEAD/pkgdown/favicon/favicon-16x16.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3/HEAD/pkgdown/favicon/favicon-32x32.png -------------------------------------------------------------------------------- /man-roxygen/param_measures.R: -------------------------------------------------------------------------------- 1 | #' @param measures ([Measure] | list of [Measure])\cr 2 | #' Measure(s) to calculate. 
3 | -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3/HEAD/pkgdown/favicon/apple-touch-icon.png -------------------------------------------------------------------------------- /tests/testthat/_object_snapshots/bmr.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3/HEAD/tests/testthat/_object_snapshots/bmr.rds -------------------------------------------------------------------------------- /tests/testthat/_object_snapshots/rr.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3/HEAD/tests/testthat/_object_snapshots/rr.rds -------------------------------------------------------------------------------- /man-roxygen/param_task_properties.R: -------------------------------------------------------------------------------- 1 | #' @param task_properties (`character()`)\cr 2 | #' Required task properties, see [Task]. 
3 | -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-60x60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3/HEAD/pkgdown/favicon/apple-touch-icon-60x60.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-76x76.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3/HEAD/pkgdown/favicon/apple-touch-icon-76x76.png -------------------------------------------------------------------------------- /man-roxygen/field_id.R: -------------------------------------------------------------------------------- 1 | #' @field id (`character(1)`)\cr 2 | #' Identifier of the object. 3 | #' Used in tables, plot and text output. 4 | -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-120x120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3/HEAD/pkgdown/favicon/apple-touch-icon-120x120.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-152x152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3/HEAD/pkgdown/favicon/apple-touch-icon-152x152.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-180x180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3/HEAD/pkgdown/favicon/apple-touch-icon-180x180.png -------------------------------------------------------------------------------- /tests/testthat/_object_snapshots/measure.rds: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3/HEAD/tests/testthat/_object_snapshots/measure.rds -------------------------------------------------------------------------------- /tests/testthat/_object_snapshots/resampling.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3/HEAD/tests/testthat/_object_snapshots/resampling.rds -------------------------------------------------------------------------------- /tests/testthat/_object_snapshots/task_regr.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3/HEAD/tests/testthat/_object_snapshots/task_regr.rds -------------------------------------------------------------------------------- /tests/testthat/_object_snapshots/learner_regr.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3/HEAD/tests/testthat/_object_snapshots/learner_regr.rds -------------------------------------------------------------------------------- /tests/testthat/_object_snapshots/task_classif.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3/HEAD/tests/testthat/_object_snapshots/task_classif.rds -------------------------------------------------------------------------------- /tests/testthat/_object_snapshots/learner_classif.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3/HEAD/tests/testthat/_object_snapshots/learner_classif.rds -------------------------------------------------------------------------------- /man-roxygen/field_packages.R: -------------------------------------------------------------------------------- 1 | #' @field packages (`character(1)`)\cr 2 | #' 
Set of required packages. 3 | #' These packages are loaded, but not attached. 4 | -------------------------------------------------------------------------------- /man-roxygen/param_primary_key.R: -------------------------------------------------------------------------------- 1 | #' @param primary_key (`character(1)` | `integer()`)\cr 2 | #' Name of the primary key column, or integer vector of row ids. 3 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | -------------------------------------------------------------------------------- /man-roxygen/field_iters.R: -------------------------------------------------------------------------------- 1 | #' @field iters (`integer(1)`)\cr 2 | #' Returns the number of resampling iterations, depending on the values stored in the `param_set`. 3 | -------------------------------------------------------------------------------- /man-roxygen/field_label.R: -------------------------------------------------------------------------------- 1 | #' @field label (`character(1)`)\cr 2 | #' Label for this object. 3 | #' Can be used in tables, plot and text output instead of the ID. 4 | -------------------------------------------------------------------------------- /man-roxygen/param_rows.R: -------------------------------------------------------------------------------- 1 | #' @param rows (positive `integer()`)\cr 2 | #' Vector of row indices. 3 | #' Always refers to the complete data set, even after filtering. 
4 | -------------------------------------------------------------------------------- /man-roxygen/param_range.R: -------------------------------------------------------------------------------- 1 | #' @param range (`numeric(2)`)\cr 2 | #' Feasible range for this measure as `c(lower_bound, upper_bound)`. 3 | #' Both bounds may be infinite. 4 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | if (requireNamespace("testthat", quietly = TRUE)) { 2 | library("checkmate") 3 | library("testthat") 4 | library("mlr3") 5 | test_check("mlr3") 6 | } 7 | -------------------------------------------------------------------------------- /man-roxygen/param_extra_args.R: -------------------------------------------------------------------------------- 1 | #' @param extra_args (named `list()`)\cr 2 | #' Named list of constructor arguments, required for converting task types 3 | #' via [convert_task()]. 4 | -------------------------------------------------------------------------------- /man-roxygen/param_predict_types.R: -------------------------------------------------------------------------------- 1 | #' @param predict_types (`character()`)\cr 2 | #' Supported predict types. Must be a subset of [`mlr_reflections$learner_predict_types`][mlr_reflections]. 
3 | -------------------------------------------------------------------------------- /inst/extdata/spam.R: -------------------------------------------------------------------------------- 1 | root = rprojroot::find_package_root_file() 2 | data = mlr3misc::load_dataset("spam", "kernlab") 3 | saveRDS(data, file = file.path(root, "inst", "extdata", "spam.rds"), version = 2L) 4 | -------------------------------------------------------------------------------- /man-roxygen/field_man.R: -------------------------------------------------------------------------------- 1 | #' @field man (`character(1)`)\cr 2 | #' String in the format `[pkg]::[topic]` pointing to a manual page for this object. 3 | #' Defaults to `NA`, but can be set by child classes. 4 | -------------------------------------------------------------------------------- /man-roxygen/param_allow_hotstart.R: -------------------------------------------------------------------------------- 1 | #' @param allow_hotstart (`logical(1)`)\cr 2 | #' Determines if learner(s) are hot started with trained models in 3 | #' `$hotstart_stack`. See also [HotstartStack]. 4 | -------------------------------------------------------------------------------- /man-roxygen/param_feature_types.R: -------------------------------------------------------------------------------- 1 | #' @param feature_types (`character()`)\cr 2 | #' Feature types the learner operates on. Must be a subset of [`mlr_reflections$task_feature_types`][mlr_reflections]. 3 | -------------------------------------------------------------------------------- /man-roxygen/param_task_type.R: -------------------------------------------------------------------------------- 1 | #' @param task_type (`character(1)`)\cr 2 | #' Type of task, e.g. `"regr"` or `"classif"`. 3 | #' Must be an element of [mlr_reflections$task_types$type][mlr_reflections]. 
4 | -------------------------------------------------------------------------------- /man-roxygen/param_callbacks.R: -------------------------------------------------------------------------------- 1 | #' @param callbacks (List of [mlr3misc::Callback])\cr 2 | #' Callbacks to be executed during the resampling process. 3 | #' See [CallbackResample] and [ContextResample] for details. 4 | -------------------------------------------------------------------------------- /tests/testthat/helper.R: -------------------------------------------------------------------------------- 1 | library(mlr3) 2 | library(checkmate) 3 | library(testthat) 4 | 5 | lapply(list.files(system.file("testthat", package = "mlr3"), pattern = "^helper.*\\.[rR]", full.names = TRUE), source) 6 | -------------------------------------------------------------------------------- /man-roxygen/param_man.R: -------------------------------------------------------------------------------- 1 | #' @param man (`character(1)`)\cr 2 | #' String in the format `[pkg]::[topic]` pointing to a manual page for this object. 3 | #' The referenced help page can be opened via method `$help()`. 4 | -------------------------------------------------------------------------------- /man-roxygen/param_predict_type.R: -------------------------------------------------------------------------------- 1 | #' @param predict_type (`character(1)`)\cr 2 | #' Required predict type of the [Learner]. 3 | #' Possible values are stored in [mlr_reflections$learner_predict_types][mlr_reflections]. 
4 | -------------------------------------------------------------------------------- /inst/extdata/german_credit.R: -------------------------------------------------------------------------------- 1 | root = rprojroot::find_package_root_file() 2 | data = mlr3misc::load_dataset("german", "rchallenge") 3 | saveRDS(data, file = file.path(root, "inst", "extdata", "german_credit.rds"), version = 2L) 4 | -------------------------------------------------------------------------------- /R/reexports.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | data.table::as.data.table 3 | 4 | #' @export 5 | data.table::data.table 6 | 7 | #' @export 8 | mlr3misc::mlr_callbacks 9 | 10 | #' @export 11 | mlr3misc::clbk 12 | 13 | #' @export 14 | mlr3misc::clbks 15 | -------------------------------------------------------------------------------- /man-roxygen/param_backend.R: -------------------------------------------------------------------------------- 1 | #' @param backend ([DataBackend])\cr 2 | #' Either a [DataBackend], or any object which is convertible to a [DataBackend] with `as_data_backend()`. 3 | #' E.g., a `data.frame()` will be converted to a [DataBackendDataTable]. 4 | -------------------------------------------------------------------------------- /man-roxygen/param_store_models.R: -------------------------------------------------------------------------------- 1 | #' @param store_models (`logical(1)`)\cr 2 | #' Store the fitted model in the resulting object? 3 | #' Set to `TRUE` if you want to further analyse the models or want to 4 | #' extract information like variable importance. 
5 | -------------------------------------------------------------------------------- /man-roxygen/param_convert_predictions.R: -------------------------------------------------------------------------------- 1 | #' @param convert_predictions (`logical(1)`)\cr 2 | #' If `TRUE` (default), converts the internal [PredictionData] objects to 3 | #' regular [Prediction] objects. 4 | #' Uses all predict sets specified via `predict_sets`. 5 | -------------------------------------------------------------------------------- /man-roxygen/field_task_type.R: -------------------------------------------------------------------------------- 1 | #' @field task_type (`character(1)`)\cr 2 | #' Task type, e.g. `"classif"` or `"regr"`. 3 | #' 4 | #' For a complete list of possible task types (depending on the loaded packages), 5 | #' see [`mlr_reflections$task_types$type`][mlr_reflections]. 6 | -------------------------------------------------------------------------------- /man-roxygen/param_packages.R: -------------------------------------------------------------------------------- 1 | #' @param packages (`character()`)\cr 2 | #' Set of required packages. 3 | #' A warning is signaled by the constructor if at least one of the packages is not installed, 4 | #' but loaded (not attached) later on-demand via [requireNamespace()]. 5 | -------------------------------------------------------------------------------- /man-roxygen/param_minimize.R: -------------------------------------------------------------------------------- 1 | #' @param minimize (`logical(1)`)\cr 2 | #' Set to `TRUE` if good predictions correspond to small values, 3 | #' and to `FALSE` if good predictions correspond to large values. 4 | #' If set to `NA` (default), tuning this measure is not possible. 
5 | -------------------------------------------------------------------------------- /man-roxygen/resampling.R: -------------------------------------------------------------------------------- 1 | #' @section Dictionary: 2 | #' This [Resampling] can be instantiated via the [dictionary][mlr3misc::Dictionary] [mlr_resamplings] or with the associated sugar function [rsmp()]: 3 | #' ``` 4 | #' mlr_resamplings$get("<%= id %>") 5 | #' rsmp("<%= id %>") 6 | #' ``` 7 | -------------------------------------------------------------------------------- /man-roxygen/param_unmarshal.R: -------------------------------------------------------------------------------- 1 | #' @param unmarshal (`logical(1)`)\cr 2 | #' Whether to unmarshal learners that were marshaled during the execution. 3 | #' If `TRUE` all models are stored in unmarshaled form. 4 | #' If `FALSE`, all learners (that need marshaling) are stored in marshaled form. 5 | -------------------------------------------------------------------------------- /tests/testthat/test_TaskSupervised.R: -------------------------------------------------------------------------------- 1 | test_that("TaskSupervised Construction", { 2 | b = as_data_backend(iris) 3 | task = TaskSupervised$new(id = "foo", "classif", b, target = "Species") 4 | b = task$backend 5 | expect_backend(b) 6 | expect_task(task) 7 | expect_task_supervised(task) 8 | }) 9 | -------------------------------------------------------------------------------- /man-roxygen/param_clone.R: -------------------------------------------------------------------------------- 1 | #' @param clone (`character()`)\cr 2 | #' Select the input objects to be cloned before proceeding by 3 | #' providing a set with possible values `"task"`, `"learner"` and 4 | #' `"resampling"` for [Task], [Learner] and [Resampling], respectively. 5 | #' Per default, all input objects are cloned. 
6 | -------------------------------------------------------------------------------- /tests/testthat/test_mlr_measures_classif.auc.R: -------------------------------------------------------------------------------- 1 | test_that("mlr_measures_auc", { 2 | task = tsk("sonar") 3 | lrn = lrn("classif.featureless", predict_type = "prob") 4 | m = msr("classif.auc") 5 | measures = list(m) 6 | 7 | p = lrn$train(task)$predict(task) 8 | perf = p$score(measures) 9 | expect_equal(unname(perf), 0.5) 10 | }) 11 | -------------------------------------------------------------------------------- /tests/testthat/test_expectations.R: -------------------------------------------------------------------------------- 1 | test_that("expecations do not use globals", { 2 | skip_if_not_installed("codetools") 3 | 4 | ee = new.env() 5 | path = system.file(file.path("testthat", "helper_expectations.R"), package = "mlr3") 6 | sys.source(path, envir = ee) 7 | 8 | testthat::expect_silent(codetools::checkUsageEnv(ee)) 9 | }) 10 | -------------------------------------------------------------------------------- /inst/extdata/california_housing.R: -------------------------------------------------------------------------------- 1 | # download data from https://www.kaggle.com/datasets/camnugent/california-housing-prices 2 | root = rprojroot::find_package_root_file() 3 | data = data.table::fread("housing.csv") 4 | data[, ocean_proximity := as.factor(ocean_proximity)] 5 | saveRDS(data, file = file.path(root, "inst", "extdata", "california_housing.rds"), version = 2L) 6 | -------------------------------------------------------------------------------- /man-roxygen/task.R: -------------------------------------------------------------------------------- 1 | #' @section Dictionary: 2 | #' This [Task] can be instantiated via the [dictionary][mlr3misc::Dictionary] [mlr_tasks] or with the associated sugar function [tsk()]: 3 | #' ``` 4 | #' mlr_tasks$get("<%= id %>") 5 | #' tsk("<%= id %>") 6 | #' ``` 7 | #' 8 | 
#' @section Meta Information: 9 | #' `r mlr3misc::rd_info(mlr3::tsk("<%= id %>"))` 10 | #' @md 11 | -------------------------------------------------------------------------------- /man-roxygen/seealso_databackend.R: -------------------------------------------------------------------------------- 1 | #' @seealso 2 | #' 3 | #' * Chapter in the [mlr3book](https://mlr3book.mlr-org.com/): 4 | #' \url{https://mlr3book.mlr-org.com/chapters/chapter10/advanced_technical_aspects_of_mlr3.html#sec-backends} 5 | #' * Package \CRANpkg{mlr3db} to interface out-of-memory data, 6 | #' e.g. SQL servers or \CRANpkg{duckdb}. 7 | #' 8 | #' @family DataBackend 9 | -------------------------------------------------------------------------------- /tests/testthat/test_mlr_measures_selected_features.R: -------------------------------------------------------------------------------- 1 | test_that("selected_features", { 2 | task = tsk("sonar") 3 | lrn = lrn("classif.rpart") 4 | measures = list(msr("classif.ce"), msr("selected_features")) 5 | 6 | rr = resample(task, lrn, rsmp("holdout"), store_models = TRUE) 7 | perf = rr$aggregate(measures) 8 | expect_count(perf[["selected_features"]]) 9 | }) 10 | -------------------------------------------------------------------------------- /man-roxygen/seealso_resample.R: -------------------------------------------------------------------------------- 1 | #' @seealso 2 | #' 3 | #' * [as_benchmark_result()] to convert to a [BenchmarkResult]. 4 | #' * Chapter in the [mlr3book](https://mlr3book.mlr-org.com/): 5 | #' \url{https://mlr3book.mlr-org.com/chapters/chapter3/evaluation_and_benchmarking.html#sec-resampling} 6 | #' * Package \CRANpkg{mlr3viz} for some generic visualizations. 
7 | #' 8 | #' @family resample 9 | -------------------------------------------------------------------------------- /man-roxygen/seealso_benchmark.R: -------------------------------------------------------------------------------- 1 | #' @seealso 2 | #' 3 | #' * Chapter in the [mlr3book](https://mlr3book.mlr-org.com/): 4 | #' \url{https://mlr3book.mlr-org.com/chapters/chapter3/evaluation_and_benchmarking.html#sec-benchmarking} 5 | #' * Package \CRANpkg{mlr3viz} for some generic visualizations. 6 | #' * \CRANpkg{mlr3benchmark} for post-hoc analysis of benchmark results. 7 | #' 8 | #' @family benchmark 9 | -------------------------------------------------------------------------------- /man-roxygen/task_generator.R: -------------------------------------------------------------------------------- 1 | #' @section Dictionary: 2 | #' This [TaskGenerator] can be instantiated via the [dictionary][mlr3misc::Dictionary] [mlr_task_generators] or with the associated sugar function [tgen()]: 3 | #' ``` 4 | #' mlr_task_generators$get("<%= id %>") 5 | #' tgen("<%= id %>") 6 | #' ``` 7 | #' 8 | #' @section Parameters: 9 | #' `r mlr3misc::rd_info(mlr3::tgen("<%= id %>")$param_set)` 10 | #' @md 11 | -------------------------------------------------------------------------------- /tests/testthat/setup.R: -------------------------------------------------------------------------------- 1 | old_opts = options( 2 | warnPartialMatchArgs = TRUE, 3 | warnPartialMatchAttr = TRUE, 4 | warnPartialMatchDollar = TRUE 5 | ) 6 | 7 | # https://github.com/HenrikBengtsson/Wishlist-for-R/issues/88 8 | old_opts = lapply(old_opts, function(x) if (is.null(x)) FALSE else x) 9 | 10 | old_threshold = lg$threshold 11 | old_plan = future::plan() 12 | lg$set_threshold(0) 13 | future::plan("sequential") 14 | -------------------------------------------------------------------------------- /tests/testthat/test_set_threads.R: -------------------------------------------------------------------------------- 1 | 
test_that("set_threads", { 2 | l1 = lrn("classif.featureless") 3 | expect_learner(set_threads(l1)) 4 | 5 | l2 = lrn("classif.debug") 6 | expect_null(l2$param_set$values$threads) 7 | expect_learner(set_threads(l2, 1)) 8 | expect_equal(l2$param_set$values$threads, 1) 9 | 10 | x = list(l1, l2) 11 | expect_list(set_threads(x, 2)) 12 | expect_equal(l2$param_set$values$threads, 2) 13 | }) 14 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # See http://editorconfig.org 2 | root = true 3 | 4 | [*] 5 | charset = utf-8 6 | end_of_line = lf 7 | insert_final_newline = true 8 | indent_style = space 9 | trim_trailing_whitespace = true 10 | 11 | [*.{r,R,md,Rmd}] 12 | indent_size = 2 13 | 14 | [*.{c,h}] 15 | indent_size = 4 16 | 17 | [*.{cpp,hpp}] 18 | indent_size = 4 19 | 20 | [{NEWS.md,DESCRIPTION,LICENSE}] 21 | max_line_length = 80 22 | 23 | [CITATION.cff] 24 | indent_size = 2 25 | -------------------------------------------------------------------------------- /man-roxygen/param_store_backends.R: -------------------------------------------------------------------------------- 1 | #' @param store_backends (`logical(1)`)\cr 2 | #' Keep the [DataBackend] of the [Task] in the [ResampleResult]? 3 | #' Set to `TRUE` if your performance measures require a [Task], 4 | #' or to analyse results more conveniently. 5 | #' Set to `FALSE` to reduce the file size and memory footprint 6 | #' after serialization. 7 | #' The current default is `TRUE`, but this eventually will be changed 8 | #' in a future release. 9 | -------------------------------------------------------------------------------- /man-roxygen/section_progress_bars.R: -------------------------------------------------------------------------------- 1 | #' @section Progress Bars: 2 | #' This function supports progress bars via the package \CRANpkg{progressr}. 
3 | #' Simply wrap the function call in [progressr::with_progress()] to enable them. 4 | #' Alternatively, call [progressr::handlers()] with `global = TRUE` to enable progress bars 5 | #' globally. 6 | #' We recommend the \CRANpkg{progress} package as backend which can be enabled with 7 | #' `progressr::handlers("progress")`. 8 | -------------------------------------------------------------------------------- /man-roxygen/param_ties_method.R: -------------------------------------------------------------------------------- 1 | #' @param ties_method (`character(1)`)\cr 2 | #' Method to handle ties in probabilities when selecting a class label. 3 | #' Must be one of `"random"`, `"first"` or `"last"` (corresponding to the same options in [max.col()]). 4 | #' * `"random"`: Randomly select one of the tied class labels (default). 5 | #' * `"first"`: Select the first class label among tied values. 6 | #' * `"last"`: Select the last class label among tied values. 7 | -------------------------------------------------------------------------------- /inst/testthat/helper_debugging.R: -------------------------------------------------------------------------------- 1 | `[[.R6` = function(x, i, ...) 
{ 2 | if (exists(i, envir = x, inherits = FALSE)) 3 | return(get(i, envir = x)) 4 | stop("R6 class ", paste0(class(x), collapse = "/") ," does not have slot '", i, "'!") 5 | } 6 | 7 | `$.R6` = function(x, name) { 8 | if (exists(name, envir = x, inherits = FALSE)) 9 | return(get(name, envir = x)) 10 | stop("R6 class ", paste0(class(x), collapse = "/") ," does not have slot '", name, "'!") 11 | } 12 | -------------------------------------------------------------------------------- /mlr3.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageCheckArgs: --no-manual --as-cran --no-tests 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /man-roxygen/learner.R: -------------------------------------------------------------------------------- 1 | #' @section Dictionary: 2 | #' This [Learner] can be instantiated via the [dictionary][mlr3misc::Dictionary] [mlr_learners] or with the associated sugar function [lrn()]: 3 | #' ``` 4 | #' mlr_learners$get("<%= id %>") 5 | #' lrn("<%= id %>") 6 | #' ``` 7 | #' 8 | #' @section Meta Information: 9 | #' `r mlr3misc::rd_info(mlr3::lrn("<%= id %>"))` 10 | #' @md 11 | #' 12 | #' @section Parameters: 13 | #' `r mlr3misc::rd_info(mlr3::lrn("<%= id %>")$param_set)` 14 | #' @md 15 | -------------------------------------------------------------------------------- /man-roxygen/measure.R: -------------------------------------------------------------------------------- 1 | #' @section Dictionary: 2 | #' This [Measure] can be instantiated via the 
[dictionary][mlr3misc::Dictionary] [mlr_measures] or with the associated sugar function [msr()]: 3 | #' ``` 4 | #' mlr_measures$get("<%= id %>") 5 | #' msr("<%= id %>") 6 | #' ``` 7 | #' 8 | #' @section Meta Information: 9 | #' `r mlr3misc::rd_info(mlr3::msr("<%= id %>"))` 10 | #' @md 11 | #' 12 | #' @section Parameters: 13 | #' `r mlr3misc::rd_info(mlr3::msr("<%= id %>")$param_set)` 14 | #' @md 15 | -------------------------------------------------------------------------------- /man-roxygen/field_col_hashes.R: -------------------------------------------------------------------------------- 1 | #' @field col_hashes (named `character`)\cr 2 | #' Hash (unique identifier) for all columns except the `primary_key`: A `character` vector, named by the columns that each element refers to.\cr 3 | #' Columns of different [`Task`]s or [`DataBackend`]s that have agreeing `col_hashes` always represent the same data, given that the same `row`s are selected. 4 | #' The reverse is not necessarily true: There can be columns with the same content that have different `col_hashes`. 
5 | -------------------------------------------------------------------------------- /inst/extdata/wine.R: -------------------------------------------------------------------------------- 1 | root = rprojroot::find_package_root_file() 2 | data = data.table::fread("https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data", data.table = FALSE) 3 | names(data) = c("type", "alcohol", "malic", "ash", "alcalinity", "magnesium", "phenols", 4 | "flavanoids", "nonflavanoids", "proanthocyanins", "color", "hue", "dilution", "proline") 5 | data$type = factor(data$type, levels = 1:3) 6 | saveRDS(data, file = file.path(root, "inst", "extdata", "wine.rds"), version = 2L) 7 | -------------------------------------------------------------------------------- /man-roxygen/param_predict_sets.R: -------------------------------------------------------------------------------- 1 | #' @param predict_sets (`character()`)\cr 2 | #' Prediction sets to operate on, used in `aggregate()` to extract the matching `predict_sets` from the [ResampleResult]. 3 | #' Multiple predict sets are calculated by the respective [Learner] during [resample()]/[benchmark()]. 4 | #' Must be a non-empty subset of `{"train", "test", "internal_valid"}`. 5 | #' If multiple sets are provided, these are first combined to a single prediction object. 6 | #' Default is `"test"`. 7 | -------------------------------------------------------------------------------- /man-roxygen/seealso_prediction.R: -------------------------------------------------------------------------------- 1 | #' @seealso 2 | #' 3 | #' * Chapter in the [mlr3book](https://mlr3book.mlr-org.com/): 4 | #' \url{https://mlr3book.mlr-org.com/chapters/chapter2/data_and_basic_modeling.html} 5 | #' * Package \CRANpkg{mlr3viz} for some generic visualizations. 6 | #' * Extension packages for additional task types: 7 | #' * \CRANpkg{mlr3proba} for probabilistic supervised regression and survival analysis. 
8 | #' * \CRANpkg{mlr3cluster} for unsupervised clustering. 9 | #' 10 | #' @family Prediction 11 | -------------------------------------------------------------------------------- /tests/testthat/test_mlr_sugar.R: -------------------------------------------------------------------------------- 1 | test_that("singular sugar functions", { 2 | expect_task(tsk("iris")) 3 | expect_learner(lrn("classif.featureless")) 4 | expect_resampling(rsmp("cv")) 5 | expect_measure(msr("classif.ce")) 6 | }) 7 | 8 | test_that("plural sugar functions", { 9 | expect_list(tsks("iris"), "Task", len = 1L) 10 | expect_list(lrns("classif.featureless"), "Learner", len = 1L) 11 | expect_list(rsmps("cv"), "Resampling", len = 1L) 12 | expect_list(msrs("classif.ce"), "Measure", len = 1L) 13 | }) 14 | -------------------------------------------------------------------------------- /R/fix_factor_levels.R: -------------------------------------------------------------------------------- 1 | fix_factor_levels = function(data, levels, ...) { 2 | UseMethod("fix_factor_levels") 3 | } 4 | 5 | #' @export 6 | fix_factor_levels.data.table = function(data, levels, ...) 
{ # nolint 7 | levels = levels[intersect(names(levels), names(data))] 8 | iwalk(levels, function(lvls, id, data) { 9 | x = data[[id]] 10 | if (!identical(levels(x), lvls)) { 11 | set(data, j = id, value = factor(x, levels = lvls, ordered = is.ordered(x))) 12 | } 13 | }, data = data) 14 | data[] 15 | } 16 | -------------------------------------------------------------------------------- /tests/testthat/test_assertions.R: -------------------------------------------------------------------------------- 1 | test_that("assert_empty_ellipsis works", { 2 | expect_error(assert_empty_ellipsis(1), "Received 1 unnamed argument") 3 | expect_error(assert_empty_ellipsis(1, 2), "Received 2 unnamed argument") 4 | expect_error(assert_empty_ellipsis(a = 1), "that were unused: a") 5 | expect_error(assert_empty_ellipsis(a = 1, b = 2), "that were unused: a, b") 6 | expect_error(assert_empty_ellipsis(a = 1, b = 1, 2), "1 unnamed, as well as named arguments a, b") 7 | expect_null(assert_empty_ellipsis()) 8 | }) 9 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^LICENSE$ 2 | ^README.html$ 3 | ^README\.Rmd$ 4 | ^.*\.Rproj$ 5 | ^\.Rproj\.user$ 6 | ^\.ccache$ 7 | ^\.editorconfig$ 8 | ^\.git$ 9 | ^\.github$ 10 | ^\.github$ 11 | ^\.gitignore$ 12 | ^\.ignore$ 13 | ^\.lintr$ 14 | ^\.vscode$ 15 | ^docs$ 16 | ^inst/extdata/.+\.R$ 17 | ^man-roxygen$ 18 | ^paper$ 19 | ^pkgdown$ 20 | ^revdep$ 21 | ^CITATION.cff$ 22 | ^CONTRIBUTING.md 23 | ^cran-comments\.md$ 24 | ^CRAN-SUBMISSION$ 25 | ^benchmark$ 26 | ^attic$ 27 | ^.cursor$ 28 | ^local_attic$ 29 | ^tests/testthat/_object_snapshots$ 30 | -------------------------------------------------------------------------------- /tests/testthat/test_score_roc_measures.R: -------------------------------------------------------------------------------- 1 | test_that("score_roc_measure works", { 2 | learner = lrn("classif.rpart", 
predict_type = "prob") 3 | splits = partition(task = tsk("pima"), ratio = 0.7) 4 | task = tsk("pima") 5 | learner$train(task) 6 | pred = learner$predict(task) 7 | res = score_roc_measures(pred) 8 | 9 | expect_list(res, len = 2) 10 | expect_named(res, c("confusion_matrix", "measures")) 11 | expect_named(res$measures, c("tpr", "fpr", "fnr", "tnr", "ppv", "fdr", "npv", "fomr", "acc", "lr_plus", "lr_minus", "dor")) 12 | }) 13 | -------------------------------------------------------------------------------- /man-roxygen/seealso_task_generator.R: -------------------------------------------------------------------------------- 1 | #' @seealso 2 | #' 3 | #' * [Dictionary][mlr3misc::Dictionary] of [TaskGenerators][TaskGenerator]: [mlr_task_generators] 4 | #' * `as.data.table(mlr_task_generators)` for a table of available [TaskGenerators][TaskGenerator] in the running session (depending on the loaded packages). 5 | #' * Extension packages for additional task types: 6 | #' * \CRANpkg{mlr3proba} for probabilistic supervised regression and survival analysis. 7 | #' * \CRANpkg{mlr3cluster} for unsupervised clustering. 8 | #' 9 | #' @family TaskGenerator 10 | -------------------------------------------------------------------------------- /man/assert_empty_ellipsis.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/assertions.R 3 | \name{assert_empty_ellipsis} 4 | \alias{assert_empty_ellipsis} 5 | \title{Assert Empty Ellipsis} 6 | \usage{ 7 | assert_empty_ellipsis(...) 8 | } 9 | \arguments{ 10 | \item{...}{(any)\cr 11 | Ellipsis arguments to check.} 12 | } 13 | \value{ 14 | \code{NULL} 15 | } 16 | \description{ 17 | Assert that \code{...} arguments are empty. 18 | Use this function in S3-methods to ensure that misspelling of arguments does not go unnoticed. 
19 | } 20 | \keyword{internal} 21 | -------------------------------------------------------------------------------- /man-roxygen/param_param_values.R: -------------------------------------------------------------------------------- 1 | #' @param param_values (`list()`)\cr 2 | #' If you want to try many parameter settings for learners, you can pass them through the design 3 | #' which is optimized to be faster than creating learners for each setting. 4 | #' 5 | #' A list of lists of named lists, from outer to inner: 6 | #' 1. One list element for each [Learner]. 7 | #' 2. One list element for each hyperparameter configuration to try. 8 | #' 3. Named list of hyperparameter settings to set in the Learner, possibly overwriting 9 | #' already set hyperparameters in the [Learner]. 10 | -------------------------------------------------------------------------------- /man-roxygen/param_aggregator.R: -------------------------------------------------------------------------------- 1 | #' @param aggregator (`function()`)\cr 2 | #' Function to aggregate over multiple iterations. The role of this function depends on 3 | #' the value of field `"average"`: 4 | #' 5 | #' * `"macro"`: A numeric vector of scores (one per iteration) is passed. 6 | #' The aggregate function defaults to [mean()] in this case. 7 | #' * `"micro"`: The `aggregator` function is not used. 8 | #' Instead, predictions from multiple iterations are first combined and then 9 | #' scored in one go. 10 | #' * `"custom"`: A [ResampleResult] is passed to the aggregate function. 
11 | -------------------------------------------------------------------------------- /tests/testthat/test_ContextEvaluation.R: -------------------------------------------------------------------------------- 1 | test_that("ContextResample works", { 2 | task = tsk("pima") 3 | learner = lrn("classif.rpart") 4 | resampling = rsmp("cv", folds = 3) 5 | iteration = 1 6 | 7 | ctx = ContextResample$new(task, learner, resampling, iteration) 8 | 9 | expect_task(ctx$task) 10 | expect_learner(ctx$learner) 11 | expect_resampling(ctx$resampling) 12 | expect_equal(ctx$iteration, iteration) 13 | 14 | expect_error({ctx$task = tsk("spam")}, "read-only") 15 | expect_error({ctx$resampling = rsmp("cv", folds = 5)}, "read-only") 16 | expect_error({ctx$iteration = 2}, "read-only") 17 | }) 18 | -------------------------------------------------------------------------------- /.lintr: -------------------------------------------------------------------------------- 1 | linters: linters_with_defaults( 2 | # lintr defaults: https://lintr.r-lib.org/reference/default_linters.html 3 | # the following setup changes/removes certain linters 4 | assignment_linter = NULL, # do not force using <- for assignments 5 | object_name_linter = object_name_linter(c("snake_case", "CamelCase")), # only allow snake case and camel case object names 6 | cyclocomp_linter = NULL, # do not check function complexity 7 | commented_code_linter = NULL, # allow code in comments 8 | line_length_linter = line_length_linter(180L), 9 | indentation_linter(indent = 2L, hanging_indent_style = "never") 10 | ) 11 | 12 | -------------------------------------------------------------------------------- /man-roxygen/seealso_resampling.R: -------------------------------------------------------------------------------- 1 | #' @seealso 2 | # 3 | #' * Chapter in the [mlr3book](https://mlr3book.mlr-org.com/): 4 | #' \url{https://mlr3book.mlr-org.com/chapters/chapter3/evaluation_and_benchmarking.html#sec-resampling} 5 | #' * Package 
\CRANpkg{mlr3spatiotempcv} for spatio-temporal resamplings. 6 | #' * [Dictionary][mlr3misc::Dictionary] of [Resamplings][Resampling]: [mlr_resamplings] 7 | #' * `as.data.table(mlr_resamplings)` for a table of available [Resamplings][Resampling] in the running session (depending on the loaded packages). 8 | #' * \CRANpkg{mlr3spatiotempcv} for additional [Resampling]s for spatio-temporal 9 | #' tasks. 10 | #' 11 | #' @family Resampling 12 | -------------------------------------------------------------------------------- /tests/testthat/test_as_resampling.R: -------------------------------------------------------------------------------- 1 | test_that("as_resampling conversion", { 2 | resampling = rsmp("subsampling") 3 | converted = as_resampling(resampling) 4 | cloned = as_resampling(resampling, clone = TRUE) 5 | 6 | expect_class(converted, "Resampling") 7 | expect_same_address(resampling, converted) 8 | expect_different_address(resampling, cloned) 9 | 10 | expect_list(as_resamplings(resampling), types = "Resampling") 11 | expect_list(as_resamplings(list(resampling)), types = "Resampling") 12 | }) 13 | 14 | test_that("error when arguments are misspelled", { 15 | expect_error(as_resampling(rsmp("holdout"), clone2 = TRUE), "Received the following") 16 | }) 17 | -------------------------------------------------------------------------------- /tests/testthat/helper_resampling.R: -------------------------------------------------------------------------------- 1 | expect_grouping_works = function(r) { 2 | data = insert_named(as.data.table(iris), list(grp = rep_len(letters[1:10], 150))) 3 | task = TaskClassif$new("iris-grp", as_data_backend(data), target = "Species") 4 | task$col_roles$group = "grp" 5 | 6 | r$instantiate(task) 7 | for (i in seq_len(r$iters)) { 8 | expect_integer(r$train_set(i), lower = 1L, upper = 150L, any.missing = FALSE) 9 | expect_integer(r$test_set(i), lower = 1L, upper = 150L, any.missing = FALSE) 10 | if (!inherits(r, "ResamplingInsample")) { 11 | 
expect_length(intersect(data[r$train_set(i), get("grp")], data[r$test_set(i), get("grp")]), 0L) 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /man-roxygen/section_logging.R: -------------------------------------------------------------------------------- 1 | #' @section Logging: 2 | #' 3 | #' The \CRANpkg{mlr3} package uses the \CRANpkg{lgr} package for logging. 4 | #' \CRANpkg{lgr} supports multiple log levels which can be queried with 5 | #' `getOption("lgr.log_levels")`. 6 | #' 7 | #' To suppress output and reduce verbosity, you can lower the log level from the 8 | #' default level `"info"` to `"warn"`: 9 | #' ``` 10 | #' lgr::get_logger("mlr3")$set_threshold("warn") 11 | #' ``` 12 | #' 13 | #' To get additional log output for debugging, increase the log level to `"debug"` 14 | #' or `"trace"`: 15 | #' ``` 16 | #' lgr::get_logger("mlr3")$set_threshold("debug") 17 | #' ``` 18 | #' 19 | #' To log to a file or a database, see the documentation of [lgr::lgr-package]. 
20 | -------------------------------------------------------------------------------- /tests/testthat/test_as_measure.R: -------------------------------------------------------------------------------- 1 | test_that("as_measure conversion", { 2 | measure = msr("classif.ce") 3 | converted = as_measure(measure) 4 | 5 | expect_class(converted, "Measure") 6 | expect_same_address(measure, converted) 7 | 8 | expect_list(as_measures(measure), types = "Measure") 9 | expect_list(as_measures(list(measure)), types = "Measure") 10 | 11 | default = as_measure(NULL, task_type = "classif") 12 | expect_class(default, "Measure") 13 | 14 | default = as_measures(NULL, task_type = "classif") 15 | expect_list(default, types = "Measure") 16 | }) 17 | 18 | test_that("error when arguments are misspelled", { 19 | expect_error(as_measure(msr("classif.acc"), clone2 = TRUE), "Received the following") 20 | }) 21 | -------------------------------------------------------------------------------- /man-roxygen/seealso_measure.R: -------------------------------------------------------------------------------- 1 | #' @seealso 2 | #' 3 | #' * Chapter in the [mlr3book](https://mlr3book.mlr-org.com/): 4 | #' \url{https://mlr3book.mlr-org.com/chapters/chapter2/data_and_basic_modeling.html#sec-eval} 5 | #' * Package \CRANpkg{mlr3measures} for the scoring functions. 6 | #' * [Dictionary][mlr3misc::Dictionary] of [Measures][Measure]: [mlr_measures] 7 | #' * `as.data.table(mlr_measures)` for a table of available [Measures][Measure] in the running session (depending on the loaded packages). 8 | #' * Extension packages for additional task types: 9 | #' * \CRANpkg{mlr3proba} for probabilistic supervised regression and survival analysis. 10 | #' * \CRANpkg{mlr3cluster} for unsupervised clustering. 
11 | #' 12 | #' @family Measure 13 | -------------------------------------------------------------------------------- /man/as_benchmark_result.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/as_benchmark_result.R 3 | \name{as_benchmark_result} 4 | \alias{as_benchmark_result} 5 | \alias{as_benchmark_result.BenchmarkResult} 6 | \alias{as_benchmark_result.ResampleResult} 7 | \title{Convert to BenchmarkResult} 8 | \usage{ 9 | as_benchmark_result(x, ...) 10 | 11 | \method{as_benchmark_result}{BenchmarkResult}(x, ...) 12 | 13 | \method{as_benchmark_result}{ResampleResult}(x, ...) 14 | } 15 | \arguments{ 16 | \item{x}{(any)\cr 17 | Object to convert.} 18 | 19 | \item{...}{(any)\cr 20 | Additional arguments.} 21 | } 22 | \value{ 23 | (\link{BenchmarkResult}). 24 | } 25 | \description{ 26 | Convert object to a \link{BenchmarkResult}. 27 | } 28 | -------------------------------------------------------------------------------- /man/reexports.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/reexports.R 3 | \docType{import} 4 | \name{reexports} 5 | \alias{reexports} 6 | \alias{as.data.table} 7 | \alias{data.table} 8 | \alias{mlr_callbacks} 9 | \alias{clbk} 10 | \alias{clbks} 11 | \title{Objects exported from other packages} 12 | \keyword{internal} 13 | \description{ 14 | These objects are imported from other packages. Follow the links 15 | below to see their documentation. 
16 | 17 | \describe{ 18 | \item{data.table}{\code{\link[data.table]{as.data.table}}, \code{\link[data.table]{data.table}}} 19 | 20 | \item{mlr3misc}{\code{\link[mlr3misc]{clbk}}, \code{\link[mlr3misc:clbk]{clbks}}, \code{\link[mlr3misc]{mlr_callbacks}}} 21 | }} 22 | 23 | -------------------------------------------------------------------------------- /man-roxygen/param_encapsulate.R: -------------------------------------------------------------------------------- 1 | #' @param encapsulate (`character(1)`)\cr 2 | #' If not `NA`, enables encapsulation by setting the field 3 | #' `Learner$encapsulate` to one of the supported values: 4 | #' `"none"` (disable encapsulation), 5 | #' `"try"` (captures errors but output is printed to the console and not logged), 6 | #' `"evaluate"` (execute via \CRANpkg{evaluate}) and 7 | #' `"callr"` (start in external session via \CRANpkg{callr}). 8 | #' If `NA`, encapsulation is not changed, i.e. the settings of the 9 | #' individual learner are active. 10 | #' Additionally, if encapsulation is set to `"evaluate"` or `"callr"`, 11 | #' the fallback learner is set to the featureless learner if the learner 12 | #' does not already have a fallback configured. 13 | -------------------------------------------------------------------------------- /man-roxygen/section_predict_sets.R: -------------------------------------------------------------------------------- 1 | #' @section Predict Sets: 2 | #' If you want to compare the performance of a learner on the training set with the performance 3 | #' on the test set, you have to configure the [Learner] to predict on multiple sets by 4 | #' setting the field `predict_sets` to `c("train", "test")` (default is `"test"`). 5 | #' Each set yields a separate [Prediction] object during resampling. 
6 | #' In the next step, you have to configure the measures to operate on the respective Prediction object: 7 | #' ``` 8 | #' m1 = msr("classif.ce", id = "ce.train", predict_sets = "train") 9 | #' m2 = msr("classif.ce", id = "ce.test", predict_sets = "test") 10 | #' ``` 11 | #' 12 | #' The (list of) created measures can finally be passed to `$aggregate()` or `$score()`. 13 | -------------------------------------------------------------------------------- /tests/testthat/test_mlr_resampling_loo.R: -------------------------------------------------------------------------------- 1 | test_that("loo has no duplicated ids", { 2 | r = rsmp("loo") 3 | expect_identical(r$duplicated_ids, FALSE) 4 | }) 5 | 6 | test_that("stratification", { 7 | data = data.table(y = factor(rep(letters[1:2], times = c(90, 10))), x1 = runif(100), x2 = rep(LETTERS[1:2], times = c(50, 50))) 8 | b = as_data_backend(data) 9 | task = TaskClassif$new("stratify_data", b, target = "y") 10 | task$col_roles$stratum = task$target_names 11 | 12 | r = rsmp("loo") 13 | r$instantiate(task) 14 | 15 | i = 1L 16 | expect_data_table(task$data(r$train_set(i)), nrows = 99) 17 | expect_data_table(task$data(r$test_set(i)), nrows = 1) 18 | }) 19 | 20 | test_that("grouping", { 21 | r = rsmp("loo") 22 | expect_grouping_works(r) 23 | }) 24 | -------------------------------------------------------------------------------- /tests/testthat/test_MeasureClassif.R: -------------------------------------------------------------------------------- 1 | test_that("Classification measures", { 2 | keys = mlr_measures$keys("^classif\\.") 3 | task = tsk("sonar") 4 | learner = lrn("classif.rpart", predict_type = "prob") 5 | learner$train(task) 6 | p = learner$predict(task) 7 | 8 | for (key in keys) { 9 | m = mlr_measures$get(key) 10 | if (is.na(m$task_type) || m$task_type == "classif") { 11 | if (key == "classif.costs") { 12 | costs = 1 - diag(length(task$class_names)) 13 | rownames(costs) = colnames(costs) = task$class_names 14 | 
m$costs = costs 15 | } 16 | perf = m$score(prediction = p, task = task, learner = learner) 17 | expect_number(perf, na.ok = FALSE, lower = m$range[1], upper = m$range[2]) 18 | } 19 | } 20 | }) 21 | -------------------------------------------------------------------------------- /tests/testthat/test_resampling_insample.R: -------------------------------------------------------------------------------- 1 | test_that("insample has no duplicated ids", { 2 | r = rsmp("insample") 3 | expect_identical(r$duplicated_ids, FALSE) 4 | }) 5 | 6 | test_that("stratification", { 7 | data = data.table(y = factor(rep(letters[1:2], times = c(90, 10))), x1 = runif(100), x2 = rep(LETTERS[1:2], times = c(50, 50))) 8 | b = as_data_backend(data) 9 | task = TaskClassif$new("stratify_data", b, target = "y") 10 | task$col_roles$stratum = task$target_names 11 | 12 | r = rsmp("insample") 13 | r$instantiate(task) 14 | 15 | i = 1L 16 | expect_set_equal(r$train_set(i), task$row_ids) 17 | expect_set_equal(r$test_set(i), task$row_ids) 18 | }) 19 | 20 | test_that("grouping", { 21 | r = rsmp("insample") 22 | expect_grouping_works(r) 23 | }) 24 | -------------------------------------------------------------------------------- /R/TaskClassif_sonar.R: -------------------------------------------------------------------------------- 1 | #' @title Sonar Classification Task 2 | #' 3 | #' @name mlr_tasks_sonar 4 | #' @format [R6::R6Class] inheriting from [TaskClassif]. 5 | #' @include mlr_tasks.R 6 | #' 7 | #' @description 8 | #' A classification task for the [mlbench::Sonar] data set. 9 | #' Positive class is set to "M" (Mine). 10 | #' 11 | #' @templateVar id sonar 12 | #' @template task 13 | #' 14 | #' @template seealso_task 15 | NULL 16 | 17 | load_task_sonar = function(id = "sonar") { 18 | b = as_data_backend(load_dataset("Sonar", "mlbench")) 19 | task = TaskClassif$new(id, b, target = "Class", positive = "M", 20 | label = "Sonar: Mines vs. 
Rocks") 21 | b$hash = task$man = "mlr3::mlr_tasks_sonar" 22 | task 23 | } 24 | 25 | #' @include mlr_tasks.R 26 | mlr_tasks$add("sonar", load_task_sonar) 27 | -------------------------------------------------------------------------------- /man/assert_resample_callback.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CallbackResample.R 3 | \name{assert_resample_callback} 4 | \alias{assert_resample_callback} 5 | \alias{assert_resample_callbacks} 6 | \title{Assertions for Callbacks} 7 | \usage{ 8 | assert_resample_callback(callback, null_ok = FALSE) 9 | 10 | assert_resample_callbacks(callbacks, null_ok = FALSE) 11 | } 12 | \arguments{ 13 | \item{callback}{(\link{CallbackResample}).} 14 | 15 | \item{null_ok}{(\code{logical(1)})\cr 16 | If \code{TRUE}, \code{NULL} is allowed.} 17 | 18 | \item{callbacks}{(list of \link{CallbackResample}).} 19 | } 20 | \value{ 21 | \link{CallbackResample} | List of \link{CallbackResample}s. 22 | } 23 | \description{ 24 | Assertions for \link{CallbackResample} class. 25 | } 26 | -------------------------------------------------------------------------------- /man/print.roc_measures.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/score_roc_measures.R 3 | \name{print.roc_measures} 4 | \alias{print.roc_measures} 5 | \title{Print ROC Measures} 6 | \usage{ 7 | \method{print}{roc_measures}(x, abbreviations = TRUE, digits = 2L, ...) 
8 | } 9 | \arguments{ 10 | \item{x}{(\code{roc_measures})\cr 11 | The object returned by \code{score_roc_measures}.} 12 | 13 | \item{abbreviations}{(\code{logical(1)})\cr 14 | If \code{TRUE}, print a list of abbreviations for the measures.} 15 | 16 | \item{digits}{(\code{integer(1)})\cr 17 | Number of digits to round the measures to.} 18 | 19 | \item{...}{(\code{any})\cr 20 | Additional parameters, currently unused.} 21 | } 22 | \description{ 23 | Print the confusion matrix and a set of roc performance measures. 24 | } 25 | -------------------------------------------------------------------------------- /R/TaskClassif_pima.R: -------------------------------------------------------------------------------- 1 | #' @title Pima Indian Diabetes Classification Task 2 | #' 3 | #' @name mlr_tasks_pima 4 | #' @format [R6::R6Class] inheriting from [TaskClassif]. 5 | #' @include mlr_tasks.R 6 | #' 7 | #' @description 8 | #' A classification task for the [mlbench::PimaIndiansDiabetes2] data set. 9 | #' Positive class is set to `"pos"`. 
10 | #' 11 | #' @templateVar id pima 12 | #' @template task 13 | #' 14 | #' @template seealso_task 15 | NULL 16 | 17 | load_task_pima = function(id = "pima") { 18 | b = as_data_backend(load_dataset("PimaIndiansDiabetes2", "mlbench")) 19 | task = TaskClassif$new(id, b, target = "diabetes", positive = "pos", 20 | label = "Pima Indian Diabetes") 21 | b$hash = task$man = "mlr3::mlr_tasks_pima" 22 | task 23 | } 24 | 25 | #' @include mlr_tasks.R 26 | mlr_tasks$add("pima", load_task_pima) 27 | -------------------------------------------------------------------------------- /man/mlr3.holdout_task.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mlr_callbacks.R 3 | \name{mlr3.holdout_task} 4 | \alias{mlr3.holdout_task} 5 | \title{Callback Holdout Task} 6 | \arguments{ 7 | \item{task}{(\link{Task})\cr 8 | The holdout task.} 9 | } 10 | \description{ 11 | This \link{CallbackResample} predicts on an additional holdout task after training. 12 | } 13 | \examples{ 14 | task = tsk("pima") 15 | task_holdout = task$clone() 16 | learner = lrn("classif.rpart") 17 | resampling = rsmp("cv", folds = 3) 18 | splits = partition(task, 0.7) 19 | 20 | task$filter(splits$train) 21 | task_holdout$filter(splits$test) 22 | 23 | callback = clbk("mlr3.holdout_task", task = task_holdout) 24 | 25 | rr = resample(task, learner, resampling = resampling, callbacks = callback) 26 | 27 | rr$data_extra 28 | } 29 | -------------------------------------------------------------------------------- /R/TaskClassif_iris.R: -------------------------------------------------------------------------------- 1 | #' @title Iris Classification Task 2 | #' 3 | #' @name mlr_tasks_iris 4 | #' @format [R6::R6Class] inheriting from [TaskClassif]. 5 | #' @include mlr_tasks.R 6 | #' 7 | #' @description 8 | #' A classification task for the popular [datasets::iris] data set. 
9 | #' 10 | #' @templateVar id iris 11 | #' @template task 12 | #' 13 | #' @source 14 | #' \url{https://en.wikipedia.org/wiki/Iris_flower_data_set} 15 | #' 16 | #' `r format_bib("anderson_1936")` 17 | #' 18 | #' @template seealso_task 19 | NULL 20 | 21 | load_task_iris = function(id = "iris") { 22 | b = as_data_backend(load_dataset("iris", "datasets")) 23 | task = TaskClassif$new(id, b, target = "Species", label = "Iris Flowers") 24 | b$hash = task$man = "mlr3::mlr_tasks_iris" 25 | task 26 | } 27 | 28 | #' @include mlr_tasks.R 29 | mlr_tasks$add("iris", load_task_iris) 30 | -------------------------------------------------------------------------------- /R/as_benchmark_result.R: -------------------------------------------------------------------------------- 1 | #' @title Convert to BenchmarkResult 2 | #' 3 | #' @description 4 | #' Convert object to a [BenchmarkResult]. 5 | #' 6 | #' @inheritParams as_task 7 | #' 8 | #' @return ([BenchmarkResult]). 9 | #' @export 10 | as_benchmark_result = function(x, ...) { 11 | UseMethod("as_benchmark_result") 12 | } 13 | 14 | 15 | #' @rdname as_benchmark_result 16 | #' @export 17 | as_benchmark_result.BenchmarkResult = function(x, ...) { # nolint 18 | x 19 | } 20 | 21 | #' @rdname as_benchmark_result 22 | #' @export 23 | as_benchmark_result.ResampleResult = function(x, ...) { # nolint 24 | private = get_private(x) 25 | rdata = private$.data$clone(deep = TRUE) 26 | if (!is.null(private$.view)) { 27 | rdata$data$fact = rdata$data$fact[list(private$.view), on = "uhash", nomatch = NULL] 28 | rdata$sweep() 29 | } 30 | BenchmarkResult$new(rdata) 31 | } 32 | -------------------------------------------------------------------------------- /man-roxygen/section_parallelization.R: -------------------------------------------------------------------------------- 1 | #' @section Parallelization: 2 | #' 3 | #' This function can be parallelized with the \CRANpkg{future} or \CRANpkg{mirai} package. 4 | #' One job is one resampling iteration. 
5 | #' All jobs are sent to an apply function from \CRANpkg{future.apply} or `mirai::mirai_map()` in a single batch. 6 | #' To select a parallel backend, use [future::plan()]. 7 | #' To use `mirai`, call `mirai::daemons(.compute = "mlr3_parallelization")` before calling this function. 8 | #' The `future` package guarantees reproducible results independent of the parallel backend. 9 | #' The results of `mirai` will not be the same but can be made reproducible by setting a `seed` when calling `mirai::daemons()`. 10 | #' More on parallelization can be found in the book: 11 | #' \url{https://mlr3book.mlr-org.com/chapters/chapter10/advanced_technical_aspects_of_mlr3.html} 12 | -------------------------------------------------------------------------------- /tests/testthat/test_DataBackend.R: -------------------------------------------------------------------------------- 1 | test_that("Nested backends", { 2 | data = as.data.table(iris) 3 | data$Petal.Length[91:120] = NA 4 | data$id = 1:150 5 | 6 | b1 = as_data_backend(data[1:100, -"Sepal.Length"], primary_key = "id") 7 | b2 = as_data_backend(data[101:130, -"Sepal.Length"], primary_key = "id") 8 | b3 = DataBackendRbind$new(b1, b2) 9 | expect_backend(b3) 10 | 11 | b4 = as_data_backend(data[131:150, -"Sepal.Length"], primary_key = "id") 12 | b5 = DataBackendRbind$new(b3, b4) 13 | expect_backend(b5) 14 | 15 | b6 = as_data_backend(data[, c("id", "Sepal.Length")], primary_key = "id") 16 | b7 = DataBackendCbind$new(b5, b6) 17 | expect_backend(b7) 18 | 19 | expect_iris_backend(b7, n_missing = 30L) 20 | 21 | x = b7$missings(b7$rownames, c("Petal.Width", "Petal.Length")) 22 | expect_equal(x, set_names(c(0L, 30L), c("Petal.Width", "Petal.Length"))) 23 | }) 24 | -------------------------------------------------------------------------------- /tests/testthat/test_Dictionary.R: -------------------------------------------------------------------------------- 1 | test_that("Dictionary: clone works", { 2 | t1 = tsk("iris") 3 | 
expect_task(t1) 4 | t2 = tsk("iris") 5 | expect_task(t2) 6 | expect_different_address(t1, t2) 7 | }) 8 | 9 | test_that("$keys(pattern) works", { 10 | expect_subset(mlr_learners$keys("classif"), mlr_learners$keys(), empty.ok = FALSE) 11 | }) 12 | 13 | test_that("dictionary to data.table conversion works with prototype arguments", { 14 | LearnerRegrRpart2 = R6Class("LearnerRegrRpart2", 15 | inherit = LearnerRegrRpart, 16 | public = list( 17 | x = NULL, 18 | initialize = function(x) { 19 | self$x = x 20 | super$initialize() 21 | } 22 | ) 23 | ) 24 | on.exit(mlr_learners$remove("regr.rpart2")) 25 | mlr_learners$add("regr.rpart2", LearnerRegrRpart2, .prototype_args = list(x = 123)) 26 | 27 | expect_data_table(as.data.table(mlr_learners)) 28 | }) 29 | -------------------------------------------------------------------------------- /man-roxygen/field_predict_sets.R: -------------------------------------------------------------------------------- 1 | #' @field predict_sets (`character()`)\cr 2 | #' During [resample()]/[benchmark()], a [Learner] can predict on multiple sets. 3 | #' Per default, a learner only predicts observations in the test set (`predict_sets == "test"`). 4 | #' To change this behavior, set `predict_sets` to a non-empty subset of `{"train", "test", "internal_valid"}`. 5 | #' The `"train"` predict set contains the train ids from the resampling. This means that if a learner does validation and 6 | #' sets `$validate` to a ratio (creating the validation data from the training data), the train predictions 7 | #' will include the predictions for the validation data. 8 | #' Each set yields a separate [Prediction] object. 9 | #' Those can be combined via getters in [ResampleResult]/[BenchmarkResult], or [Measure]s can be configured 10 | #' to operate on specific subsets of the calculated prediction sets. 
11 | -------------------------------------------------------------------------------- /man/col_info.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Task.R 3 | \name{col_info} 4 | \alias{col_info} 5 | \alias{col_info.data.table} 6 | \alias{col_info.DataBackend} 7 | \title{Column Information for Backend} 8 | \usage{ 9 | col_info(x, ...) 10 | 11 | \method{col_info}{data.table}(x, primary_key = character(), ...) 12 | 13 | \method{col_info}{DataBackend}(x, ...) 14 | } 15 | \arguments{ 16 | \item{x}{(any)\cr 17 | A backend-like object for which to retrieve column information.} 18 | 19 | \item{...}{(any)\cr 20 | Additional arguments.} 21 | 22 | \item{primary_key}{(\code{character()})\cr 23 | The primary key of the backend.} 24 | } 25 | \description{ 26 | Collects column information for backend. 27 | 28 | Currently, this includes: 29 | \itemize{ 30 | \item storage type 31 | \item levels (factor / ordered), but not for the primary key column 32 | } 33 | } 34 | \keyword{internal} 35 | -------------------------------------------------------------------------------- /R/TaskClassif_zoo.R: -------------------------------------------------------------------------------- 1 | #' @title Zoo Classification Task 2 | #' 3 | #' @name mlr_tasks_zoo 4 | #' @format [R6::R6Class] inheriting from [TaskClassif]. 5 | #' @include mlr_tasks.R 6 | #' 7 | #' @description 8 | #' A classification task for the [mlbench::Zoo] data set. 9 | #' Rownames are stored as variable `"..rownames"` with column role `"name"`. 
10 | #' 11 | #' @templateVar id zoo 12 | #' @template task 13 | #' 14 | #' @template seealso_task 15 | NULL 16 | 17 | load_task_zoo = function(id = "zoo") { 18 | b = as_data_backend(load_dataset("Zoo", "mlbench", keep_rownames = TRUE), keep_rownames = "animal") 19 | task = TaskClassif$new(id, b, target = "type", label = "Zoo Animals") 20 | b$hash = task$man = "mlr3::mlr_tasks_zoo" 21 | task$col_roles$name = "animal" 22 | task$col_roles$feature = setdiff(task$col_roles$feature, "animal") 23 | task 24 | } 25 | 26 | #' @include mlr_tasks.R 27 | mlr_tasks$add("zoo", load_task_zoo) 28 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | bibentry( 2 | bibtype = "Article", 3 | key = "mlr3", 4 | title = "{mlr3}: A modern object-oriented machine learning framework in {R}", 5 | author = c( 6 | person(given = "Michel", family = "Lang"), 7 | person(given = "Martin", family = "Binder"), 8 | person(given = "Jakob", family = "Richter"), 9 | person(given = "Patrick", family = "Schratz"), 10 | person(given = "Florian", family = "Pfisterer"), 11 | person(given = "Stefan", family = "Coors"), 12 | person(given = "Quay", family = "Au"), 13 | person(given = "Giuseppe", family = "Casalicchio"), 14 | person(given = "Lars", family = "Kotthoff"), 15 | person(given = "Bernd", family = "Bischl") 16 | ), 17 | journal = "Journal of Open Source Software", 18 | year = 2019, 19 | month = "dec", 20 | doi = "10.21105/joss.01903", 21 | url = "https://joss.theoj.org/papers/10.21105/joss.01903" 22 | ) 23 | 24 | # vim: ft=r 25 | -------------------------------------------------------------------------------- /tests/testthat/test_MeasureRegrPinball.R: -------------------------------------------------------------------------------- 1 | test_that("mlr_measures_regr.pinball", { 2 | task = tsk("california_housing") 3 | lrn = lrn("regr.featureless") 4 | 5 | 
expect_error(msr("regr.pinball", alpha = 2), "alpha: Element 1 is not <= 1") 6 | 7 | m = msr("regr.pinball") 8 | expect_equal(m$properties, character(0)) 9 | preds_na = lrn$train(task)$predict(task) 10 | expect_warning(preds_na$score(m), "missing predict type 'quantiles'") 11 | score_na = suppressWarnings(unname(preds_na$score(m))) 12 | expect_equal(score_na, NaN) 13 | 14 | lrn$predict_type = "quantiles" 15 | lrn$quantiles = c(0.25, 0.5, 0.75) 16 | lrn$quantile_response = 0.5 17 | 18 | preds = lrn$train(task)$predict(task) 19 | expect_number(preds$score(m)) 20 | 21 | # alpha must be in predicted quantiles 22 | expect_error(preds$score(msr("regr.pinball", alpha = 0.1)), 23 | "Assertion on 'alpha' failed: Must be element of set") 24 | }) 25 | -------------------------------------------------------------------------------- /tests/testthat/test_partition.R: -------------------------------------------------------------------------------- 1 | test_that("partition two way split", { 2 | task = tsk("pima") 3 | li = partition(task, ratio = 0.66) 4 | expect_list(li, len = 3L) 5 | expect_names(names(li), identical.to = c("train", "test", "validation")) 6 | expect_equal(length(li$train), 507) 7 | expect_equal(length(li$test), 261) 8 | expect_equal(length(li$validation), 0) 9 | 10 | expect_disjunct(li$train, li$test) 11 | expect_disjunct(li$train, li$validation) 12 | }) 13 | 14 | test_that("partition three way split", { 15 | task = tsk("pima") 16 | li = partition(task, ratio = c(0.66, 0.14)) 17 | expect_list(li, len = 3L) 18 | expect_names(names(li), identical.to = c("train", "test", "validation")) 19 | expect_equal(length(li$train), 507) 20 | expect_equal(length(li$test), 107) 21 | expect_equal(length(li$validation), 154) 22 | 23 | expect_disjunct(li$train, li$test) 24 | expect_disjunct(li$train, li$validation) 25 | }) 26 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: 
-------------------------------------------------------------------------------- 1 | This R package is licensed under the 2 | [LGPL-3](https://www.gnu.org/licenses/lgpl-3.0.html.en). If you 3 | encounter problems using this software (lack of documentation, 4 | misleading or wrong documentation, unexpected behavior, bugs, …) or just 5 | want to suggest features, please open an issue in the [issue 6 | tracker](https://github.com/mlr-org/mlr3/issues). Pull requests are 7 | welcome and will be included at the discretion of the maintainers. 8 | 9 | Please consult the [wiki](https://github.com/mlr-org/mlr3/wiki/) for a 10 | [style guide](https://github.com/mlr-org/mlr3/wiki/Style-Guide), a 11 | [roxygen guide](https://github.com/mlr-org/mlr3/wiki/Roxygen-Guide) and 12 | a [pull request 13 | guide](https://github.com/mlr-org/mlr3/wiki/PR-Guidelines). 14 | 15 | Finally, please make sure any contributions to the universe adhere to our [code of conduct](https://github.com/mlr-org/mlr3/blob/main/.github/CODE_OF_CONDUCT.md). 16 | -------------------------------------------------------------------------------- /man-roxygen/seealso_task.R: -------------------------------------------------------------------------------- 1 | #' @seealso 2 | #' 3 | #' * Chapter in the [mlr3book](https://mlr3book.mlr-org.com/): 4 | #' \url{https://mlr3book.mlr-org.com/chapters/chapter2/data_and_basic_modeling.html} 5 | #' * Package \CRANpkg{mlr3data} for more toy tasks. 6 | #' * Package \CRANpkg{mlr3oml} for downloading tasks from \url{https://www.openml.org}. 7 | #' * Package \CRANpkg{mlr3viz} for some generic visualizations. 8 | #' * [Dictionary][mlr3misc::Dictionary] of [Tasks][Task]: [mlr_tasks] 9 | #' * `as.data.table(mlr_tasks)` for a table of available [Tasks][Task] in the running session (depending on the loaded packages). 10 | #' * \CRANpkg{mlr3fselect} and \CRANpkg{mlr3filters} for feature selection and feature filtering. 
11 | #' * Extension packages for additional task types: 12 | #' * Unsupervised clustering: \CRANpkg{mlr3cluster} 13 | #' * Probabilistic supervised regression and survival analysis: \url{https://mlr3proba.mlr-org.com/}. 14 | #' 15 | #' @family Task 16 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/Task.md: -------------------------------------------------------------------------------- 1 | # $characteristics works 2 | 3 | Code 4 | task 5 | Output 6 | 7 | -- (4601x58): HP Spam Detection ---------------------------------- 8 | * Target: type 9 | * Target classes: spam (positive class, 39%), nonspam (61%) 10 | * Properties: twoclass 11 | * Features (57): 12 | * dbl (57): address, addresses, all, business, capitalAve, capitalLong, 13 | capitalTotal, charDollar, charExclamation, charHash, charRoundbracket, 14 | charSemicolon, charSquarebracket, conference, credit, cs, data, direct, edu, 15 | email, font, free, george, hp, hpl, internet, lab, labs, mail, make, meeting, 16 | money, num000, num1999, num3d, num415, num650, num85, num857, order, 17 | original, our, over, parts, people, pm, project, re, receive, remove, report, 18 | table, technology, telnet, will, you, your 19 | * Characteristics: foo=1, bar=a 20 | 21 | -------------------------------------------------------------------------------- /tests/testthat/test_MeasureRegr.R: -------------------------------------------------------------------------------- 1 | test_that("Regression measures", { 2 | keys = mlr_measures$keys("^regr\\.") 3 | task = tsk("california_housing") 4 | learner = lrn("regr.rpart") 5 | learner$train(task) 6 | p = learner$predict(task) 7 | 8 | for (key in keys) { 9 | m = mlr_measures$get(key) 10 | 11 | if (is.na(m$task_type) || m$task_type == "regr") { 12 | if (m$predict_type == "quantiles") { 13 | learner_q = lrn("regr.featureless", predict_type = "quantiles", quantiles = 0.5) 14 | learner_q$train(task) 15 | p_q = learner_q$predict(task) 
16 | perf = m$score(prediction = p_q, task = task, learner = learner_q) 17 | expect_number(perf, na.ok = FALSE, lower = m$range[1], upper = m$range[2]) 18 | } else { 19 | perf = m$score(prediction = p, task = task, learner = learner) 20 | expect_number(perf, na.ok = FALSE, lower = m$range[1], upper = m$range[2]) 21 | } 22 | } 23 | } 24 | }) 25 | -------------------------------------------------------------------------------- /tests/testthat/test_mlr_tasks.R: -------------------------------------------------------------------------------- 1 | test_that("mlr_tasks", { 2 | expect_dictionary(mlr_tasks, min_items = 1L) 3 | keys = mlr_tasks$keys() 4 | 5 | for (key in keys) { 6 | t = tsk(key) 7 | expect_task_supervised(t) 8 | } 9 | }) 10 | 11 | test_that("load_x", { 12 | ns = getNamespace("mlr3") 13 | nn = names(ns) 14 | nn = nn[startsWith(names(ns), "load_task")] 15 | 16 | for (fun in nn) { 17 | fun = get(fun, envir = ns, mode = "function") 18 | expect_task_supervised(fun()) 19 | } 20 | }) 21 | 22 | test_that("tasks are cloned", { 23 | if (packageVersion("mlr3misc") >= "0.9.2") { 24 | task = tsk("iris") 25 | mlr_tasks$add("foo", task) 26 | expect_different_address(task, tsk("foo")) 27 | mlr_tasks$remove("foo") 28 | } 29 | }) 30 | 31 | test_that("as.data.table(..., objects = TRUE)", { 32 | tab = as.data.table(mlr_tasks, objects = TRUE) 33 | expect_data_table(tab) 34 | expect_list(tab$object, "Task", any.missing = FALSE) 35 | }) 36 | -------------------------------------------------------------------------------- /R/as_resampling.R: -------------------------------------------------------------------------------- 1 | #' @title Convert to a Resampling 2 | #' 3 | #' @description 4 | #' Convert object to a [Resampling] or a list of [Resampling]. 5 | #' This method e.g. allows to convert an `OMLTask` of \CRANpkg{mlr3oml} to a [`Resampling`]. 6 | #' @inheritParams as_task 7 | #' @export 8 | as_resampling = function(x, ...) 
{ # nolint 9 | UseMethod("as_resampling") 10 | } 11 | 12 | #' @export 13 | #' @rdname as_resampling 14 | as_resampling.Resampling = function(x, clone = FALSE, ...) { # nolint 15 | assert_empty_ellipsis(...) 16 | if (isTRUE(clone)) x$clone() else x 17 | } 18 | 19 | #' @export 20 | #' @rdname as_resampling 21 | as_resamplings = function(x, ...) { # nolint 22 | UseMethod("as_resamplings") 23 | } 24 | 25 | #' @export 26 | #' @rdname as_resampling 27 | as_resamplings.default = function(x, ...) { # nolint 28 | list(as_resampling(x, ...)) 29 | } 30 | 31 | #' @export 32 | #' @rdname as_resampling 33 | as_resamplings.list = function(x, ...) { # nolint 34 | lapply(x, as_resampling, ...) 35 | } 36 | -------------------------------------------------------------------------------- /man-roxygen/param_measure_properties.R: -------------------------------------------------------------------------------- 1 | #' @param properties (`character()`)\cr 2 | #' Properties of the measure. 3 | #' Must be a subset of [mlr_reflections$measure_properties][mlr_reflections]. 4 | #' Supported by `mlr3`: 5 | #' * `"requires_task"` (requires the complete [Task]), 6 | #' * `"requires_learner"` (requires the trained [Learner]), 7 | #' * `"requires_model"` (requires the trained [Learner], including the fitted model), 8 | #' * `"requires_train_set"` (requires the training indices from the [Resampling]), 9 | #' * `"na_score"` (the measure is expected to occasionally return `NA` or `NaN`), 10 | #' * `"weights"` (support weighted scoring using sample weights from task, column role `weights_measure`), 11 | #' * `"primary_iters"` (the measure explicitly handles resamplings that only use a subset of their iterations for the point estimate), and 12 | #' * `"requires_no_prediction"` (no prediction is required; this usually means that the measure extracts some information from the learner state). 
13 | -------------------------------------------------------------------------------- /tests/testthat/test_as_learner.R: -------------------------------------------------------------------------------- 1 | test_that("as_learner conversion", { 2 | learner = lrn("classif.rpart") 3 | converted = as_learner(learner) 4 | cloned = as_learner(learner, clone = TRUE) 5 | 6 | expect_class(converted, "Learner") 7 | expect_same_address(learner, converted) 8 | expect_different_address(learner, cloned) 9 | 10 | expect_list(as_learners(learner), types = "Learner") 11 | expect_list(as_learners(list(learner)), types = "Learner") 12 | }) 13 | 14 | test_that("discard_state", { 15 | learner = lrn("classif.rpart")$train(tsk("iris")) 16 | learner2 = as_learner(learner, clone = TRUE, discard_state = TRUE) 17 | expect_null(learner2$state) 18 | expect_false(is.null(learner$state)) 19 | 20 | learner3 = lrn("classif.rpart") 21 | as_learner(learner3, clone = FALSE, discard_state = TRUE) 22 | expect_null(learner3$state) 23 | }) 24 | 25 | test_that("error when arguments are misspelled", { 26 | expect_error(as_learner(lrn("classif.rpart"), clone2 = TRUE), "Received the following") 27 | }) 28 | -------------------------------------------------------------------------------- /tests/testthat/test_mlr_resampling_subsampling.R: -------------------------------------------------------------------------------- 1 | test_that("subsampling has no duplicated ids", { 2 | r = rsmp("subsampling") 3 | expect_identical(r$duplicated_ids, FALSE) 4 | }) 5 | 6 | test_that("stratification", { 7 | data = data.table(y = factor(rep(letters[1:2], times = c(90, 10))), x1 = runif(100), x2 = rep(LETTERS[1:2], times = c(50, 50))) 8 | b = as_data_backend(data) 9 | task = TaskClassif$new("stratify_data", b, target = "y") 10 | task$col_roles$stratum = task$target_names 11 | 12 | r = rsmp("subsampling", ratio = 0.5, repeats = 3) 13 | r$instantiate(task) 14 | 15 | for (i in seq_len(r$iters)) { 16 | 
expect_equal(task$data(r$train_set(i))[y == "a", .N], 45) 17 | expect_equal(task$data(r$train_set(i))[y == "b", .N], 5) 18 | expect_equal(task$data(r$test_set(i))[y == "a", .N], 45) 19 | expect_equal(task$data(r$test_set(i))[y == "b", .N], 5) 20 | } 21 | }) 22 | 23 | test_that("grouping", { 24 | r = rsmp("subsampling", ratio = 0.5, repeats = 3) 25 | expect_grouping_works(r) 26 | }) 27 | -------------------------------------------------------------------------------- /R/TaskUnsupervised.R: -------------------------------------------------------------------------------- 1 | #' @title Unsupervised Task 2 | #' 3 | #' @include Task.R 4 | #' 5 | #' @description 6 | #' This is the abstract base class for unsupervised tasks such as cluster tasks in \CRANpkg{mlr3cluster} and \CRANpkg{mlr3spatial}. 7 | #' 8 | #' @template param_id 9 | #' @template param_task_type 10 | #' @template param_backend 11 | #' @template param_label 12 | #' @template param_extra_args 13 | #' 14 | #' @template seealso_task 15 | #' @keywords internal 16 | #' @export 17 | #' @examples 18 | #' TaskUnsupervised$new("penguins", task_type = "regr", backend = palmerpenguins::penguins) 19 | TaskUnsupervised = R6Class("TaskUnsupervised", 20 | inherit = Task, 21 | public = list( 22 | #' @description 23 | #' Creates a new instance of this [R6][R6::R6Class] class. 
24 | initialize = function(id, task_type = "unsupervised", backend, label = NA_character_, extra_args = list()) { 25 | super$initialize(id = id, task_type = task_type, backend = backend, label = label, extra_args = extra_args) 26 | } 27 | ) 28 | ) 29 | -------------------------------------------------------------------------------- /man/as_learner.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/as_learner.R 3 | \name{as_learner} 4 | \alias{as_learner} 5 | \alias{as_learner.Learner} 6 | \alias{as_learners} 7 | \alias{as_learners.default} 8 | \alias{as_learners.list} 9 | \title{Convert to a Learner} 10 | \usage{ 11 | as_learner(x, ...) 12 | 13 | \method{as_learner}{Learner}(x, clone = FALSE, discard_state = FALSE, ...) 14 | 15 | as_learners(x, ...) 16 | 17 | \method{as_learners}{default}(x, ...) 18 | 19 | \method{as_learners}{list}(x, ...) 20 | } 21 | \arguments{ 22 | \item{x}{(any)\cr 23 | Object to convert.} 24 | 25 | \item{...}{(any)\cr 26 | Additional arguments.} 27 | 28 | \item{clone}{(\code{logical(1)})\cr 29 | If \code{TRUE}, ensures that the returned object is not the same as the input \code{x}.} 30 | 31 | \item{discard_state}{(\code{logical(1)}) 32 | Whether to discard the state.} 33 | } 34 | \value{ 35 | \link{Learner}. 36 | } 37 | \description{ 38 | Convert object to a \link{Learner} or a list of \link{Learner}. 
39 | } 40 | -------------------------------------------------------------------------------- /tests/testthat/test_DataBackendDataTable.R: -------------------------------------------------------------------------------- 1 | test_that("DataBackendDataTable construction", { 2 | b = as_data_backend(iris) 3 | expect_backend(b) 4 | expect_iris_backend(b) 5 | 6 | i = 1:30 7 | data = iris[i, ] 8 | data$id = i 9 | b = as_data_backend(data, primary_key = "id") 10 | expect_backend(b) 11 | expect_set_equal(b$rownames, i) 12 | 13 | rownames(data) = sprintf("rn_%i", data$id) 14 | data$id = NULL 15 | b = as_data_backend(data, keep_rownames = TRUE) 16 | expect_backend(b) 17 | expect_set_equal(b$data(i, "..rownames")[[1]], sprintf("rn_%i", i)) 18 | 19 | data$Petal.Length[21:30] = NA 20 | b = as_data_backend(data) 21 | x = b$missings(b$rownames, c("Petal.Width", "Petal.Length")) 22 | expect_equal(x, set_names(c(0L, 10L), c("Petal.Width", "Petal.Length"))) 23 | 24 | b = as_data_backend(iris, primary_key = 151:300) 25 | expect_equal(b$rownames, 151:300) 26 | }) 27 | 28 | test_that("DataBackendDataTable with 0 rows", { 29 | b = as_data_backend(iris[integer(), ]) 30 | expect_backend(b) 31 | }) 32 | -------------------------------------------------------------------------------- /tests/testthat/test_mlr_learners.R: -------------------------------------------------------------------------------- 1 | test_that("mlr_learners", { 2 | expect_dictionary(mlr_learners, min_items = 1L) 3 | keys = mlr_learners$keys() 4 | 5 | for (key in keys) { 6 | l = lrn(key) 7 | if (key == "classif.debug") { 8 | expect_learner(l, task = tsk("iris")) 9 | } else { 10 | expect_learner(l) 11 | } 12 | if (inherits(l, "LearnerClassif")) { 13 | expect_true(startsWith(l$id, "classif.")) 14 | } 15 | if (inherits(l, "LearnerRegr")) { 16 | expect_true(startsWith(l$id, "regr.")) 17 | } 18 | } 19 | }) 20 | 21 | test_that("mlr_learners: sugar", { 22 | lrn = lrn("classif.rpart", id = "foo", cp = 0.001, predict_type = "prob") 
expect_equal(lrn$id, "foo") 24 | expect_equal(lrn$param_set$values$cp, 0.001) 25 | expect_equal(lrn$predict_type, "prob") 26 | }) 27 | 28 | test_that("as.data.table(..., objects = TRUE)", { 29 | tab = as.data.table(mlr_learners, objects = TRUE) 30 | expect_data_table(tab) 31 | expect_list(tab$object, "Learner", any.missing = FALSE) 32 | }) 33 | -------------------------------------------------------------------------------- /man/default_fallback.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/default_fallback.R 3 | \name{default_fallback} 4 | \alias{default_fallback} 5 | \alias{default_fallback.Learner} 6 | \alias{default_fallback.LearnerClassif} 7 | \alias{default_fallback.LearnerRegr} 8 | \title{Create a Fallback Learner} 9 | \usage{ 10 | default_fallback(learner, ...) 11 | 12 | \method{default_fallback}{Learner}(learner, ...) 13 | 14 | \method{default_fallback}{LearnerClassif}(learner, ...) 15 | 16 | \method{default_fallback}{LearnerRegr}(learner, ...) 17 | } 18 | \arguments{ 19 | \item{learner}{\link{Learner}\cr 20 | The learner for which a fallback learner should be created.} 21 | 22 | \item{...}{\code{any}\cr 23 | ignored.} 24 | } 25 | \value{ 26 | \link{Learner} 27 | } 28 | \description{ 29 | Create a fallback learner for a given learner. 30 | The function searches for a suitable fallback learner based on the task type. 31 | Additional checks are performed to ensure that the fallback learner supports the predict type. 
32 | } 33 | -------------------------------------------------------------------------------- /man/as_resampling.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/as_resampling.R 3 | \name{as_resampling} 4 | \alias{as_resampling} 5 | \alias{as_resampling.Resampling} 6 | \alias{as_resamplings} 7 | \alias{as_resamplings.default} 8 | \alias{as_resamplings.list} 9 | \title{Convert to a Resampling} 10 | \usage{ 11 | as_resampling(x, ...) 12 | 13 | \method{as_resampling}{Resampling}(x, clone = FALSE, ...) 14 | 15 | as_resamplings(x, ...) 16 | 17 | \method{as_resamplings}{default}(x, ...) 18 | 19 | \method{as_resamplings}{list}(x, ...) 20 | } 21 | \arguments{ 22 | \item{x}{(any)\cr 23 | Object to convert.} 24 | 25 | \item{...}{(any)\cr 26 | Additional arguments.} 27 | 28 | \item{clone}{(\code{logical(1)})\cr 29 | If \code{TRUE}, ensures that the returned object is not the same as the input \code{x}.} 30 | } 31 | \description{ 32 | Convert object to a \link{Resampling} or a list of \link{Resampling}. 33 | This method e.g. allows to convert an \code{OMLTask} of \CRANpkg{mlr3oml} to a \code{\link{Resampling}}. 
34 | } 35 | -------------------------------------------------------------------------------- /tests/testthat/test_mlr_measures_similarity.R: -------------------------------------------------------------------------------- 1 | task = tsk("penguins") 2 | learner = lrn("classif.rpart") 3 | rr = resample(task, learner, rsmp("cv", folds = 3), store_models = TRUE) 4 | 5 | test_that("similarity measures", { 6 | measures = mlr_measures$keys("^sim\\.") 7 | 8 | for (m in msrs(measures)) { 9 | expect_number(m$aggregate(rr)) 10 | expect_true(is.na(m$score(rr$prediction(), learner = rr$learners[[1]]))) 11 | expect_numeric(rr$aggregate(msrs(c("classif.acc", m$id))), len = 2, any.missing = FALSE) 12 | expect_true(allMissing(rr$score(m)[[m$id]])) 13 | } 14 | }) 15 | 16 | test_that("similarity example", { 17 | task = tsk("penguins") 18 | learners = list( 19 | lrn("classif.rpart", maxdepth = 1, id = "r1"), 20 | lrn("classif.rpart", maxdepth = 2, id = "r2") 21 | ) 22 | resampling = rsmp("cv", folds = 3) 23 | grid = benchmark_grid(task, learners, resampling) 24 | bmr = benchmark(grid, store_models = TRUE) 25 | x = bmr$aggregate(msr("sim.jaccard")) 26 | expect_numeric(x$sim.jaccard, any.missing = FALSE) 27 | }) 28 | -------------------------------------------------------------------------------- /R/default_measures.R: -------------------------------------------------------------------------------- 1 | #' @title Get the Default Measure 2 | #' 3 | #' @description 4 | #' Gets the default measures using the information in [mlr_reflections$default_measures][mlr_reflections]: 5 | #' * [`"classif.ce"`][mlr_measures_classif.ce] for classification (`"classif"`). 6 | #' * [`"regr.mse"`][mlr_measures_regr.mse] for regression (`"regr"`). 7 | #' * Add-on packages may register additional default measures for their own task types. 8 | #' 9 | #' @param task_type (`character(1)`)\cr 10 | #' Get the default measure for the task type `task_type`, e.g., `"classif"` or `"regr"`. 
11 | #' If `task_type` is `NULL`, an empty list is returned. 12 | #' 13 | #' @return list of [Measure]. 14 | #' 15 | #' @export 16 | #' @examples 17 | #' default_measures("classif") 18 | #' default_measures("regr") 19 | default_measures = function(task_type) { 20 | if (is.null(task_type)) { 21 | return(list()) 22 | } 23 | assert_choice(task_type, names(mlr_reflections$default_measures)) 24 | keys = mlr_reflections$default_measures[[task_type]] 25 | mlr_measures$mget(keys) 26 | } 27 | -------------------------------------------------------------------------------- /man-roxygen/param_average.R: -------------------------------------------------------------------------------- 1 | #' @param average (`character(1)`)\cr 2 | #' How to average multiple [Prediction]s from a [ResampleResult]. 3 | #' 4 | #' The default, `"macro"`, calculates the individual performances scores for each [Prediction] and then uses the 5 | #' function defined in `$aggregator` to average them to a single number. 6 | #' 7 | #' `"macro_weighted"` is similar to `"macro"`, but uses weighted averages. 8 | #' Weights are taken from the `weights_measure` column of the resampled [Task] if present. 9 | #' Note that `"macro_weighted"` can differ from `"macro"` even if no weights are present or if `$use_weights` is set to `"ignore"`, 10 | #' since then aggregation is done using *uniform sample weights*, which result in non-uniform weights for [Prediction]s if they contain different 11 | #' numbers of samples. 12 | #' 13 | #' If set to `"micro"`, the individual [Prediction] objects are first combined into a single new [Prediction] object which is then used to assess the performance. 14 | #' The function in `$aggregator` is not used in this case. 
15 | -------------------------------------------------------------------------------- /man/auto_convert.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/auto_convert.R 3 | \name{auto_convert} 4 | \alias{auto_convert} 5 | \title{Column Auto-Converter} 6 | \usage{ 7 | auto_convert(value, id, type, levels) 8 | } 9 | \arguments{ 10 | \item{value}{(any)\cr 11 | New values to convert in order to match \code{type}.} 12 | 13 | \item{id}{(\code{character(1)})\cr 14 | Name of the column, used in error messages.} 15 | 16 | \item{type}{(\code{character(1)})\cr 17 | Type to convert \code{values} to.} 18 | 19 | \item{levels}{(\code{character()} | \code{NULL})\cr 20 | Levels to use for conversion to \code{factor} or \code{ordered}.} 21 | } 22 | \value{ 23 | Vector \code{value} converted to type \code{type}. 24 | } 25 | \description{ 26 | Set of rules to automatically convert column types. 27 | This is used during \code{rbind}-ing of \link{Task}s, but also in some pipe operators in 28 | \CRANpkg{mlr3pipelines}. 29 | 30 | All rules are stored as functions in \link[=mlr_reflections]{mlr_reflections$auto_converters}. 31 | } 32 | \keyword{internal} 33 | -------------------------------------------------------------------------------- /man/default_measures.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/default_measures.R 3 | \name{default_measures} 4 | \alias{default_measures} 5 | \title{Get the Default Measure} 6 | \usage{ 7 | default_measures(task_type) 8 | } 9 | \arguments{ 10 | \item{task_type}{(\code{character(1)})\cr 11 | Get the default measure for the task type \code{task_type}, e.g., \code{"classif"} or \code{"regr"}. 12 | If \code{task_type} is \code{NULL}, an empty list is returned.} 13 | } 14 | \value{ 15 | list of \link{Measure}. 
16 | } 17 | \description{ 18 | Gets the default measures using the information in \link[=mlr_reflections]{mlr_reflections$default_measures}: 19 | \itemize{ 20 | \item \code{\link[=mlr_measures_classif.ce]{"classif.ce"}} for classification (\code{"classif"}). 21 | \item \code{\link[=mlr_measures_regr.mse]{"regr.mse"}} for regression (\code{"regr"}). 22 | \item Add-on package may register additional default measures for their own task types. 23 | } 24 | } 25 | \examples{ 26 | default_measures("classif") 27 | default_measures("regr") 28 | } 29 | -------------------------------------------------------------------------------- /man/deprecated_binding.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/warn_deprecated.R 3 | \name{deprecated_binding} 4 | \alias{deprecated_binding} 5 | \title{Create an Active Binding that Generates a Deprecation Warning} 6 | \usage{ 7 | deprecated_binding(what, value) 8 | } 9 | \arguments{ 10 | \item{what}{(character(1))\cr 11 | A description of the deprecated binding. Should be of the form \code{"Class$field"}.} 12 | 13 | \item{value}{(any)\cr 14 | The value of the active binding. This should be an expression that will 15 | be evaluated in the context of the active binding. It could, for example, 16 | refer to \code{self}.} 17 | } 18 | \description{ 19 | Creates an active binding that generates a warning when accessed, using 20 | \code{warn_deprecated()}. The active binding will otherwise be read-only. 
21 | } 22 | \examples{ 23 | MyClass = R6::R6Class("MyClass", public = list(), 24 | active = list( 25 | foo = deprecated_binding("MyClass$foo", "bar") 26 | ) 27 | ) 28 | mco = MyClass$new() 29 | mco$foo 30 | } 31 | \keyword{internal} 32 | -------------------------------------------------------------------------------- /tests/testthat/test_install_pkgs.R: -------------------------------------------------------------------------------- 1 | test_that("extract_pkgs works", { 2 | skip_if_not_installed("remotes") 3 | 4 | # this is a noop, mlr3 is stripped from required packages 5 | expect_character(install_pkgs("mlr3"), len = 0) 6 | 7 | expect_set_equal( 8 | extract_pkgs("rpart"), 9 | "rpart" 10 | ) 11 | 12 | expect_set_equal( 13 | extract_pkgs(lrn("classif.rpart")), 14 | c("mlr3", "rpart") 15 | ) 16 | 17 | expect_set_equal( 18 | extract_pkgs(msr("classif.auc")), 19 | c("mlr3", "mlr3measures") 20 | ) 21 | 22 | expect_set_equal( 23 | extract_pkgs(tgen("xor")), 24 | c("mlr3", "mlbench") 25 | ) 26 | 27 | rr = resample(tsk("mtcars"), lrn("regr.featureless"), rsmp("holdout")) 28 | expect_set_equal( 29 | extract_pkgs(rr), 30 | c("mlr3", "stats") 31 | ) 32 | 33 | bmr = as_benchmark_result(rr) # also check the converted benchmark result 34 | expect_set_equal( 35 | extract_pkgs(bmr), 36 | c("mlr3", "stats") 37 | ) 38 | 39 | expect_set_equal( 40 | extract_pkgs(list(lrns(c("regr.rpart", "regr.featureless")))), 41 | c("mlr3", "rpart", "stats") 42 | ) 43 | }) 44 | -------------------------------------------------------------------------------- /man/task_check_col_roles.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Task.R 3 | \name{task_check_col_roles} 4 | \alias{task_check_col_roles} 5 | \alias{task_check_col_roles.Task} 6 | \alias{task_check_col_roles.TaskClassif} 7 | \alias{task_check_col_roles.TaskRegr} 8 | \alias{task_check_col_roles.TaskSupervised} 9 | \alias{task_check_col_roles.TaskUnsupervised} 10 
| \title{Check Column Roles} 11 | \usage{ 12 | task_check_col_roles(task, new_roles, ...) 13 | 14 | \method{task_check_col_roles}{Task}(task, new_roles, ...) 15 | 16 | \method{task_check_col_roles}{TaskClassif}(task, new_roles, ...) 17 | 18 | \method{task_check_col_roles}{TaskRegr}(task, new_roles, ...) 19 | 20 | \method{task_check_col_roles}{TaskSupervised}(task, new_roles, ...) 21 | 22 | \method{task_check_col_roles}{TaskUnsupervised}(task, new_roles, ...) 23 | } 24 | \arguments{ 25 | \item{task}{(\link{Task})\cr 26 | Task.} 27 | 28 | \item{new_roles}{(\code{list()})\cr 29 | Column roles.} 30 | } 31 | \description{ 32 | Internal function to check column roles. 33 | } 34 | \keyword{internal} 35 | -------------------------------------------------------------------------------- /tests/testthat/test_as_task.R: -------------------------------------------------------------------------------- 1 | test_that("as_task conversion", { 2 | task = tsk("zoo") 3 | converted = as_task(task) 4 | cloned = as_task(task, clone = TRUE) 5 | 6 | expect_class(converted, "Task") 7 | expect_same_address(task, converted) 8 | expect_different_address(task, cloned) 9 | 10 | expect_list(as_tasks(task), types = "Task") 11 | expect_list(as_tasks(list(task)), types = "Task") 12 | }) 13 | 14 | test_that("as_task_xx error messages (#944)", { 15 | expect_error( 16 | as_task_classif(data.frame(x = factor(c("a", "b", "a", "b"), levels = c("a", "b")), y = factor(c("a", "b", "a", "b"), levels = c("a", "b")))), 17 | "argument \"target\" is missing, with no default" 18 | ) 19 | 20 | expect_error( 21 | as_task_regr(data.frame(x = factor(c("a", "b", "a", "b"), levels = c("a", "b")), y = factor(c("a", "b", "a", "b"), levels = c("a", "b")))), 22 | "argument \"target\" is missing, with no default" 23 | ) 24 | }) 25 | 26 | test_that("error when arguments are misspelled", { 27 | expect_error(as_task(tsk("iris"), clone2 = TRUE), "Received the following") 28 | }) 29 | 
-------------------------------------------------------------------------------- /tests/testthat/test_mlr_resamplings.R: -------------------------------------------------------------------------------- 1 | test_that("mlr_resamplings", { 2 | expect_dictionary(mlr_resamplings, min_items = 1L) 3 | keys = setdiff(mlr_resamplings$keys(), "ordered_holdout") 4 | 5 | task = tsk("iris") 6 | for (key in keys) { 7 | r = rsmp(key) 8 | expect_resampling(r) # construction works 9 | expect_false(r$is_instantiated) 10 | if (key == "custom") { 11 | ret = r$instantiate(task, list(1:3), list(5:9)) 12 | } else if (key == "custom_cv") { 13 | ret = r$instantiate(task, iris$Species) 14 | } else { 15 | ret = r$instantiate(task) 16 | } 17 | expect_r6(ret, "Resampling") 18 | expect_true(r$is_instantiated) 19 | expect_resampling(r) 20 | } 21 | }) 22 | 23 | test_that("mlr_resamplings: sugar", { 24 | r = rsmp("cv", id = "cv3", folds = 3L) 25 | expect_equal(r$id, "cv3") 26 | expect_equal(r$param_set$values$folds, 3L) 27 | }) 28 | 29 | test_that("as.data.table(..., objects = TRUE)", { 30 | tab = as.data.table(mlr_resamplings, objects = TRUE) 31 | expect_data_table(tab) 32 | expect_list(tab$object, "Resampling", any.missing = FALSE) 33 | }) 34 | -------------------------------------------------------------------------------- /R/TaskRegr_mtcars.R: -------------------------------------------------------------------------------- 1 | #' @title Motor Trend Regression Task 2 | #' 3 | #' @name mlr_tasks_mtcars 4 | #' @format [R6::R6Class] inheriting from [TaskRegr]. 5 | #' @include mlr_tasks.R 6 | #' 7 | #' @description 8 | #' A regression task for the [datasets::mtcars] data set. 9 | #' Target variable is `mpg` (Miles/(US) gallon). 10 | #' Rownames are stored as variable `"model"` with column role `"name"`. 
11 | #' 12 | #' @section Construction: 13 | #' ``` 14 | #' mlr_tasks$get("mtcars") 15 | #' tsk("mtcars") 16 | #' ``` 17 | #' 18 | #' @section Meta Information: 19 | #' `r rd_info(tsk("mtcars"))` 20 | #' 21 | #' @template seealso_task 22 | NULL 23 | 24 | load_task_mtcars = function(id = "mtcars") { 25 | b = as_data_backend(load_dataset("mtcars", "datasets", keep_rownames = TRUE), keep_rownames = "model") 26 | task = TaskRegr$new(id, b, target = "mpg", label = "Motor Trends") 27 | b$hash = task$man = "mlr3::mlr_tasks_mtcars" 28 | task$col_roles$name = "model" 29 | task$col_roles$feature = setdiff(task$col_roles$feature, "model") 30 | task 31 | } 32 | 33 | #' @include mlr_tasks.R 34 | mlr_tasks$add("mtcars", load_task_mtcars) 35 | -------------------------------------------------------------------------------- /tests/testthat/test_mlr_resampling_custom.R: -------------------------------------------------------------------------------- 1 | test_that("custom has duplicated ids", { 2 | r = rsmp("custom") 3 | expect_identical(r$duplicated_ids, TRUE) 4 | }) 5 | 6 | test_that("custom_cv accepts external factor", { 7 | task = tsk("penguins") 8 | task$filter(1:10) 9 | 10 | ccv = rsmp("custom_cv") 11 | split_f = factor(c(rep(letters[1:3], each = 3), NA)) 12 | ccv$instantiate(task, f = split_f) 13 | 14 | expect_class(ccv$instance, "list") 15 | expect_length(ccv$instance, 3) 16 | expect_length(ccv$train_set(3), 6) 17 | 18 | expect_identical(ccv$duplicated_ids, FALSE) 19 | }) 20 | 21 | test_that("custom_cv accepts task feature", { 22 | task = tsk("german_credit") 23 | ccv = rsmp("custom_cv") 24 | expect_identical(ccv$duplicated_ids, FALSE) 25 | 26 | ccv$instantiate(task, f = task$data(cols = "job")[[1L]]) 27 | expect_class(ccv$instance, "list") 28 | expect_length(ccv$instance, 4) 29 | expect_length(ccv$train_set(3), 370) 30 | 31 | ccv$instantiate(task, col = "job") 32 | expect_class(ccv$instance, "list") 33 | expect_length(ccv$instance, 4) 34 | expect_length(ccv$train_set(3), 
370) 35 | }) 36 | -------------------------------------------------------------------------------- /man-roxygen/measure_regr.R: -------------------------------------------------------------------------------- 1 | #' @name mlr_measures_regr.<%= id %> 2 | #' @include mlr_measures.R 3 | #' 4 | #' @inherit mlr3measures::<%= id %> title description details 5 | #' 6 | #' @section Dictionary: 7 | #' This [Measure] can be instantiated via the [dictionary][mlr3misc::Dictionary] [mlr_measures] or with the associated sugar function [msr()]: 8 | #' ``` 9 | #' mlr_measures$get("regr.<%= id %>") 10 | #' msr("regr.<%= id %>") 11 | #' ``` 12 | #' 13 | #' @inheritSection mlr3measures::<%= id %> Meta Information 14 | #' 15 | #' @section Parameters: 16 | #' `r mlr3misc::rd_info(mlr3::msr("regr.<%= id %>")$param_set)` 17 | #' @md 18 | #' 19 | #' @note 20 | #' The score function calls [mlr3measures::<%= id %>()] from package \CRANpkg{mlr3measures}. 21 | #' 22 | #' If the measure is undefined for the input, `NaN` is returned. 23 | #' This can be customized by setting the field `na_value`. 24 | #' 25 | #' @family regression measures 26 | #' 27 | #' @seealso 28 | #' [Dictionary][mlr3misc::Dictionary] of [Measures][Measure]: [mlr_measures] 29 | #' 30 | #' `as.data.table(mlr_measures)` for a complete table of all (also dynamically created) [Measure] implementations. 31 | -------------------------------------------------------------------------------- /man-roxygen/seealso_learner.R: -------------------------------------------------------------------------------- 1 | #' @seealso 2 | #' 3 | #' * Chapter in the [mlr3book](https://mlr3book.mlr-org.com/): 4 | #' \url{https://mlr3book.mlr-org.com/chapters/chapter2/data_and_basic_modeling.html#sec-learners} 5 | #' * Package \CRANpkg{mlr3learners} for a solid collection of essential learners. 6 | #' * Package [mlr3extralearners](https://github.com/mlr-org/mlr3extralearners) for more learners. 
7 | #' * [Dictionary][mlr3misc::Dictionary] of [Learners][Learner]: [mlr_learners] 8 | #' * `as.data.table(mlr_learners)` for a table of available [Learners][Learner] in the running session (depending on the loaded packages). 9 | #' * \CRANpkg{mlr3pipelines} to combine learners with pre- and postprocessing steps. 10 | #' * Package \CRANpkg{mlr3viz} for some generic visualizations. 11 | #' * Extension packages for additional task types: 12 | #' * \CRANpkg{mlr3proba} for probabilistic supervised regression and survival analysis. 13 | #' * \CRANpkg{mlr3cluster} for unsupervised clustering. 14 | #' * \CRANpkg{mlr3tuning} for tuning of hyperparameters, \CRANpkg{mlr3tuningspaces} 15 | #' for established default tuning spaces. 16 | #' 17 | #' @family Learner 18 | -------------------------------------------------------------------------------- /tests/testthat/test_mlr_task_generators.R: -------------------------------------------------------------------------------- 1 | test_that("mlr_task_generators", { 2 | expect_dictionary(mlr_task_generators, min_items = 1L, contains = "TaskGenerator") 3 | keys = mlr_task_generators$keys() 4 | n = 30L 5 | 6 | for (key in keys) { 7 | gen = mlr_task_generators$get(key) 8 | expect_task_generator(gen) 9 | 10 | task = gen$generate(n) 11 | expect_task(task) 12 | expect_equal(gen$task_type, task$task_type) 13 | if (!inherits(gen, "TaskGeneratorSimplex")) { 14 | expect_equal(task$nrow, n) 15 | } 16 | } 17 | }) 18 | 19 | test_that("as.data.table(mlr_task_generators)", { 20 | tab = as.data.table(mlr_task_generators) 21 | expect_data_table(tab, min.cols = 2L) 22 | expect_character(tab$key, unique = TRUE, any.missing = FALSE) 23 | }) 24 | 25 | test_that("sugar functions", { 26 | expect_task_generator(tgen("xor")) 27 | expect_list(tgens(c("xor", "smiley")), "TaskGenerator") 28 | }) 29 | 30 | test_that("as.data.table(..., objects = TRUE)", { 31 | tab = as.data.table(mlr_task_generators, objects = TRUE) 32 | expect_data_table(tab) 33 | 
expect_list(tab$object, "TaskGenerator", any.missing = FALSE) 34 | }) 35 | -------------------------------------------------------------------------------- /tests/testthat/test_mlr_resampling_cv.R: -------------------------------------------------------------------------------- 1 | test_that("cv has no duplicated ids", { 2 | r = rsmp("cv") 3 | expect_identical(r$duplicated_ids, FALSE) 4 | }) 5 | 6 | test_that("split into evenly sized groups", { 7 | task = tsk("iris") 8 | r = rsmp("cv", folds = 3) 9 | r$instantiate(task) 10 | expect_equal(uniqueN(lengths(lapply(1:3, r$train_set))), 1L) 11 | }) 12 | 13 | test_that("stratification", { 14 | data = data.table(y = factor(rep(letters[1:2], times = c(90, 10))), x1 = runif(100), x2 = rep(LETTERS[1:2], times = c(50, 50))) 15 | b = as_data_backend(data) 16 | task = TaskClassif$new("stratify_data", b, target = "y") 17 | task$col_roles$stratum = task$target_names 18 | 19 | r = rsmp("cv", folds = 5L) 20 | r$instantiate(task) 21 | 22 | for (i in seq_len(r$iters)) { 23 | expect_equal(task$data(r$train_set(i))[y == "a", .N], 72) 24 | expect_equal(task$data(r$train_set(i))[y == "b", .N], 8) 25 | expect_equal(task$data(r$test_set(i))[y == "a", .N], 18) 26 | expect_equal(task$data(r$test_set(i))[y == "b", .N], 2) 27 | } 28 | }) 29 | 30 | test_that("grouping", { 31 | r = rsmp("cv", folds = 3) 32 | expect_grouping_works(r) 33 | }) 34 | -------------------------------------------------------------------------------- /man/warn_deprecated.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/warn_deprecated.R 3 | \name{warn_deprecated} 4 | \alias{warn_deprecated} 5 | \title{Give a Warning about a Deprecated Function, Argument, or Active Binding} 6 | \usage{ 7 | warn_deprecated(what) 8 | } 9 | \arguments{ 10 | \item{what}{(character(1))\cr 11 | A description of the deprecated entity. 
This should be somewhat descriptive, 12 | e.g. \code{"Class$method()"} or \code{"Argument 'foo' of Class$method()"}.\cr 13 | The \code{what} is used to determine if the warning has already been given, so 14 | it should be unique for each deprecated entity.} 15 | } 16 | \description{ 17 | Generates a warning when a deprecated function, argument, or active binding 18 | is used or accessed. A warning will only be given once per session, and all 19 | deprecation warnings can be suppressed by setting the option 20 | \code{mlr3.warn_deprecated = FALSE}. 21 | 22 | The warning is of the format 23 | "what is deprecated and will be removed in the future." 24 | 25 | Use the 'deprecated_binding()' helper function to create an active binding 26 | that generates a warning when accessed. 27 | } 28 | \keyword{internal} 29 | -------------------------------------------------------------------------------- /man-roxygen/measure_binary.R: -------------------------------------------------------------------------------- 1 | #' @name mlr_measures_classif.<%= id %> 2 | #' @include mlr_measures.R 3 | #' 4 | #' @inherit mlr3measures::<%= id %> title description details 5 | #' 6 | #' @section Dictionary: 7 | #' This [Measure] can be instantiated via the [dictionary][mlr3misc::Dictionary] [mlr_measures] or with the associated sugar function [msr()]: 8 | #' ``` 9 | #' mlr_measures$get("classif.<%= id %>") 10 | #' msr("classif.<%= id %>") 11 | #' ``` 12 | #' 13 | #' @inheritSection mlr3measures::<%= id %> Meta Information 14 | #' 15 | #' @section Parameters: 16 | #' `r mlr3misc::rd_info(mlr3::msr("classif.<%= id %>")$param_set)` 17 | #' @md 18 | #' 19 | #' @note 20 | #' The score function calls [mlr3measures::<%= id %>()] from package \CRANpkg{mlr3measures}. 21 | #' 22 | #' If the measure is undefined for the input, `NaN` is returned. 23 | #' This can be customized by setting the field `na_value`. 
24 | #' 25 | #' @family classification measures 26 | #' @family binary classification measures 27 | #' 28 | #' @seealso 29 | #' [Dictionary][mlr3misc::Dictionary] of [Measures][Measure]: [mlr_measures] 30 | #' 31 | #' `as.data.table(mlr_measures)` for a complete table of all (also dynamically created) [Measure] implementations. 32 | -------------------------------------------------------------------------------- /man-roxygen/measure_classif.R: -------------------------------------------------------------------------------- 1 | #' @name mlr_measures_classif.<%= id %> 2 | #' @include mlr_measures.R 3 | #' 4 | #' @inherit mlr3measures::<%= id %> title description details 5 | #' 6 | #' @section Dictionary: 7 | #' This [Measure] can be instantiated via the [dictionary][mlr3misc::Dictionary] [mlr_measures] or with the associated sugar function [msr()]: 8 | #' ``` 9 | #' mlr_measures$get("classif.<%= id %>") 10 | #' msr("classif.<%= id %>") 11 | #' ``` 12 | #' 13 | #' @inheritSection mlr3measures::<%= id %> Meta Information 14 | #' 15 | #' @section Parameters: 16 | #' `r mlr3misc::rd_info(mlr3::msr("classif.<%= id %>")$param_set)` 17 | #' @md 18 | #' 19 | #' @note 20 | #' The score function calls [mlr3measures::<%= id %>()] from package \CRANpkg{mlr3measures}. 21 | #' 22 | #' If the measure is undefined for the input, `NaN` is returned. 23 | #' This can be customized by setting the field `na_value`. 24 | #' 25 | #' @family classification measures 26 | #' @family multiclass classification measures 27 | #' 28 | #' @seealso 29 | #' [Dictionary][mlr3misc::Dictionary] of [Measures][Measure]: [mlr_measures] 30 | #' 31 | #' `as.data.table(mlr_measures)` for a complete table of all (also dynamically created) [Measure] implementations. 
32 | -------------------------------------------------------------------------------- /R/as_learner.R: -------------------------------------------------------------------------------- 1 | #' @title Convert to a Learner 2 | #' 3 | #' @description 4 | #' Convert object to a [Learner] or a list of [Learner]. 5 | #' 6 | #' @inheritParams as_task 7 | #' 8 | #' @return [Learner]. 9 | #' @export 10 | as_learner = function(x, ...) { # nolint 11 | UseMethod("as_learner") 12 | } 13 | 14 | #' @export 15 | #' @param discard_state (`logical(1)`) 16 | #' Whether to discard the state. 17 | #' @rdname as_learner 18 | as_learner.Learner = function(x, clone = FALSE, discard_state = FALSE, ...) { # nolint 19 | assert_empty_ellipsis(...) 20 | if (isTRUE(clone) && isTRUE(discard_state)) { 21 | clone_without(x, "state") 22 | } else if (isTRUE(clone)) { 23 | x$clone(deep = TRUE) 24 | } else if (isTRUE(discard_state)) { 25 | x$state = NULL 26 | x 27 | } else { 28 | x 29 | } 30 | } 31 | 32 | #' @export 33 | #' @rdname as_learner 34 | as_learners = function(x, ...) { # nolint 35 | UseMethod("as_learners") 36 | } 37 | 38 | #' @export 39 | #' @rdname as_learner 40 | as_learners.default = function(x, ...) { # nolint 41 | list(as_learner(x, ...)) 42 | } 43 | 44 | #' @export 45 | #' @rdname as_learner 46 | as_learners.list = function(x, ...) { # nolint 47 | lapply(x, as_learner, ...) 
48 | } 49 | -------------------------------------------------------------------------------- /tests/testthat/test_hashes.R: -------------------------------------------------------------------------------- 1 | expect_hash_changes = function(x) { 2 | id_before = x$id 3 | hash_before = x$hash 4 | 5 | x$id = "foo" 6 | expect_false(identical(x$id, id_before)) 7 | if (!is.na(hash_before)) { 8 | expect_false(identical(x$hash, hash_before)) 9 | } 10 | x$id = id_before 11 | expect_identical(x$id, id_before) 12 | expect_identical(x$hash, hash_before) 13 | if ("use_weights" %in% names(x)) { 14 | use_weights_before = x$use_weights 15 | x$use_weights = "ignore" 16 | expect_false(identical(x$use_weights, use_weights_before)) 17 | if (!is.na(hash_before)) { 18 | expect_false(identical(x$hash, hash_before)) 19 | } 20 | x$use_weights = use_weights_before 21 | expect_identical(x$use_weights, use_weights_before) 22 | expect_identical(x$hash, hash_before) 23 | } 24 | } 25 | 26 | test_that("task$hash", { 27 | x = tsk("iris") 28 | expect_hash_changes(x) 29 | }) 30 | 31 | test_that("learner$hash", { 32 | x = lrn("classif.rpart") 33 | expect_hash_changes(x) 34 | }) 35 | 36 | test_that("measure$hash", { 37 | x = msr("classif.ce") 38 | expect_hash_changes(x) 39 | }) 40 | 41 | test_that("resampling$hash", { 42 | x = rsmp("cv") 43 | expect_hash_changes(x) 44 | }) 45 | -------------------------------------------------------------------------------- /R/set_validate.R: -------------------------------------------------------------------------------- 1 | #' @title Enable Validation for Learner 2 | #' 3 | #' @description 4 | #' Helper function to configure the `$validate` field(s) of a [`Learner`]. 5 | #' 6 | #' This is especially useful for learners such as `AutoTuner` of \CRANpkg{mlr3tuning} or `GraphLearner` of \CRANpkg{mlr3pipelines} which have multiple levels of `$validate` fields, 7 | #' where the `$validate` fields need to be configured on multiple levels. 
8 | #' 9 | #' @param learner (any)\cr 10 | #' The learner. 11 | #' @param validate (`numeric(1)`, `"predefined"`, `"test"`, or `NULL`)\cr 12 | #' Which validation set to use. 13 | #' @param ... (any)\cr 14 | #' Additional arguments. 15 | #' 16 | #' @export 17 | #' @return Modified [`Learner`] 18 | #' @rdname mlr_sugar 19 | #' @examples 20 | #' learner = lrn("classif.debug") 21 | #' set_validate(learner, 0.2) 22 | #' learner$validate 23 | set_validate = function(learner, validate, ...) { 24 | UseMethod("set_validate") 25 | } 26 | 27 | #' @export 28 | set_validate.Learner = function(learner, validate, ...) { 29 | if (!"validation" %chin% learner$properties) { 30 | error_input("Learner '%s' does not support validation.", learner$id) 31 | } 32 | learner$validate = validate 33 | invisible(learner) 34 | } 35 | -------------------------------------------------------------------------------- /R/TaskRegr_california_housing.R: -------------------------------------------------------------------------------- 1 | #' @title Median House Value in California 2 | #' 3 | #' @name california_housing 4 | #' @format [R6::R6Class] inheriting from [TaskRegr]. 5 | #' @aliases mlr_tasks_california_housing 6 | #' 7 | #' @description 8 | #' A regression task to predict the median house value in California. 9 | #' 10 | #' Contains 9 features and 20640 observations. 11 | #' Target column is `"median_house_value"`. 
12 | #' 13 | #' @section Construction: 14 | #' ``` 15 | #' mlr_tasks$get("california_housing") 16 | #' tsk("california_housing") 17 | #' ``` 18 | #' 19 | #' @section Meta Information: 20 | #' `r rd_info(tsk("california_housing"))` 21 | #' 22 | #' @source https://www.kaggle.com/datasets/camnugent/california-housing-prices 23 | #' 24 | #' @template seealso_task 25 | NULL 26 | 27 | load_task_california_housing = function(id = "california_housing") { 28 | b = as_data_backend(readRDS(system.file("extdata", "california_housing.rds", package = "mlr3"))) 29 | task = mlr3::TaskRegr$new(id, b, target = "median_house_value", label = "California House Value") 30 | b$hash = task$man = "mlr3::mlr_tasks_california_housing" 31 | task 32 | } 33 | 34 | #' @include mlr_tasks.R 35 | mlr_tasks$add("california_housing", load_task_california_housing) 36 | -------------------------------------------------------------------------------- /man-roxygen/measure_similarity.R: -------------------------------------------------------------------------------- 1 | #' @name mlr_measures_sim.<%= id %> 2 | #' @include mlr_measures.R 3 | #' 4 | #' @inherit mlr3measures::<%= id %> title description details 5 | #' @inheritSection mlr3measures::<%= id %> Meta Information 6 | #' 7 | #' @section Parameters: 8 | #' `r mlr3misc::rd_info(mlr3::msr("sim.<%= id %>")$param_set)` 9 | #' @md 10 | #' 11 | #' @section Dictionary: 12 | #' This [Measure] can be instantiated via the [dictionary][mlr3misc::Dictionary] [mlr_measures] or with the associated sugar function [msr()]: 13 | #' ``` 14 | #' mlr_measures$get("sim.<%= id %>") 15 | #' msr("sim.<%= id %>") 16 | #' ``` 17 | #' 18 | #' @note 19 | #' This measure requires learners with property `"selected_features"`. 20 | #' The extracted feature sets are passed to [mlr3measures::<%= id %>()] from 21 | #' package \CRANpkg{mlr3measures}. 22 | #' 23 | #' If the measure is undefined for the input, `NaN` is returned. 
24 | #' This can be customized by setting the field `na_value`. 25 | #' 26 | #' @family similarity measures 27 | #' 28 | #' @seealso 29 | #' [Dictionary][mlr3misc::Dictionary] of [Measures][Measure]: [mlr_measures] 30 | #' 31 | #' `as.data.table(mlr_measures)` for a complete table of all (also dynamically created) [Measure] implementations. 32 | -------------------------------------------------------------------------------- /man/figures/block.svg: -------------------------------------------------------------------------------- 1 | 2 | 12 | 14 | 15 | 17 | image/svg+xml 18 | 20 | 21 | 22 | 23 | 24 | 26 | 30 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /man/as_prediction_regr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/as_prediction_regr.R 3 | \name{as_prediction_regr} 4 | \alias{as_prediction_regr} 5 | \alias{as_prediction_regr.PredictionRegr} 6 | \alias{as_prediction_regr.data.frame} 7 | \title{Convert to a Regression Prediction} 8 | \usage{ 9 | as_prediction_regr(x, ...) 10 | 11 | \method{as_prediction_regr}{PredictionRegr}(x, ...) 12 | 13 | \method{as_prediction_regr}{data.frame}(x, ...) 14 | } 15 | \arguments{ 16 | \item{x}{(any)\cr 17 | Object to convert.} 18 | 19 | \item{...}{(any)\cr 20 | Additional arguments.} 21 | } 22 | \value{ 23 | \link{PredictionRegr}. 24 | } 25 | \description{ 26 | Convert object to a \link{PredictionRegr}. 
27 | } 28 | \examples{ 29 | # create a prediction object 30 | task = tsk("mtcars") 31 | learner = lrn("regr.rpart") 32 | learner$train(task) 33 | p = learner$predict(task) 34 | 35 | # convert to a data.table 36 | tab = as.data.table(p) 37 | 38 | # convert back to a Prediction 39 | as_prediction_regr(tab) 40 | 41 | # split data.table into a list of data.tables 42 | tabs = split(tab, cut(tab$truth, 3)) 43 | 44 | # convert back to list of predictions 45 | preds = lapply(tabs, as_prediction_regr) 46 | 47 | # calculate performance in each group 48 | sapply(preds, function(p) p$score()) 49 | } 50 | -------------------------------------------------------------------------------- /tests/testthat/_object_snapshots/snapshot.R: -------------------------------------------------------------------------------- 1 | task_classif = tsk("pima") # mlr3 1.2.0 2 | saveRDS(task_classif, "tests/testthat/_object_snapshots/task_classif.rds") 3 | 4 | task_regr = tsk("mtcars") # mlr3 1.2.0 5 | saveRDS(task_regr, "tests/testthat/_object_snapshots/task_regr.rds") 6 | 7 | learner_classif = lrn("classif.rpart") # mlr3 1.0.0 8 | learner_classif$train(task_classif) 9 | saveRDS(learner_classif, "tests/testthat/_object_snapshots/learner_classif.rds") 10 | 11 | learner_regr = lrn("regr.rpart") # mlr3 1.0.0 12 | learner_regr$train(task_regr) 13 | saveRDS(learner_regr, "tests/testthat/_object_snapshots/learner_regr.rds") 14 | 15 | resampling = rsmp("cv", folds = 3) # mlr3 1.0.0 16 | resampling$instantiate(task_classif) 17 | saveRDS(resampling, "tests/testthat/_object_snapshots/resampling.rds") 18 | 19 | rr = resample(task_classif, learner_classif, resampling) # mlr3 1.2.0 20 | saveRDS(rr, "tests/testthat/_object_snapshots/rr.rds") 21 | 22 | design = benchmark_grid( 23 | tasks = list(task_classif), 24 | learners = list(learner_classif), 25 | resamplings = list(resampling) 26 | ) 27 | 28 | bmr = benchmark(design) # mlr3 1.2.0 29 | saveRDS(bmr, "tests/testthat/_object_snapshots/bmr.rds") 30 | 31 | measure 
= msr("classif.ce") # mlr3 1.0.0 32 | saveRDS(measure, "tests/testthat/_object_snapshots/measure.rds") 33 | 34 | -------------------------------------------------------------------------------- /tests/testthat/test_default_fallback.R: -------------------------------------------------------------------------------- 1 | test_that("fallback = default_fallback() works", { 2 | learner = lrn("classif.rpart") 3 | fallback = default_fallback(learner) 4 | 5 | expect_class(fallback, "LearnerClassifFeatureless") 6 | expect_equal(fallback$predict_type, "response") 7 | 8 | learner = lrn("classif.rpart", predict_type = "prob") 9 | fallback = default_fallback(learner) 10 | 11 | expect_class(fallback, "LearnerClassifFeatureless") 12 | expect_equal(fallback$predict_type, "prob") 13 | 14 | learner = lrn("regr.rpart") 15 | fallback = default_fallback(learner) 16 | 17 | expect_class(fallback, "LearnerRegrFeatureless") 18 | expect_equal(fallback$predict_type, "response") 19 | 20 | learner = lrn("regr.debug", predict_type = "se") 21 | fallback = default_fallback(learner) 22 | 23 | expect_class(fallback, "LearnerRegrFeatureless") 24 | expect_equal(fallback$predict_type, "se") 25 | 26 | learner = lrn("regr.debug", 27 | predict_type = "quantiles", 28 | quantiles = c(0.1, 0.9), 29 | quantile_response = 0.1) 30 | fallback = default_fallback(learner) 31 | 32 | expect_class(fallback, "LearnerRegrFeatureless") 33 | expect_equal(fallback$predict_type, "quantiles") 34 | expect_equal(fallback$quantiles, c(0.1, 0.9)) 35 | expect_equal(fallback$quantile_response, 0.1) 36 | }) 37 | -------------------------------------------------------------------------------- /R/TaskClassif_wine.R: -------------------------------------------------------------------------------- 1 | #' @title Wine Classification Task 2 | #' 3 | #' @name mlr_tasks_wine 4 | #' @format [R6::R6Class] inheriting from [TaskClassif]. 
5 | #' @include mlr_tasks.R 6 | #' 7 | #' @description 8 | #' Wine data set from the UCI machine learning repository (\url{http://archive.ics.uci.edu/dataset/109/wine}). 9 | #' Results of a chemical analysis of three types of wines grown in the same region in Italy but derived from three different cultivars. 10 | #' 11 | #' @templateVar id wine 12 | #' @template task 13 | #' 14 | #' @source 15 | #' Original owners: 16 | #' Forina, M. et al, PARVUS - An Extendible Package for Data Exploration, Classification and Correlation. 17 | #' Institute of Pharmaceutical and Food Analysis and Technologies, Via Brigata Salerno, 16147 Genoa, Italy. 18 | #' 19 | #' Donor: 20 | #' Stefan Aeberhard, email: stefan@coral.cs.jcu.edu.au 21 | #' 22 | #' @references 23 | #' `r format_bib("dua_2017")` 24 | #' 25 | #' @template seealso_task 26 | NULL 27 | 28 | load_task_wine = function(id = "wine") { 29 | b = as_data_backend(readRDS(system.file("extdata", "wine.rds", package = "mlr3"))) 30 | task = TaskClassif$new(id, b, target = "type", 31 | label = "Wine Regions") 32 | b$hash = task$man = "mlr3::mlr_tasks_wine" 33 | task 34 | } 35 | 36 | #' @include mlr_tasks.R 37 | mlr_tasks$add("wine", load_task_wine) 38 | -------------------------------------------------------------------------------- /man/as_prediction_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/as_prediction_data.R 3 | \name{as_prediction_data} 4 | \alias{as_prediction_data} 5 | \alias{as_prediction_data.Prediction} 6 | \alias{as_prediction_data.PredictionData} 7 | \alias{as_prediction_data.list} 8 | \title{PredictionData} 9 | \usage{ 10 | as_prediction_data(x, task, row_ids = task$row_ids, check = TRUE, ...) 11 | 12 | \method{as_prediction_data}{Prediction}(x, task, row_ids = task$row_ids, check = TRUE, ...) 
13 | 14 | \method{as_prediction_data}{PredictionData}(x, task, row_ids = task$row_ids, check = TRUE, ...) 15 | 16 | \method{as_prediction_data}{list}( 17 | x, 18 | task, 19 | row_ids = task$row_ids, 20 | check = TRUE, 21 | ..., 22 | train_task 23 | ) 24 | } 25 | \arguments{ 26 | \item{x}{(any)\cr 27 | Object to convert.} 28 | 29 | \item{task}{(\link{Task}).} 30 | 31 | \item{row_ids}{\code{integer()}\cr 32 | Row indices.} 33 | 34 | \item{check}{(\code{logical(1)})\cr 35 | Perform argument checks and type conversions?} 36 | 37 | \item{...}{(any)\cr 38 | Additional arguments.} 39 | 40 | \item{train_task}{(\link{Task})\cr 41 | Task used for training the learner.} 42 | } 43 | \value{ 44 | \link{PredictionData}. 45 | } 46 | \description{ 47 | Convert object to a \link{PredictionData} or a list of \link{PredictionData}. 48 | } 49 | -------------------------------------------------------------------------------- /man/convert_task.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/task_converters.R 3 | \name{convert_task} 4 | \alias{convert_task} 5 | \title{Convert a Task from One Type to Another} 6 | \usage{ 7 | convert_task( 8 | intask, 9 | target = NULL, 10 | new_type = NULL, 11 | drop_original_target = FALSE, 12 | drop_levels = TRUE 13 | ) 14 | } 15 | \arguments{ 16 | \item{intask}{(\link{Task})\cr 17 | A \link{Task} to be converted.} 18 | 19 | \item{target}{(\code{character(1)})\cr 20 | New target to be set, must be a column in the \code{intask} data. 21 | If \code{NULL}, no new target is set, and task is converted as-is.} 22 | 23 | \item{new_type}{(\code{character(1)})\cr 24 | The new task type. Must be in \code{\link[=mlr_reflections]{mlr_reflections$task_types}}]. 
25 | If \code{NULL} (default), a new task with the same task_type is created.} 26 | 27 | \item{drop_original_target}{(\code{logical(1)})\cr 28 | If \code{FALSE} (default), the original target is added as a feature. 29 | Otherwise the original target is dropped.} 30 | 31 | \item{drop_levels}{(\code{logical(1)})\cr 32 | If \code{TRUE} (default), unused levels of the new target variable are dropped.} 33 | } 34 | \value{ 35 | \link{Task} of requested type. 36 | } 37 | \description{ 38 | The task's target is replaced by a different column from the data. 39 | } 40 | -------------------------------------------------------------------------------- /tests/testthat/test_MeasureRegrRQR.R: -------------------------------------------------------------------------------- 1 | test_that("mlr_measures_regr.rqr", { 2 | tsk = tsk("california_housing") 3 | lrn = lrn("regr.featureless") 4 | 5 | expect_error(msr("regr.rqr", alpha = 2), "alpha: Element 1 is not <= 1") 6 | 7 | # default 8 | m = msr("regr.rqr") 9 | expect_null(m$properties) 10 | expect_equal(m$param_set$values$alpha, 0.5) 11 | 12 | # missing predict type 13 | preds_na = lrn$train(tsk)$predict(tsk) 14 | expect_warning(preds_na$score(m), "missing predict type 'quantiles'") 15 | score_na = suppressWarnings(unname(preds_na$score(m))) 16 | expect_equal(score_na, NaN) 17 | 18 | # proper quantile prediction 19 | lrn$predict_type = "quantiles" 20 | lrn$quantiles = c(0.25, 0.5, 0.75) 21 | lrn$quantile_response = 0.5 22 | preds = lrn$train(tsk)$predict(tsk) 23 | 24 | expect_number(preds$score(m)) 25 | expect_true(preds$score(m) == 0) 26 | 27 | # pred_set_mean 28 | m2 = msr("regr.rqr", pred_set_mean = FALSE) 29 | expect_equal(m2$properties, c("requires_task", "requires_train_set")) 30 | expect_number(preds$score(m2, task = tsk, train_set = tsk$nrow)) 31 | 32 | m_25 = msr("regr.rqr", alpha = 0.25) 33 | expect_number(preds$score(m_25)) 34 | 35 | # alpha must be in predicted quantiles 36 | expect_error(preds$score(msr("regr.pinball", 
alpha = 0.1)), 37 | "Must be element of set") 38 | }) 39 | -------------------------------------------------------------------------------- /man/as_prediction_classif.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/as_prediction_classif.R 3 | \name{as_prediction_classif} 4 | \alias{as_prediction_classif} 5 | \alias{as_prediction_classif.PredictionClassif} 6 | \alias{as_prediction_classif.data.frame} 7 | \title{Convert to a Classification Prediction} 8 | \usage{ 9 | as_prediction_classif(x, ...) 10 | 11 | \method{as_prediction_classif}{PredictionClassif}(x, ...) 12 | 13 | \method{as_prediction_classif}{data.frame}(x, ...) 14 | } 15 | \arguments{ 16 | \item{x}{(any)\cr 17 | Object to convert.} 18 | 19 | \item{...}{(any)\cr 20 | Additional arguments.} 21 | } 22 | \value{ 23 | \link{PredictionClassif}. 24 | } 25 | \description{ 26 | Convert object to a \link{PredictionClassif}. 
27 | } 28 | \examples{ 29 | # create a prediction object 30 | task = tsk("penguins") 31 | learner = lrn("classif.rpart", predict_type = "prob") 32 | learner$train(task) 33 | p = learner$predict(task) 34 | 35 | # convert to a data.table 36 | tab = as.data.table(p) 37 | 38 | # convert back to a Prediction 39 | as_prediction_classif(tab) 40 | 41 | # split data.table into a list of data.tables 42 | tabs = split(tab, tab$truth) 43 | 44 | # convert back to list of predictions 45 | preds = lapply(tabs, as_prediction_classif) 46 | 47 | # calculate performance in each group 48 | sapply(preds, function(p) p$score()) 49 | } 50 | -------------------------------------------------------------------------------- /tests/testthat/test_warn_deprecated.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | test_that("warn_deprecated works as expected", { 4 | 5 | oldopts = options(mlr3.warn_deprecated = TRUE) 6 | expect_warning(warn_deprecated("test"), "^test is deprecated and will be removed in the future\\.$") 7 | expect_no_warning(warn_deprecated("test")) # no second warning 8 | 9 | options(mlr3.warn_deprecated = FALSE) # do not reassign oldopts: it must keep the pre-test value for the restore below 10 | expect_no_warning(warn_deprecated("test2")) # no warning when options disallow it 11 | 12 | options(oldopts) 13 | }) 14 | 15 | test_that("deprecated_binding works as expected", { 16 | 17 | oldopts = options(mlr3.warn_deprecated = TRUE) 18 | MyClass = R6::R6Class("MyClass", public = list(val = 1), 19 | active = list( 20 | foo = deprecated_binding("MyClass$foo", "bar"), 21 | foo2 = deprecated_binding("MyClass$foo2", self$val) 22 | ), 23 | ) 24 | mco = MyClass$new() 25 | expect_warning({fooval = mco$foo}, "^MyClass\\$foo is deprecated and will be removed in the future\\.$") 26 | expect_equal(fooval, "bar") 27 | expect_no_warning({fooval = mco$foo}) # no second warning 28 | expect_equal(fooval, "bar") 29 | 30 | options(mlr3.warn_deprecated = FALSE) # do not reassign oldopts: it must keep the pre-test value for the final restore 31 | expect_no_warning({foo2val = mco$foo2}) # no warning 
when options disallow it 32 | expect_equal(foo2val, 1) 33 | mco$val = 2 34 | expect_equal(mco$foo2, 2) 35 | 36 | options(oldopts) 37 | }) 38 | -------------------------------------------------------------------------------- /tests/testthat/test_mlr_callbacks.R: -------------------------------------------------------------------------------- 1 | test_that("model extractor works", { 2 | task = tsk("pima") 3 | learner = lrn("classif.rpart") 4 | resampling = rsmp("cv", folds = 3) 5 | 6 | selected_features = function(learner) list(selected_features = learner$selected_features()) 7 | callback = clbk("mlr3.model_extractor", fun = selected_features) 8 | 9 | rr = resample(task, learner, resampling = resampling, callbacks = callback) 10 | 11 | expect_list(rr$data_extra$data_extra) 12 | walk(rr$data_extra$data_extra, function(data) { 13 | expect_names(names(data), must.include = "selected_features") 14 | expect_subset(data[["selected_features"]], task$feature_names) 15 | }) 16 | }) 17 | 18 | test_that("holdout task works", { 19 | task = tsk("pima") 20 | task_holdout = task$clone() 21 | learner = lrn("classif.rpart") 22 | resampling = rsmp("cv", folds = 3) 23 | splits = partition(task, 0.7) 24 | 25 | task$filter(splits$train) 26 | task_holdout$filter(splits$test) 27 | 28 | callback = clbk("mlr3.holdout_task", task = task_holdout) 29 | 30 | rr = resample(task, learner, resampling = resampling, callbacks = callback) 31 | 32 | expect_list(rr$data_extra$data_extra) 33 | walk(rr$data_extra$data_extra, function(data) { 34 | expect_names(names(data), must.include = "prediction_holdout") 35 | expect_prediction(data[["prediction_holdout"]]) 36 | }) 37 | }) 38 | -------------------------------------------------------------------------------- /man/partition.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/partition.R 3 | \name{partition} 4 | \alias{partition} 5 | 
\alias{partition.Task} 6 | \title{Manually Partition into Training, Test and Validation Set} 7 | \usage{ 8 | partition(task, ratio = 0.67) 9 | 10 | \method{partition}{Task}(task, ratio = 0.67) 11 | } 12 | \arguments{ 13 | \item{task}{(\link{Task})\cr 14 | Task to operate on.} 15 | 16 | \item{ratio}{(\code{numeric()})\cr 17 | Ratio of observations to put into the training set. 18 | If a 2 element vector is provided, the first element is the ratio for the training set, the second element is the ratio for the test set. 19 | The validation set will contain the remaining observations.} 20 | } 21 | \description{ 22 | Creates a split of the row ids of a \link{Task} into a training and a test set, and optionally a validation set. 23 | } 24 | \examples{ 25 | # regression task partitioned into training and test set 26 | task = tsk("california_housing") 27 | split = partition(task, ratio = 0.5) 28 | data = data.frame( 29 | y = c(task$truth(split$train), task$truth(split$test)), 30 | split = rep(c("train", "predict"), lengths(split[c("train", "test")])) 31 | ) 32 | boxplot(y ~ split, data = data) 33 | 34 | # classification task partitioned into training, test and validation set 35 | task = tsk("pima") 36 | split = partition(task, c(0.66, 0.14)) 37 | } 38 | -------------------------------------------------------------------------------- /R/TaskClassif_breast_cancer.R: -------------------------------------------------------------------------------- 1 | #' @title Wisconsin Breast Cancer Classification Task 2 | #' 3 | #' @name mlr_tasks_breast_cancer 4 | #' @format [R6::R6Class] inheriting from [TaskClassif]. 5 | #' @include mlr_tasks.R 6 | #' 7 | #' @description 8 | #' A classification task for the [mlbench::BreastCancer] data set. 9 | #' 10 | #' * Column `"Id"` has been removed. 11 | #' * Column names have been converted to `snake_case`. 12 | #' * Positive class is set to `"malignant"`. 13 | #' * 16 incomplete cases have been removed from the data set. 
14 | #' * All factor features have been converted to ordered factors. 15 | #' 16 | #' @templateVar id breast_cancer 17 | #' @template task 18 | #' 19 | #' @template seealso_task 20 | NULL 21 | 22 | load_task_breast_cancer = function(id = "breast_cancer") { 23 | tab = load_dataset("BreastCancer", "mlbench") 24 | names(tab) = tolower(chartr(".", "_", colnames(tab))) 25 | for (cn in c("bare_nuclei", "bl_cromatin", "normal_nucleoli", "mitoses")) { 26 | tab[[cn]] = ordered(tab[[cn]]) 27 | } 28 | b = as_data_backend(remove_named(tab[stats::complete.cases(tab), ], "id")) 29 | 30 | task = TaskClassif$new(id, b, target = "class", positive = "malignant", 31 | label = "Wisconsin Breast Cancer") 32 | b$hash = task$man = "mlr3::mlr_tasks_breast_cancer" 33 | task 34 | } 35 | 36 | #' @include mlr_tasks.R 37 | mlr_tasks$add("breast_cancer", load_task_breast_cancer) 38 | -------------------------------------------------------------------------------- /tests/testthat/test_mlr_measures.R: -------------------------------------------------------------------------------- 1 | test_that("mlr_measures", { 2 | expect_dictionary(mlr_measures, min_items = 1L) 3 | keys = mlr_measures$keys() 4 | 5 | for (key in keys) { 6 | m = mlr_measures$get(key) 7 | expect_measure(m) 8 | } 9 | }) 10 | 11 | test_that("as.data.table(mlr_measures)", { 12 | d = as.data.table(mlr_measures) 13 | expect_data_table(d) 14 | expect_character(d$key, unique = TRUE, any.missing = FALSE) 15 | expect_subset(d$task_type, c(mlr_reflections$task_types$type, NA)) 16 | qexpectr(d$packages, "S") 17 | expect_subset(d$predict_type, c(NA, unlist(mlr_reflections$learner_predict_types))) 18 | qexpectr(d$task_properties, "S") 19 | expect_subset(unlist(d$task_properties), unlist(mlr_reflections$task_properties)) 20 | }) 21 | 22 | test_that("custom aggregation", { 23 | task = tsk("wine") 24 | lrn = lrn("classif.featureless") 25 | 26 | m = msr("classif.ce") 27 | m$id = "max_ce" 28 | m$aggregator = max 29 | measures = 
list(msr("classif.ce"), m) 30 | 31 | rr = resample(task, lrn, rsmp("cv", folds = 3)) 32 | perf = rr$score(measures) 33 | aggr = rr$aggregate(measures) 34 | expect_equal(aggr[["max_ce"]], max(perf$classif.ce)) 35 | }) 36 | 37 | test_that("as.data.table(..., objects = TRUE)", { 38 | tab = as.data.table(mlr_measures, objects = TRUE) 39 | expect_data_table(tab) 40 | expect_list(tab$object, "Measure", any.missing = FALSE) 41 | }) 42 | -------------------------------------------------------------------------------- /R/as_prediction_regr.R: -------------------------------------------------------------------------------- 1 | #' @title Convert to a Regression Prediction 2 | #' 3 | #' @description 4 | #' Convert object to a [PredictionRegr]. 5 | #' 6 | #' @inheritParams as_prediction 7 | #' 8 | #' @return [PredictionRegr]. 9 | #' @export 10 | #' @examples 11 | #' # create a prediction object 12 | #' task = tsk("mtcars") 13 | #' learner = lrn("regr.rpart") 14 | #' learner$train(task) 15 | #' p = learner$predict(task) 16 | #' 17 | #' # convert to a data.table 18 | #' tab = as.data.table(p) 19 | #' 20 | #' # convert back to a Prediction 21 | #' as_prediction_regr(tab) 22 | #' 23 | #' # split data.table into a list of data.tables 24 | #' tabs = split(tab, cut(tab$truth, 3)) 25 | #' 26 | #' # convert back to list of predictions 27 | #' preds = lapply(tabs, as_prediction_regr) 28 | #' 29 | #' # calculate performance in each group 30 | #' sapply(preds, function(p) p$score()) 31 | as_prediction_regr = function(x, ...) { 32 | UseMethod("as_prediction_regr") 33 | } 34 | 35 | 36 | #' @rdname as_prediction_regr 37 | #' @export 38 | as_prediction_regr.PredictionRegr = function(x, ...) { # nolint 39 | x 40 | } 41 | 42 | 43 | #' @rdname as_prediction_regr 44 | #' @export 45 | as_prediction_regr.data.frame = function(x, ...) 
{ # nolint 46 | assert_names(names(x), must.include = c("row_ids", "truth", "response")) 47 | assert_names(names(x), subset.of = c("row_ids", "truth", "response", "se", "weights")) 48 | invoke(PredictionRegr$new, .args = x) 49 | } 50 | -------------------------------------------------------------------------------- /man/mlr3.model_extractor.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mlr_callbacks.R 3 | \name{mlr3.model_extractor} 4 | \alias{mlr3.model_extractor} 5 | \title{Model Extractor Callback} 6 | \arguments{ 7 | \item{fun}{(\verb{function(learner)})\cr 8 | Function to extract information from the learner. 9 | The function must have the argument \code{learner}. 10 | The function must return a named list.} 11 | } 12 | \description{ 13 | This \link{CallbackResample} extracts information from the model after training with a user-defined function. 14 | This way information can be extracted from the model without saving the model (\code{store_models = FALSE}). 15 | The \code{fun} must be a function that takes a learner as input and returns the extracted information as named list (see example). 16 | The callback is very helpful to call \verb{$selected_features()}, \verb{$importance()}, \verb{$oob_error()} on the learner. 
17 | } 18 | \examples{ 19 | task = tsk("pima") 20 | learner = lrn("classif.rpart") 21 | resampling = rsmp("cv", folds = 3) 22 | 23 | # define function to extract selected features 24 | selected_features = function(learner) list(selected_features = learner$selected_features()) 25 | 26 | # create callback 27 | callback = clbk("mlr3.model_extractor", fun = selected_features) 28 | 29 | rr = resample(task, learner, resampling = resampling, store_models = FALSE, callbacks = callback) 30 | 31 | rr$data_extra 32 | } 33 | -------------------------------------------------------------------------------- /R/TaskClassif_penguins.R: -------------------------------------------------------------------------------- 1 | #' @title Palmer Penguins Data Set 2 | #' 3 | #' @name mlr_tasks_penguins 4 | #' @format [R6::R6Class] inheriting from [TaskClassif]. 5 | #' @include mlr_tasks.R 6 | #' 7 | #' @description 8 | #' Classification data to predict the species of penguins from the \CRANpkg{palmerpenguins} package, see [palmerpenguins::penguins]. 9 | #' A better alternative to the [iris data set][iris]. 10 | #' 11 | #' @templateVar id penguins 12 | #' @template task 13 | #' 14 | #' @section Pre-processing: 15 | #' * The units of measurement have been removed from the column names. 16 | #' Lengths are given in millimeters (mm), weight in grams (g). 
17 | #' 18 | #' @source \CRANpkg{palmerpenguins} 19 | #' 20 | #' @references 21 | #' `r format_bib("gorman2014")` 22 | #' 23 | #' \url{https://github.com/allisonhorst/palmerpenguins} 24 | #' 25 | #' @template seealso_task 26 | NULL 27 | 28 | load_task_penguins = function() { 29 | penguins = as.data.table(palmerpenguins::penguins) 30 | setnames(penguins, 31 | old = c("bill_length_mm", "bill_depth_mm", "flipper_length_mm", "body_mass_g"), 32 | new = c("bill_length", "bill_depth", "flipper_length", "body_mass") 33 | ) 34 | 35 | b = as_data_backend(penguins) 36 | task = TaskClassif$new("penguins", b, target = "species", 37 | label = "Palmer Penguins") 38 | b$hash = task$man = "mlr3::mlr_tasks_penguins" 39 | task 40 | } 41 | 42 | #' @include mlr_tasks.R 43 | mlr_tasks$add("penguins", load_task_penguins) 44 | -------------------------------------------------------------------------------- /man/as_task.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/as_task.R 3 | \name{as_task} 4 | \alias{as_task} 5 | \alias{as_task.Task} 6 | \alias{as_tasks} 7 | \alias{as_tasks.default} 8 | \alias{as_tasks.list} 9 | \title{Convert to a Task} 10 | \usage{ 11 | as_task(x, ...) 12 | 13 | \method{as_task}{Task}(x, clone = FALSE, ...) 14 | 15 | as_tasks(x, ...) 16 | 17 | \method{as_tasks}{default}(x, ...) 18 | 19 | \method{as_tasks}{list}(x, ...) 20 | } 21 | \arguments{ 22 | \item{x}{(any)\cr 23 | Object to convert.} 24 | 25 | \item{...}{(any)\cr 26 | Additional arguments.} 27 | 28 | \item{clone}{(\code{logical(1)})\cr 29 | If \code{TRUE}, ensures that the returned object is not the same as the input \code{x}.} 30 | } 31 | \description{ 32 | Convert object to a \link{Task} or a list of \link{Task}. 
33 | 34 | The function supports: 35 | \itemize{ 36 | \item Converting existing \link{Task} objects (with optional cloning) 37 | \item Converting objects from other packages (e.g., \code{OMLTask} from \CRANpkg{mlr3oml}) 38 | \item Converting lists of objects to lists of tasks 39 | } 40 | 41 | For constructing tasks from data frames, use the dedicated converters: 42 | \itemize{ 43 | \item \code{\link[=as_task_classif]{as_task_classif()}} for classification tasks 44 | \item \code{\link[=as_task_regr]{as_task_regr()}} for regression tasks 45 | \item \code{\link[=as_task_unsupervised]{as_task_unsupervised()}} for unsupervised tasks 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /tests/testthat/test_mlr_resampling_holdout.R: -------------------------------------------------------------------------------- 1 | test_that("holdout has no duplicated ids", { 2 | r = rsmp("holdout") 3 | expect_identical(r$duplicated_ids, FALSE) 4 | }) 5 | 6 | test_that("stratification", { 7 | data = data.table(y = factor(rep(letters[1:2], times = c(90, 10))), x1 = runif(100), x2 = rep(LETTERS[1:2], times = c(50, 50))) 8 | b = as_data_backend(data) 9 | task = TaskClassif$new("stratify_data", b, target = "y") 10 | task$col_roles$stratum = task$target_names 11 | 12 | r = rsmp("holdout", ratio = .5) 13 | r$instantiate(task) 14 | 15 | i = 1L 16 | expect_equal(task$data(r$train_set(i))[y == "a", .N], 45) 17 | expect_equal(task$data(r$train_set(i))[y == "b", .N], 5) 18 | expect_equal(task$data(r$test_set(i))[y == "a", .N], 45) 19 | expect_equal(task$data(r$test_set(i))[y == "b", .N], 5) 20 | }) 21 | 22 | test_that("grouping", { 23 | r = rsmp("holdout") 24 | expect_grouping_works(r) 25 | }) 26 | 27 | test_that("prediction does not drop dimension (#551)", { 28 | task = tsk("iris") 29 | learner = lrn("classif.rpart") 30 | resampling = rsmp("holdout") 31 | resampling$instantiate(task) 32 | 33 | design = data.table( 34 | learner = list(learner), 35 | 
task = list(task), 36 | resampling = list(resampling) 37 | ) 38 | 39 | bmr = benchmark(design) 40 | expect_number(bmr$aggregate(msr("classif.ce"))[["classif.ce"]]) 41 | expect_equal(map(get_private(bmr)$.data$data$fact$prediction, names), list("test")) 42 | }) 43 | -------------------------------------------------------------------------------- /R/TaskClassif_spam.R: -------------------------------------------------------------------------------- 1 | #' @title Spam Classification Task 2 | #' 3 | #' @name mlr_tasks_spam 4 | #' @format [R6::R6Class] inheriting from [TaskClassif]. 5 | #' @include mlr_tasks.R 6 | #' 7 | #' @description 8 | #' Spam data set from the UCI machine learning repository (\url{http://archive.ics.uci.edu/dataset/94/spambase}). 9 | #' Data set collected at Hewlett-Packard Labs to classify emails as spam or non-spam. 10 | #' 57 variables indicate the frequency of certain words and characters in the e-mail. 11 | #' The positive class is set to "spam". 12 | #' 13 | #' @templateVar id spam 14 | #' @template task 15 | #' 16 | #' @source 17 | #' Creators: 18 | #' Mark Hopkins, Erik Reeber, George Forman, Jaap Suermondt. 19 | #' Hewlett-Packard Labs, 1501 Page Mill Rd., Palo Alto, CA 94304 20 | #' 21 | #' Donor: 22 | #' George Forman (gforman at nospam hpl.hp.com) 650-857-7835 23 | #' 24 | #' Preprocessing: 25 | #' Columns have been renamed. Preprocessed data taken from the \CRANpkg{kernlab} package. 
26 | #' 27 | #' @references 28 | #' `r format_bib("dua_2017")` 29 | #' 30 | #' @template seealso_task 31 | NULL 32 | 33 | load_task_spam = function(id = "spam") { 34 | b = as_data_backend(readRDS(system.file("extdata", "spam.rds", package = "mlr3"))) 35 | task = TaskClassif$new(id, b, target = "type", positive = "spam", 36 | label = "HP Spam Detection") 37 | b$hash = task$man = "mlr3::mlr_tasks_spam" 38 | task 39 | } 40 | 41 | #' @include mlr_tasks.R 42 | mlr_tasks$add("spam", load_task_spam) 43 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yml: -------------------------------------------------------------------------------- 1 | # pkgdown workflow of the mlr3 ecosystem v0.1.0 2 | # https://github.com/mlr-org/actions 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | release: 11 | types: 12 | - published 13 | workflow_dispatch: 14 | 15 | name: pkgdown 16 | 17 | jobs: 18 | pkgdown: 19 | runs-on: ubuntu-latest 20 | 21 | concurrency: 22 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} 23 | env: 24 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 25 | BRANCH_NAME: ${{ github.head_ref || github.ref_name }} 26 | 27 | steps: 28 | - uses: actions/checkout@v5 29 | 30 | - uses: r-lib/actions/setup-pandoc@v2 31 | 32 | - uses: r-lib/actions/setup-r@v2 33 | 34 | - uses: r-lib/actions/setup-r-dependencies@v2 35 | with: 36 | extra-packages: any::pkgdown, local::. 
37 | needs: website 38 | 39 | - name: Install template 40 | run: pak::pkg_install("mlr-org/mlr3pkgdowntemplate") 41 | shell: Rscript {0} 42 | 43 | - name: Build site 44 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) 45 | shell: Rscript {0} 46 | 47 | - name: Deploy 48 | if: github.event_name != 'pull_request' 49 | uses: JamesIves/github-pages-deploy-action@v4.7.4 50 | with: 51 | clean: false 52 | branch: gh-pages 53 | folder: docs 54 | -------------------------------------------------------------------------------- /tests/testthat/test_mlr_resampling_repeated_cv.R: -------------------------------------------------------------------------------- 1 | test_that("repeated cv has no duplicated ids", { 2 | r = rsmp("repeated_cv") 3 | expect_identical(r$duplicated_ids, FALSE) 4 | }) 5 | 6 | test_that("folds first, then repetitions", { 7 | task = tsk("iris") 8 | rrcv = rsmp("repeated_cv", repeats = 2, folds = 3) 9 | rrcv$instantiate(task) 10 | 11 | expect_set_equal(c(rrcv$train_set(1), rrcv$test_set(1)), task$row_ids) 12 | expect_integer(intersect(intersect(rrcv$test_set(1), rrcv$test_set(2)), rrcv$test_set(3)), len = 0L) 13 | 14 | expect_equal(rrcv$folds(seq_len(rrcv$iters)), rep(1:3, 2)) 15 | expect_equal(rrcv$repeats(seq_len(rrcv$iters)), rep(1:2, each = 3)) 16 | }) 17 | 18 | test_that("stratification", { 19 | data = data.table(y = factor(rep(letters[1:2], times = c(90, 10))), x1 = runif(100), x2 = rep(LETTERS[1:2], times = c(50, 50))) 20 | task = TaskClassif$new("stratify_data", data, target = "y") 21 | task$col_roles$stratum = task$target_names 22 | 23 | r = rsmp("repeated_cv", folds = 5, repeats = 2) 24 | r$instantiate(task) 25 | 26 | for (i in seq_len(r$iters)) { 27 | expect_equal(task$data(r$train_set(i))[y == "a", .N], 72) 28 | expect_equal(task$data(r$train_set(i))[y == "b", .N], 8) 29 | expect_equal(task$data(r$test_set(i))[y == "a", .N], 18) 30 | expect_equal(task$data(r$test_set(i))[y == "b", .N], 2) 31 | } 32 | }) 33 | 34 | 
test_that("grouping", { 35 | r = rsmp("repeated_cv", folds = 5, repeats = 2) 36 | expect_grouping_works(r) 37 | }) 38 | -------------------------------------------------------------------------------- /tests/testthat/test_mlr_resampling_bootstrap.R: -------------------------------------------------------------------------------- 1 | test_that("bootstrap has duplicated ids", { 2 | r = rsmp("bootstrap") 3 | expect_identical(r$duplicated_ids, TRUE) 4 | }) 5 | 6 | test_that("stratification", { 7 | data = data.table(y = factor(rep(letters[1:2], times = c(90, 10))), x1 = runif(100), x2 = rep(LETTERS[1:2], times = c(50, 50))) 8 | b = as_data_backend(data) 9 | task = TaskClassif$new("stratify_data", b, target = "y") 10 | task$col_roles$stratum = task$target_names 11 | 12 | r = rsmp("bootstrap", ratio = 1, repeats = 3) 13 | r$instantiate(task) 14 | 15 | for (i in seq_len(r$iters)) { 16 | expect_equal(task$data(r$train_set(i))[y == "a", .N], 90) 17 | expect_equal(task$data(r$train_set(i))[y == "b", .N], 10) 18 | } 19 | }) 20 | 21 | test_that("grouping", { 22 | r = rsmp("bootstrap", ratio = 1, repeats = 3) 23 | expect_grouping_works(r) 24 | }) 25 | 26 | test_that("#518", { 27 | with_seed(36, { 28 | test_df = data.frame(feat1 = runif(3000), 29 | feat2 = runif(3000), 30 | target = runif(3000), 31 | group = sample(paste0("g", 1:100), 3000, replace = TRUE) 32 | ) 33 | task = TaskRegr$new(id = "test", backend = test_df, target = "target") 34 | task$col_roles$group = "group" 35 | task$col_roles$feature = c("feat1", "feat2") 36 | rb = rsmp("bootstrap", repeats = 10, ratio = 1) 37 | rb$instantiate(task) 38 | }) 39 | 40 | min.len = if (getRversion() >= "3.6.0") 3001L else NULL 41 | expect_integer(rb$train_set(2), min.len = min.len) 42 | }) 43 | -------------------------------------------------------------------------------- /R/as_resample_result.R: -------------------------------------------------------------------------------- 1 | #' @title Convert to ResampleResult 2 | #' 3 | #' 
#' @rdname as_resample_result
#' @inheritParams as_result_data
#' @export
as_resample_result.list = function(x, task, learners, resampling, store_backends = TRUE, ...) { # nolint
  # `x` is the list of predictions; iterations are numbered 1..resampling$iters.
  # `store_backends` is forwarded to `as_result_data()`; previously the argument was
  # accepted and documented but silently ignored, so backends were always kept.
  rdata = as_result_data(task = task, learners = learners, resampling = resampling,
    iterations = seq_len(resampling$iters), predictions = x, store_backends = store_backends)

  ResampleResult$new(rdata)
}
TaskGeneratorFriedman1 = R6Class("TaskGeneratorFriedman1",
  inherit = TaskGenerator,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      # single hyperparameter: non-negative `sd`, forwarded to the mlbench generator
      param_set = ps(sd = p_dbl(0L, default = 1))

      super$initialize(id = "friedman1", "regr", "mlbench", param_set,
        label = "Friedman Regression", man = "mlr3::mlr_task_generators_friedman1")
    }
  ),

  private = list(
    # Simulates `n` observations with mlbench and wraps them into a regression task.
    .generate = function(n) {
      sim = invoke(mlbench::mlbench.friedman1, n = n, .args = self$param_set$values)

      # name the ten feature columns: first five "important", last five "unimportant"
      feature_names = c(sprintf("important%i", 1:5), sprintf("unimportant%i", 1:5))
      colnames(sim$x) = feature_names

      # features plus the simulated response as target column `y`
      data = insert_named(as.data.table(sim$x), list(y = sim$y))
      task_id = sprintf("%s_%i", self$id, n)
      TaskRegr$new(task_id, data, target = "y")
    }
  )
)
18 | 19 | as_measures(x, task_type = NULL, clone = FALSE, ...) 20 | 21 | \method{as_measures}{default}(x, task_type = NULL, clone = FALSE, ...) 22 | 23 | \method{as_measures}{`NULL`}(x, task_type = NULL, clone = FALSE, ...) 24 | 25 | \method{as_measures}{list}(x, task_type = NULL, clone = FALSE, ...) 26 | } 27 | \arguments{ 28 | \item{x}{(any)\cr 29 | Object to convert.} 30 | 31 | \item{task_type}{(\code{character(1)})\cr 32 | Used if \code{x} is \code{NULL} to construct a default measure for the respective task type. 33 | The default measures are stored in \code{\link[=mlr_reflections]{mlr_reflections$default_measures}}.} 34 | 35 | \item{clone}{(\code{logical(1)})\cr 36 | If \code{TRUE}, ensures that the returned object is not the same as the input \code{x}.} 37 | 38 | \item{...}{(any)\cr 39 | Additional arguments.} 40 | } 41 | \value{ 42 | \link{Measure}. 43 | } 44 | \description{ 45 | Convert object to a \link{Measure} or a list of \link{Measure}. 46 | } 47 | -------------------------------------------------------------------------------- /man/score_roc_measures.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/score_roc_measures.R 3 | \name{score_roc_measures} 4 | \alias{score_roc_measures} 5 | \title{Calculate ROC Measures} 6 | \usage{ 7 | score_roc_measures(pred) 8 | } 9 | \arguments{ 10 | \item{pred}{(\link{PredictionClassif})\cr 11 | The prediction object.} 12 | } 13 | \value{ 14 | \code{list()}\cr 15 | A list containing two elements \code{confusion_matrix} which is the 2 times 2 confusion matrix of absolute frequencies and \code{measures}, a list of the above mentioned measures. 16 | } 17 | \description{ 18 | Calculate a set of roc performance measures based on the confusion matrix. 
19 | \itemize{ 20 | \item \code{tpr} True positive rate (Sensitivity, Recall) 21 | \item \code{fpr} False positive rate (Fall-out) 22 | \item \code{fnr} False negative rate (Miss rate) 23 | \item \code{tnr} True negative rate (Specificity) 24 | \item \code{ppv} Positive predictive value (Precision) 25 | \item \code{fomr} False omission rate 26 | \item \code{lrp} Positive likelihood ratio (LR+) 27 | \item \code{fdr} False discovery rate 28 | \item \code{npv} Negative predictive value 29 | \item \code{acc} Accuracy 30 | \item \code{lrm} Negative likelihood ratio (LR-) 31 | \item \code{dor} Diagnostic odds ratio 32 | } 33 | } 34 | \examples{ 35 | learner = lrn("classif.rpart", predict_type = "prob") 36 | splits = partition(task = tsk("pima"), ratio = 0.7) 37 | task = tsk("pima") 38 | learner$train(task) 39 | pred = learner$predict(task) 40 | score_roc_measures(pred) 41 | } 42 | -------------------------------------------------------------------------------- /man/as_task_unsupervised.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/as_task_unsupervised.R 3 | \name{as_task_unsupervised} 4 | \alias{as_task_unsupervised} 5 | \alias{as_task_unsupervised.Task} 6 | \alias{as_task_unsupervised.data.frame} 7 | \alias{as_task_unsupervised.DataBackend} 8 | \alias{as_tasks_unsupervised} 9 | \alias{as_tasks_unsupervised.list} 10 | \alias{as_tasks_unsupervised.Task} 11 | \title{Convert to an Unsupervised Task} 12 | \usage{ 13 | as_task_unsupervised(x, ...) 14 | 15 | \method{as_task_unsupervised}{Task}(x, clone = FALSE, ...) 16 | 17 | \method{as_task_unsupervised}{data.frame}( 18 | x, 19 | id = deparse1(substitute(x)), 20 | label = NA_character_, 21 | ... 22 | ) 23 | 24 | \method{as_task_unsupervised}{DataBackend}( 25 | x, 26 | id = deparse1(substitute(x)), 27 | label = NA_character_, 28 | ... 29 | ) 30 | 31 | as_tasks_unsupervised(x, ...) 
32 | 33 | \method{as_tasks_unsupervised}{list}(x, clone = FALSE, ...) 34 | 35 | \method{as_tasks_unsupervised}{Task}(x, clone = FALSE, ...) 36 | } 37 | \arguments{ 38 | \item{x}{(any)\cr 39 | Object to convert.} 40 | 41 | \item{...}{(any)\cr 42 | Additional arguments.} 43 | 44 | \item{clone}{(\code{logical(1)})\cr 45 | If \code{TRUE}, ensures that the returned object is not the same as the input \code{x}.} 46 | 47 | \item{id}{(\code{character(1)})\cr 48 | Id for the new task. 49 | Defaults to the (deparsed and substituted) name of the data argument.} 50 | 51 | \item{label}{(\code{character(1)})\cr 52 | Label for the new instance.} 53 | } 54 | \description{ 55 | Convert object to a \link{TaskUnsupervised} or a list of \link{TaskUnsupervised}. 56 | } 57 | -------------------------------------------------------------------------------- /man/as_resample_result.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/as_resample_result.R 3 | \name{as_resample_result} 4 | \alias{as_resample_result} 5 | \alias{as_resample_result.ResampleResult} 6 | \alias{as_resample_result.ResultData} 7 | \alias{as_resample_result.list} 8 | \title{Convert to ResampleResult} 9 | \usage{ 10 | as_resample_result(x, ...) 11 | 12 | \method{as_resample_result}{ResampleResult}(x, ...) 13 | 14 | \method{as_resample_result}{ResultData}(x, view = NULL, ...) 15 | 16 | \method{as_resample_result}{list}(x, task, learners, resampling, store_backends = TRUE, ...) 
17 | } 18 | \arguments{ 19 | \item{x}{(any)\cr 20 | Object to convert.} 21 | 22 | \item{...}{(any)\cr 23 | Currently not used.} 24 | 25 | \item{view}{(\code{character()})\cr 26 | See construction argument \code{view} of \code{\link{ResampleResult}}.} 27 | 28 | \item{task}{(\link{Task}).} 29 | 30 | \item{learners}{(list of trained \link{Learner}s).} 31 | 32 | \item{resampling}{(\link{Resampling}).} 33 | 34 | \item{store_backends}{(\code{logical(1)})\cr 35 | If set to \code{FALSE}, the backends of the \link{Task}s provided in \code{data} are 36 | removed.} 37 | } 38 | \value{ 39 | (\link{ResampleResult}). 40 | } 41 | \description{ 42 | Convert object to a \link{ResampleResult}. 43 | 44 | The S3 method for \code{list} expects argument \code{x} to be a list of \link{Prediction} objects and 45 | all other relevant objects (\link{Task}, \link{Learner}s, and instantiated \link{Resampling}) must 46 | be provided, too. 47 | A more flexible way to manually create a \link{ResampleResult} is implemented in \code{\link[=as_result_data]{as_result_data()}}. 
48 | } 49 | -------------------------------------------------------------------------------- /.github/workflows/r-cmd-check.yml: -------------------------------------------------------------------------------- 1 | # r cmd check workflow of the mlr3 ecosystem v0.4.0 2 | # https://github.com/mlr-org/actions 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | debug_enabled: 7 | type: boolean 8 | description: 'Run the build with tmate debugging enabled' 9 | required: false 10 | default: false 11 | push: 12 | branches: 13 | - main 14 | pull_request: 15 | branches: 16 | - main 17 | 18 | name: r-cmd-check 19 | 20 | jobs: 21 | r-cmd-check: 22 | runs-on: ${{ matrix.config.os }} 23 | 24 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 25 | 26 | env: 27 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 28 | BRANCH_NAME: ${{ github.head_ref || github.ref_name }} 29 | 30 | strategy: 31 | fail-fast: false 32 | matrix: 33 | config: 34 | - {os: ubuntu-latest, r: 'devel'} 35 | - {os: ubuntu-latest, r: 'release'} 36 | 37 | steps: 38 | - uses: actions/checkout@v5 39 | 40 | - uses: r-lib/actions/setup-pandoc@v2 41 | 42 | - uses: r-lib/actions/setup-r@v2 43 | with: 44 | r-version: ${{ matrix.config.r }} 45 | 46 | - uses: r-lib/actions/setup-r-dependencies@v2 47 | with: 48 | extra-packages: any::rcmdcheck 49 | needs: check 50 | 51 | - uses: mxschmitt/action-tmate@v3 52 | if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }} 53 | with: 54 | limit-access-to-actor: true 55 | 56 | - uses: r-lib/actions/check-r-package@v2 57 | with: 58 | args: 'c("--no-manual", "--as-cran")' 59 | error-on: '"note"' 60 | -------------------------------------------------------------------------------- /R/MeasureOOBError.R: -------------------------------------------------------------------------------- 1 | #' @title Out-of-bag Error Measure 2 | #' 3 | #' @name mlr_measures_oob_error 4 | #' @include Measure.R 5 | #' 6 | #' @description 7 | #' Returns the out-of-bag error of the [Learner] for 
MeasureOOBError = R6Class("MeasureOOBError",
  inherit = Measure,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      super$initialize(
        id = "oob_error",
        task_type = NA_character_,
        properties = c("na_score", "requires_learner", "requires_no_prediction"),
        predict_sets = NULL,
        predict_type = NA_character_,
        range = c(-Inf, Inf),
        minimize = TRUE,
        label = "Out-of-bag Error",
        man = "mlr3::mlr_measures_oob_error"
      )
    }
  ),

  private = list(
    # Scores on the base learner; the prediction itself is not used.
    .score = function(prediction, learner, ...) {
      base = learner$base_learner()

      if ("oob_error" %in% base$properties) {
        # a value captured in the learner state during training takes precedence,
        # so that the model does not have to be stored
        stored = base$state$oob_error
        if (is.null(stored)) base$oob_error() else stored
      } else {
        # learner cannot provide an out-of-bag error
        NA_real_
      }
    }
  )
)
13 | 14 | \method{as_prediction}{Prediction}(x, check = FALSE, ...) 15 | 16 | \method{as_prediction}{PredictionDataClassif}(x, check = FALSE, ...) 17 | 18 | \method{as_prediction}{PredictionDataRegr}(x, check = FALSE, ...) 19 | 20 | as_predictions(x, predict_sets = "test", ...) 21 | 22 | \method{as_predictions}{list}(x, predict_sets = "test", ...) 23 | } 24 | \arguments{ 25 | \item{x}{(any)\cr 26 | Object to convert.} 27 | 28 | \item{check}{(\code{logical(1)})\cr 29 | Perform argument checks and type conversions?} 30 | 31 | \item{...}{(any)\cr 32 | Additional arguments.} 33 | 34 | \item{predict_sets}{(\code{character()})\cr 35 | Prediction sets to operate on, used in \code{aggregate()} to extract the matching \code{predict_sets} from the \link{ResampleResult}. 36 | Multiple predict sets are calculated by the respective \link{Learner} during \code{\link[=resample]{resample()}}/\code{\link[=benchmark]{benchmark()}}. 37 | Must be a non-empty subset of \verb{\{"train", "test", "internal_valid"\}}. 38 | If multiple sets are provided, these are first combined to a single prediction object. 39 | Default is \code{"test"}.} 40 | } 41 | \value{ 42 | \link{Prediction}. 43 | } 44 | \description{ 45 | Convert object to a \link{Prediction} or a list of \link{Prediction}. 
# Checks the "internal_valid_score" measure: selection of a score by name, NA results
# when no matching score is available, the default selection, and id/minimize handling.
test_that("internal valid score", {
  task = tsk("iris")
  learner = lrn("classif.debug", validate = 0.2)$train(task)
  # NOTE(review): `pred` is assigned here (and again further below) but never used in this test
  pred = learner$predict(task)
  rr = resample(task, learner, rsmp("holdout"))
  # selecting "acc" must reproduce the value stored in the learner's internal validation scores
  expect_equal(
    rr$score(msr("internal_valid_score", select = "acc"))$acc,
    rr$learners[[1]]$internal_valid_scores$acc
  )
  # selecting a name that is not among the validation scores yields NA
  expect_equal(
    rr$score(msr("internal_valid_score", select = "wrong_name"))$wrong_name,
    NA_real_
  )
  # rpart is resampled here without any validation configured: score is NA as well
  rr = resample(task, lrn("classif.rpart"), rsmp("holdout"))
  expect_equal(
    rr$score(msr("internal_valid_score", select = "acc"))$acc,
    NA_real_
  )
  expect_measure(msr("internal_valid_score"))

  # learner that does not have it
  m = msr("internal_valid_score", select = "a")
  task = tsk("mtcars")
  learner = lrn("regr.debug")
  learner$train(task)
  pred = learner$predict(task)
  rr = resample(task, learner, rsmp("holdout"))
  expect_equal(rr$score(m)$a, NA_real_)

  task = tsk("iris")
  # the first validation score is taken by default
  rr = resample(task, lrn("classif.debug", predict_type = "prob", validate = 0.2), rsmp("holdout"))

  m = msr("internal_valid_score")
  expect_equal(
    rr$score(m)$internal_valid_score,
    rr$learners[[1]]$internal_valid_scores[[1L]]
  )

  # without `select` the measure keeps its generic id and `minimize` is NA;
  # with `select` the id becomes the selected name and `minimize` is taken from the argument
  measure = msr("internal_valid_score")
  expect_equal(measure$id, "internal_valid_score")
  expect_equal(measure$minimize, NA)
  measure = msr("internal_valid_score", select = "acc", minimize = TRUE)
  expect_equal(measure$id, "acc")
  expect_true(measure$minimize)
})
ResamplingInsample = R6Class("ResamplingInsample", inherit = Resampling,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      super$initialize(
        id = "insample",
        label = "Insample Resampling",
        man = "mlr3::mlr_resamplings_insample"
      )
    }
  ),

  active = list(
    #' @template field_iters
    iters = function(rhs) {
      # read-only binding: there is always exactly one iteration
      assert_ro_binding(rhs)
      1L
    }
  ),

  private = list(
    # the instance is just the vector of row ids that was passed in
    .sample = function(ids, ...) ids,

    # training and test set are both the complete stored instance, independent of `i`
    .get_train = function(i) self$instance,
    .get_test = function(i) self$instance,

    # concatenates the instances of a list into one
    .combine = function(instances) do.call(c, instances)
  )
)
#' @export
#' @rdname as_measure
as_measures.list = function(x, task_type = NULL, clone = FALSE, ...) { # nolint
  # Convert element by element. `task_type` is forwarded (it used to be hard-coded to
  # `NULL`, ignoring the caller's value) so that `NULL` entries can resolve to the
  # default measure of the task type, consistent with `as_measures.default()`.
  # Entries which already are a [Measure] are unaffected, as `as_measure.Measure()`
  # does not use `task_type`.
  lapply(x, as_measure, task_type = task_type, clone = clone, ...)
}
#' @export
as_task.default = function(x, ...) {
  # Fallback for unsupported inputs: always signals an input error which names
  # the first class of `x` and points to the dedicated converters.
  first_class = class(x)[1L]
  error_input(
    "No method for class '%s'. To create a task from a `data.frame`, use dedicated converters such as `as_task_classif()` or `as_task_regr()`.",
    first_class
  )
}
#' @title Supervised Task
#'
#' @include Task.R
#'
#' @description
#' This is the abstract base class for task objects like [TaskClassif] and [TaskRegr].
#' It extends [Task] with methods to handle target columns.
#' Supervised tasks for probabilistic regression (including survival analysis) can be
#' found in \CRANpkg{mlr3proba}.
#'
#' @template param_id
#' @template param_task_type
#' @template param_backend
#' @template param_rows
#' @template param_label
#' @template param_extra_args
#'
#' @template seealso_task
#' @keywords internal
#' @export
#' @examples
#' TaskSupervised$new("penguins", task_type = "classif", backend = palmerpenguins::penguins,
#'   target = "species")
TaskSupervised = R6Class("TaskSupervised", inherit = Task,
  public = list(

    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    #'
    #' @param target (`character(1)`)\cr
    #'   Name of the target column.
    initialize = function(id, task_type, backend, target, label = NA_character_, extra_args = list()) {
      super$initialize(id = id, task_type = task_type, backend = backend, label = label, extra_args = extra_args)
      # the target must be among the columns currently holding the feature role
      assert_subset(target, self$col_roles$feature)
      # assign the target role first, then remove the target from the features
      # (the order of these two assignments is kept as is on purpose)
      self$col_roles$target = target
      self$col_roles$feature = setdiff(self$col_roles$feature, target)
    },

    #' @description
    #' True response for the specified `rows`. Format depends on the task type.
    #' Defaults to all rows with role "use".
    truth = function(rows = NULL) {
      # restrict the task data to the target column(s)
      self$data(rows, cols = self$target_names)
    }
  )
)
MeasureBIC = R6Class("MeasureBIC",
  inherit = Measure,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      super$initialize(
        id = "bic",
        task_type = NA_character_,
        properties = c("na_score", "requires_learner", "requires_model", "requires_no_prediction"),
        predict_sets = NULL,
        predict_type = NA_character_,
        minimize = TRUE,
        label = "Bayesian Information Criterion",
        man = "mlr3::mlr_measures_bic"
      )
    }
  ),

  private = list(
    # Computes the BIC from the log-likelihood of the base learner's model.
    # Any error raised on the way is turned into a warning and a score of NA.
    .score = function(prediction, learner, ...) {
      base = learner$base_learner()

      bic_unavailable = function(e) {
        warning_config("Learner '%s' does not support BIC calculation", base$id)
        NA_real_
      }

      tryCatch(stats::BIC(stats::logLik(base$model)), error = bic_unavailable)
    }
  )
)
#'   If a learner is in a marshaled state, you first need to call `$unmarshal()` to use its model, e.g. for prediction.
#' * `"hotstart_forward"`: The learner supports hotstarting a model forward.
#' * `"hotstart_backward"`: The learner supports hotstarting a model backward.
#' * `"featureless"`: The learner does not use features.
40 | 41 | # updating join: 42 | # replaces values in x with values in y 43 | ujoin = function(x, y, key) { 44 | cn = setdiff(intersect(names(x), names(y)), key) 45 | expr = parse(text = paste0("`:=`(", paste0(sprintf("%1$s=i.%1$s", cn), collapse = ","), ")")) 46 | x[y, eval(expr), on = key][] 47 | } 48 | -------------------------------------------------------------------------------- /R/MeasureRegr.R: -------------------------------------------------------------------------------- 1 | #' @title Regression Measure 2 | #' 3 | #' @include Measure.R 4 | #' 5 | #' @description 6 | #' This measure specializes [Measure] for regression problems: 7 | #' 8 | #' * `task_type` is set to `"regr"`. 9 | #' * Possible values for `predict_type` are `"response"`, `"se"` and `"distr"`. 10 | #' 11 | #' Predefined measures can be found in the [dictionary][mlr3misc::Dictionary] [mlr_measures]. 12 | #' The default measure for regression is [`regr.mse`][mlr_measures_regr.mse]. 13 | #' 14 | #' @template param_id 15 | #' @template param_param_set 16 | #' @template param_range 17 | #' @template param_minimize 18 | #' @template param_average 19 | #' @template param_aggregator 20 | #' @template param_predict_type 21 | #' @template param_measure_properties 22 | #' @template param_predict_sets 23 | #' @template param_task_properties 24 | #' @template param_packages 25 | #' @template param_label 26 | #' @template param_man 27 | #' 28 | #' @template seealso_measure 29 | #' @export 30 | MeasureRegr = R6Class("MeasureRegr", 31 | inherit = Measure, 32 | public = list( 33 | #' @description 34 | #' Creates a new instance of this [R6][R6::R6Class] class. 
35 | initialize = function(id, param_set = ps(), range, minimize = NA, average = "macro", aggregator = NULL, properties = character(), predict_type = "response", 36 | predict_sets = "test", task_properties = character(), packages = character(), label = NA_character_, man = NA_character_) { 37 | super$initialize(id, task_type = "regr", param_set = param_set, range = range, minimize = minimize, average = average, aggregator = aggregator, 38 | properties = properties, predict_type = predict_type, predict_sets = predict_sets, 39 | task_properties = task_properties, packages = packages, label = label, man = man) 40 | } 41 | ) 42 | ) 43 | -------------------------------------------------------------------------------- /R/MeasureClassif.R: -------------------------------------------------------------------------------- 1 | #' @title Classification Measure 2 | #' 3 | #' @include Measure.R 4 | #' 5 | #' @description 6 | #' This measure specializes [Measure] for classification problems: 7 | #' 8 | #' * `task_type` is set to `"classif"`. 9 | #' * Possible values for `predict_type` are `"response"` and `"prob"`. 10 | #' 11 | #' Predefined measures can be found in the [dictionary][mlr3misc::Dictionary] [mlr_measures]. 12 | #' The default measure for classification is [`classif.ce`][mlr_measures_classif.ce]. 13 | #' 14 | #' @template param_id 15 | #' @template param_param_set 16 | #' @template param_range 17 | #' @template param_minimize 18 | #' @template param_average 19 | #' @template param_aggregator 20 | #' @template param_predict_type 21 | #' @template param_measure_properties 22 | #' @template param_predict_sets 23 | #' @template param_task_properties 24 | #' @template param_packages 25 | #' @template param_label 26 | #' @template param_man 27 | #' 28 | #' @template seealso_measure 29 | #' @export 30 | MeasureClassif = R6Class("MeasureClassif", 31 | inherit = Measure, 32 | public = list( 33 | #' @description 34 | #' Creates a new instance of this [R6][R6::R6Class] class. 
35 | initialize = function(id, param_set = ps(), range, minimize = NA, average = "macro", aggregator = NULL, properties = character(), predict_type = "response", 36 | predict_sets = "test", task_properties = character(), packages = character(), label = NA_character_, man = NA_character_) { 37 | super$initialize(id, task_type = "classif", param_set = param_set, range = range, minimize = minimize, average = average, aggregator = aggregator, 38 | properties = properties, predict_type = predict_type, predict_sets = predict_sets, 39 | task_properties = task_properties, packages = packages, label = label, man = man) 40 | } 41 | ) 42 | ) 43 | -------------------------------------------------------------------------------- /man/predict.Learner.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predict.R 3 | \name{predict.Learner} 4 | \alias{predict.Learner} 5 | \title{Predict Method for Learners} 6 | \usage{ 7 | \method{predict}{Learner}(object, newdata, predict_type = NULL, ...) 8 | } 9 | \arguments{ 10 | \item{object}{(\link{Learner})\cr 11 | Any \link{Learner}.} 12 | 13 | \item{newdata}{(\code{\link[=data.frame]{data.frame()}})\cr 14 | New data to predict on.} 15 | 16 | \item{predict_type}{(\code{character(1)})\cr 17 | The predict type to return. 18 | Set to \verb{<Prediction>} to retrieve the complete \link{Prediction} object. 19 | If set to \code{NULL} (default), the first predict type for the respective class of the \link{Learner} 20 | as stored in \link{mlr_reflections} is used.} 21 | 22 | \item{...}{(any)\cr 23 | Hyperparameters to pass down to the \link{Learner}.} 24 | } 25 | \description{ 26 | Extends the generic \code{\link[stats:predict]{stats::predict()}} with a method for \link{Learner}. 27 | Note that this function is intended as glue code to be used in third party packages. 28 | We recommend to work with the \link{Learner} directly, i.e.
calling \code{learner$predict()} 29 | or \code{learner$predict_newdata()} directly. 30 | 31 | Performs the following steps: 32 | \itemize{ 33 | \item Sets additional hyperparameters passed to this function. 34 | \item Creates a \link{Prediction} object by calling \code{learner$predict_newdata()}. 35 | \item Returns (subset of) \link{Prediction}. 36 | } 37 | } 38 | \examples{ 39 | task = tsk("spam") 40 | 41 | learner = lrn("classif.rpart", predict_type = "prob") 42 | learner$train(task) 43 | predict(learner, task$data(1:3), predict_type = "response") 44 | predict(learner, task$data(1:3), predict_type = "prob") 45 | predict(learner, task$data(1:3), predict_type = "<Prediction>") 46 | } 47 | -------------------------------------------------------------------------------- /R/TaskGeneratorPeak.R: -------------------------------------------------------------------------------- 1 | #' @title Peak Regression Task Generator 2 | #' 3 | #' @name mlr_task_generators_peak 4 | #' @include TaskGenerator.R 5 | #' 6 | #' @description 7 | #' A [TaskGenerator] for the peak task in [mlbench::mlbench.peak()]. 8 | #' 9 | #' @templateVar id peak 10 | #' @template task_generator 11 | #' 12 | #' @template seealso_task_generator 13 | #' @export 14 | #' @examples 15 | #' generator = tgen("peak", d = 5) 16 | #' task = generator$generate(200) 17 | #' str(task$data()) 18 | TaskGeneratorPeak = R6Class( 19 | "TaskGeneratorPeak", 20 | inherit = TaskGenerator, 21 | public = list( 22 | #' @description 23 | #' Creates a new instance of this [R6][R6::R6Class] class.
24 | initialize = function() { 25 | ps = ps( 26 | d = p_int(lower = 1, default = 20L) 27 | ) 28 | 29 | super$initialize( 30 | id = "peak", 31 | "regr", 32 | "mlbench", 33 | ps, 34 | label = "Peak Regression", 35 | man = "mlr3::mlr_task_generators_peak" 36 | ) 37 | } 38 | ), 39 | 40 | private = list( 41 | .generate_obj = function(n) { 42 | invoke( 43 | mlbench::mlbench.peak, 44 | n = n, 45 | .args = self$param_set$values, 46 | .opts = allow_partial_matching 47 | ) 48 | }, 49 | 50 | .generate = function(n) { 51 | # delegate to .generate_obj() so mlbench is invoked in exactly one place (same pattern as TaskGeneratorXor) 52 | obj = private$.generate_obj(n) 53 | colnames(obj$x) = sprintf("x%0i", seq_len(ncol(obj$x))) 54 | data = insert_named(as.data.table(obj$x), list(y = obj$y)) 55 | TaskRegr$new(sprintf("%s_%i", self$id, n), data, target = "y") 56 | } 57 | ) 58 | ) 59 | 60 | #' @include mlr_task_generators.R 61 | mlr_task_generators$add("peak", function() TaskGeneratorPeak$new()) 62 | -------------------------------------------------------------------------------- /R/MeasureDebug.R: -------------------------------------------------------------------------------- 1 | #' @title Debug Measure for Classification 2 | #' 3 | #' @name mlr_measures_debug_classif 4 | #' @include Measure.R 5 | #' 6 | #' @description 7 | #' This measure returns the number of observations in the [PredictionClassif] object. 8 | #' Its main purpose is debugging. 9 | #' The parameter `na_ratio` (`numeric(1)`) controls the ratio of scores which randomly 10 | #' are set to `NA`, between 0 (default) and 1.
11 | #' 12 | #' @templateVar id debug_classif 13 | #' @template measure 14 | #' 15 | #' @template seealso_measure 16 | #' @export 17 | #' @examples 18 | #' task = tsk("wine") 19 | #' learner = lrn("classif.featureless") 20 | #' measure = msr("debug_classif", na_ratio = 0.5) 21 | #' rr = resample(task, learner, rsmp("cv", folds = 5)) 22 | #' rr$score(measure) 23 | MeasureDebugClassif = R6Class("MeasureDebugClassif", 24 | inherit = Measure, 25 | public = list( 26 | #' @description 27 | #' Creates a new instance of this [R6][R6::R6Class] class. 28 | initialize = function() { 29 | param_set = ps(na_ratio = p_dbl(0, 1, tags = "required")) 30 | param_set$set_values(na_ratio = 0) 31 | super$initialize( 32 | id = "debug_classif", 33 | param_set = param_set, 34 | predict_type = "response", 35 | range = c(0, Inf), 36 | properties = "na_score", 37 | label = "Debug Classification Measure", 38 | man = "mlr3::mlr_measures_debug_classif" 39 | ) 40 | } 41 | ), 42 | 43 | private = list( 44 | .score = function(prediction, ...) 
{ 45 | na_ratio = self$param_set$get_values()$na_ratio 46 | if (na_ratio > runif(1L)) { 47 | return(NA_integer_) 48 | } 49 | length(prediction$row_ids) 50 | } 51 | ) 52 | ) 53 | 54 | #' @include mlr_measures.R 55 | mlr_measures$add("debug_classif", function() MeasureDebugClassif$new()) 56 | -------------------------------------------------------------------------------- /.github/workflows/dev-cmd-check.yml: -------------------------------------------------------------------------------- 1 | # dev cmd check workflow of the mlr3 ecosystem v0.4.0 2 | # https://github.com/mlr-org/actions 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | debug_enabled: 7 | type: boolean 8 | description: 'Run the build with tmate debugging enabled' 9 | required: false 10 | default: false 11 | push: 12 | branches: 13 | - main 14 | pull_request: 15 | branches: 16 | - main 17 | 18 | name: dev-check 19 | 20 | jobs: 21 | check-package: 22 | runs-on: ${{ matrix.config.os }} 23 | 24 | name: ${{ matrix.config.dev-package }} 25 | 26 | env: 27 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 28 | BRANCH_NAME: ${{ github.head_ref || github.ref_name }} 29 | 30 | strategy: 31 | fail-fast: false 32 | matrix: 33 | config: 34 | - {os: ubuntu-latest, r: 'release', dev-package: 'mlr-org/mlr3misc'} 35 | - {os: ubuntu-latest, r: 'release', dev-package: 'mlr-org/paradox'} 36 | 37 | steps: 38 | - uses: actions/checkout@v5 39 | 40 | - uses: r-lib/actions/setup-pandoc@v2 41 | 42 | - uses: r-lib/actions/setup-r@v2 43 | with: 44 | r-version: ${{ matrix.config.r }} 45 | 46 | - uses: r-lib/actions/setup-r-dependencies@v2 47 | with: 48 | extra-packages: any::rcmdcheck 49 | needs: check 50 | 51 | - name: Install dev versions 52 | run: pak::pkg_install('${{ matrix.config.dev-package }}') 53 | shell: Rscript {0} 54 | 55 | - uses: mxschmitt/action-tmate@v3 56 | if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }} 57 | with: 58 | limit-access-to-actor: true 59 | 60 | - uses: r-lib/actions/check-r-package@v2 
61 | with: 62 | args: 'c("--no-manual", "--as-cran")' 63 | error-on: '"note"' 64 | -------------------------------------------------------------------------------- /R/mlr_task_generators.R: -------------------------------------------------------------------------------- 1 | #' @title Dictionary of Task Generators 2 | #' 3 | #' @usage NULL 4 | #' @format [R6::R6Class] object inheriting from [mlr3misc::Dictionary]. 5 | #' 6 | #' @description 7 | #' A simple [mlr3misc::Dictionary] storing objects of class [TaskGenerator]. 8 | #' Each task generator has an associated help page, see `mlr_task_generators_[id]`. 9 | #' 10 | #' This dictionary can get populated with additional task generators by add-on packages. 11 | #' 12 | #' For a more convenient way to retrieve and construct task generators, see [tgen()]/[tgens()]. 13 | #' 14 | #' @section Methods: 15 | #' See [mlr3misc::Dictionary]. 16 | #' 17 | #' @section S3 methods: 18 | #' * `as.data.table(dict, ..., objects = FALSE)`\cr 19 | #' [mlr3misc::Dictionary] -> [data.table::data.table()]\cr 20 | #' Returns a [data.table::data.table()] with fields "key", "label", "task_type", "params", and "packages" as columns. 21 | #' If `objects` is set to `TRUE`, the constructed objects are returned in the list column named `object`. 
22 | #' 23 | #' @family Dictionary 24 | #' @family TaskGenerator 25 | #' @seealso 26 | #' Sugar functions: [tgen()], [tgens()] 27 | #' @export 28 | #' @examples 29 | #' mlr_task_generators$get("smiley") 30 | #' tgen("2dnormals") 31 | mlr_task_generators = R6Class("DictionaryTaskGenerator", 32 | inherit = Dictionary, 33 | cloneable = FALSE 34 | )$new() 35 | 36 | #' @export 37 | as.data.table.DictionaryTaskGenerator = function(x, ..., objects = FALSE) { 38 | assert_flag(objects) 39 | 40 | setkeyv(map_dtr(x$keys(), function(key) { 41 | g = withCallingHandlers(x$get(key), 42 | packageNotFoundWarning = function(w) invokeRestart("muffleWarning")) 43 | insert_named( 44 | list(key = key, label = g$label, task_type = g$task_type, params = list(g$param_set$ids()), packages = list(g$packages)), 45 | if (objects) list(object = list(g)) 46 | ) 47 | }, .fill = TRUE), "key")[] 48 | } 49 | -------------------------------------------------------------------------------- /man/uhash.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/BenchmarkResult.R 3 | \name{uhashes} 4 | \alias{uhashes} 5 | \alias{uhash} 6 | \title{Obtain specific uhashes from a \link{BenchmarkResult}} 7 | \usage{ 8 | uhashes(bmr, learner_ids = NULL, task_ids = NULL, resampling_ids = NULL) 9 | 10 | uhash(bmr, learner_id = NULL, task_id = NULL, resampling_id = NULL) 11 | } 12 | \arguments{ 13 | \item{bmr}{(\code{BenchmarkResult})\cr 14 | Benchmark result.} 15 | 16 | \item{learner_ids}{(\code{character()} | \code{NULL})\cr 17 | Learner IDs.} 18 | 19 | \item{task_ids}{(\code{character()} | \code{NULL})\cr 20 | Task IDs.} 21 | 22 | \item{resampling_ids}{(\code{character()} | \code{NULL})\cr 23 | Resampling IDs.} 24 | 25 | \item{learner_id}{(\code{character(1)} | \code{NULL})\cr 26 | Learner ID.} 27 | 28 | \item{task_id}{(\code{character(1)} | \code{NULL})\cr 29 | Task ID.} 30 | 31 | 
\item{resampling_id}{(\code{character(1)} | \code{NULL})\cr 32 | Resampling ID.} 33 | } 34 | \description{ 35 | In a \code{\link{BenchmarkResult}}, each \link{ResampleResult} is \emph{u}niquely identified by a \emph{hash} (\emph{uhash}). 36 | Operations that select specific \link{ResampleResult}s from a \link{BenchmarkResult} operate using 37 | these hashes. 38 | This function allows to obtain uhashes for specific learners, tasks, and resamplings. 39 | 40 | If you want more control, you can also directly obtain the uhash table from the \code{\link{BenchmarkResult}} 41 | via the field \verb{$uhash_table}. 42 | } 43 | \examples{ 44 | design = benchmark_grid( 45 | tsks(c("sonar", "iris")), 46 | lrns(c("classif.debug", "classif.featureless", "classif.rpart")), 47 | rsmp("holdout") 48 | ) 49 | bmr = benchmark(design) 50 | bmr 51 | bmr$uhashes 52 | uhash(bmr, learner_id = "classif.debug", task_id = "sonar", resampling_id = "holdout") 53 | uhashes(bmr, learner_ids = c("classif.debug", "classif.featureless")) 54 | } 55 | -------------------------------------------------------------------------------- /R/MeasureAIC.R: -------------------------------------------------------------------------------- 1 | #' @title Akaike Information Criterion Measure 2 | #' 3 | #' @name mlr_measures_aic 4 | #' @include Measure.R 5 | #' 6 | #' @description 7 | #' Calculates the Akaike Information Criterion (AIC) which is a 8 | #' trade-off between goodness of fit (measured in terms of 9 | #' log-likelihood) and model complexity (measured in terms of number 10 | #' of included features). 11 | #' Internally, [stats::AIC()] is called with parameter `k` (defaulting to 2). 12 | #' Requires the learner property `"loglik"`, `NA` is returned for unsupported learners. 
13 | #' 14 | #' @templateVar id aic 15 | #' @template measure 16 | #' 17 | #' @template seealso_measure 18 | #' @export 19 | MeasureAIC = R6Class("MeasureAIC", 20 | inherit = Measure, 21 | public = list( 22 | #' @description 23 | #' Creates a new instance of this [R6][R6::R6Class] class. 24 | initialize = function() { 25 | param_set = ps(k = p_int(lower = 0L)) 26 | super$initialize( 27 | id = "aic", 28 | task_type = NA_character_, 29 | param_set = param_set, 30 | predict_sets = NULL, 31 | properties = c("na_score", "requires_learner", "requires_model", "requires_no_prediction"), 32 | predict_type = NA_character_, 33 | minimize = TRUE, 34 | label = "Akaike Information Criterion", 35 | man = "mlr3::mlr_measures_aic" 36 | ) 37 | } 38 | ), 39 | 40 | private = list( 41 | .score = function(prediction, learner, ...) { 42 | learner = learner$base_learner() 43 | k = self$param_set$values$k %??% 2L 44 | 45 | tryCatch({ 46 | stats::AIC(stats::logLik(learner$model), k = k) 47 | }, error = function(e) { 48 | warning_config("Learner '%s' does not support AIC calculation", learner$id) 49 | NA_real_ 50 | }) 51 | } 52 | ) 53 | ) 54 | 55 | #' @include mlr_measures.R 56 | mlr_measures$add("aic", function() MeasureAIC$new()) 57 | -------------------------------------------------------------------------------- /R/helper_hashes.R: -------------------------------------------------------------------------------- 1 | hashes = function(x) { 2 | map_chr(unname(x), "hash") 3 | } 4 | 5 | phashes = function(x) { 6 | map_chr(unname(x), "phash") 7 | } 8 | 9 | #' @description 10 | #' Calculate task hashes of resampling iterations. 11 | #' 12 | #' @param task ([Task]). 13 | #' @param resampling ([Resampling]). 14 | #' @param learner ([Learner])\cr 15 | #' Because we only set the validation task for learners that use it, a learner can be passed here to 16 | #' calculate the hashes accordingly, i.e. 
ignore the test ids when the learner does not have the 17 | #' 'validation' property but otherwise include them. 18 | #' 19 | #' @return (`character()`). 20 | #' @noRd 21 | resampling_task_hashes = function(task, resampling, learner = NULL) { 22 | # validation task is set on the worker 23 | learner_does_validation = !is.null(get0("validate", learner)) 24 | map_chr(seq_len(resampling$iters), function(i) { 25 | train_set = resampling$train_set(i) 26 | test_set = if (learner_does_validation) resampling$test_set(i) 27 | task_hash(task, train_set, test_set) 28 | }) 29 | } 30 | 31 | task_hash = function(task, use_ids, test_ids = NULL, ignore_internal_valid_task = FALSE) { 32 | # order matters: we first check for test_ids and then for the internal_valid_task 33 | internal_valid_task_hash = if (!is.null(test_ids)) { 34 | # this does the same as 35 | # task$internal_valid_task = test_ids 36 | # $internal_valid_task$hash 37 | # but avoids the deep clone 38 | task_hash(task, use_ids = test_ids, test_ids = NULL, ignore_internal_valid_task = TRUE) 39 | } else if (!ignore_internal_valid_task) { 40 | task$internal_valid_task$hash 41 | } 42 | 43 | calculate_hash( 44 | class(task), 45 | task$id, 46 | task$backend$hash, 47 | task$col_info, 48 | use_ids, 49 | task$col_roles, 50 | get_private(task)$.properties, 51 | internal_valid_task_hash, 52 | task$characteristics) 53 | } 54 | -------------------------------------------------------------------------------- /R/mlr_resamplings.R: -------------------------------------------------------------------------------- 1 | #' @title Dictionary of Resampling Strategies 2 | #' 3 | #' @usage NULL 4 | #' @format [R6::R6Class] object inheriting from [mlr3misc::Dictionary]. 5 | #' 6 | #' @description 7 | #' A simple [mlr3misc::Dictionary] storing objects of class [Resampling]. 8 | #' Each resampling has an associated help page, see `mlr_resamplings_[id]`. 
9 | #' 10 | #' This dictionary can get populated with additional resampling strategies by add-on packages. 11 | #' 12 | #' For a more convenient way to retrieve and construct resampling strategies, see [rsmp()]/[rsmps()]. 13 | #' 14 | #' @section Methods: 15 | #' See [mlr3misc::Dictionary]. 16 | #' 17 | #' @section S3 methods: 18 | #' * `as.data.table(dict, ..., objects = FALSE)`\cr 19 | #' [mlr3misc::Dictionary] -> [data.table::data.table()]\cr 20 | #' Returns a [data.table::data.table()] with columns "key", "label", "params", and "iters". 21 | #' If `objects` is set to `TRUE`, the constructed objects are returned in the list column named `object`. 22 | #' 23 | #' @family Dictionary 24 | #' @family Resampling 25 | #' @seealso 26 | #' Sugar functions: [rsmp()], [rsmps()] 27 | #' @export 28 | #' @examples 29 | #' as.data.table(mlr_resamplings) 30 | #' mlr_resamplings$get("cv") 31 | #' rsmp("subsampling") 32 | mlr_resamplings = R6Class("DictionaryResampling", 33 | inherit = Dictionary, 34 | cloneable = FALSE 35 | )$new() 36 | 37 | #' @export 38 | as.data.table.DictionaryResampling = function(x, ..., objects = FALSE) { # nolint 39 | assert_flag(objects) 40 | 41 | setkeyv(map_dtr(x$keys(), function(key) { 42 | r = tryCatch(x$get(key), 43 | missingDefaultError = function(e) NULL) 44 | if (is.null(r)) { # x$get() signalled a missingDefaultError: only the key can be reported, remaining columns are filled via .fill 45 | return(list(key = key)) 46 | } 47 | 48 | insert_named( 49 | list(key = key, label = r$label, params = list(r$param_set$ids()), iters = r$iters), 50 | if (objects) list(object = list(r)) 51 | ) 52 | }, .fill = TRUE), "key")[] 53 | } 54 | -------------------------------------------------------------------------------- /R/TaskGeneratorXor.R: -------------------------------------------------------------------------------- 1 | #' @title XOR Classification Task Generator 2 | #' 3 | #' @name mlr_task_generators_xor 4 | #' @include TaskGenerator.R 5 | #' 6 | #' @description 7 | #' A [TaskGenerator] for the xor task in [mlbench::mlbench.xor()].
8 | #' 9 | #' @templateVar id xor 10 | #' @template task_generator 11 | #' 12 | #' @template seealso_task_generator 13 | #' @export 14 | #' @examples 15 | #' generator = tgen("xor") 16 | #' plot(generator, n = 200) 17 | #' 18 | #' task = generator$generate(200) 19 | #' str(task$data()) 20 | TaskGeneratorXor = R6Class("TaskGeneratorXor", 21 | inherit = TaskGenerator, 22 | public = list( 23 | #' @description 24 | #' Creates a new instance of this [R6][R6::R6Class] class. 25 | initialize = function() { 26 | ps = ps( 27 | d = p_int(1L, default = 1L) 28 | ) 29 | 30 | super$initialize(id = "xor", "classif", "mlbench", ps, 31 | label = "XOR Classification", man = "mlr3::mlr_task_generators_xor") 32 | }, 33 | 34 | #' @description 35 | #' Creates a simple plot of generated data. 36 | #' @param n (`integer(1)`)\cr 37 | #' Number of samples to draw for the plot. Default is 200. 38 | #' @param pch (`integer(1)`)\cr 39 | #' Point char. Passed to [plot()]. 40 | #' @param ... (any)\cr 41 | #' Additional arguments passed to [plot()]. 42 | plot = function(n = 200L, pch = 19L, ...) { 43 | plot(private$.generate_obj(n), pch = pch, ...) 
44 | } 45 | ), 46 | 47 | private = list( 48 | .generate_obj = function(n) { 49 | invoke(mlbench::mlbench.xor, n = n, .args = self$param_set$values, .opts = allow_partial_matching) 50 | }, 51 | 52 | .generate = function(n) { 53 | obj = private$.generate_obj(n) 54 | TaskClassif$new(sprintf("%s_%i", self$id, n), convert_mlbench(obj), target = "y") 55 | } 56 | ) 57 | ) 58 | 59 | #' @include mlr_task_generators.R 60 | mlr_task_generators$add("xor", function() TaskGeneratorXor$new()) 61 | -------------------------------------------------------------------------------- /inst/testthat/helper_misc.R: -------------------------------------------------------------------------------- 1 | with_seed = function(seed, expr) { 2 | old_seed = get0(".Random.seed", globalenv(), mode = "integer", inherits = FALSE) 3 | if (is.null(old_seed)) { 4 | runif(1L) 5 | old_seed = get0(".Random.seed", globalenv(), mode = "integer", inherits = FALSE) 6 | } 7 | 8 | on.exit(assign(".Random.seed", old_seed, globalenv()), add = TRUE) 9 | set.seed(seed) 10 | force(expr) 11 | } 12 | 13 | with_future = function(backend, expr, ...) 
{ 14 | requireNamespace("future") 15 | oplan = force(future::plan(backend, ...)) 16 | on.exit(future::plan(oplan), add = TRUE) 17 | force(expr) 18 | } 19 | 20 | with_mirai = function(expr, compute, seed = NULL) { 21 | requireNamespace("mirai") 22 | mirai::daemons(1, .compute = compute, seed = seed) 23 | on.exit(mirai::daemons(0, .compute = compute), add = TRUE) 24 | force(expr) 25 | expect_true(mirai::status(.compute = compute)$mirai["completed"] > 0) 26 | } 27 | 28 | private = function(x) { 29 | x[[".__enclos_env__"]][["private"]] 30 | } 31 | 32 | 33 | iris_weights_learner = TaskClassif$new("iris_weights_learner", as_data_backend(cbind(datasets::iris, data.frame(w = rep(c(1, 10, 100), each = 50)))), target = "Species") 34 | iris_weights_learner$set_col_roles("w", "weights_learner") 35 | 36 | iris_weights_measure = TaskClassif$new("iris_weights_measure", as_data_backend(cbind(datasets::iris, data.frame(w = rep(c(1, 10, 100), each = 50)))), target = "Species") 37 | iris_weights_measure$set_col_roles("w", "weights_measure") 38 | 39 | cars_weights_learner = TaskRegr$new("cars_weights_learner", as_data_backend(cbind(datasets::cars, data.frame(w = rep(c(1, 10), each = 25)))), target = "dist") 40 | cars_weights_learner$set_col_roles("w", "weights_learner") 41 | 42 | cars_weights_measure = TaskRegr$new("cars_weights_measure", as_data_backend(cbind(datasets::cars, data.frame(w = rep(c(1, 10), each = 25)))), target = "dist") 43 | cars_weights_measure$set_col_roles("w", "weights_measure") 44 | 45 | -------------------------------------------------------------------------------- /man/set_threads.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/set_threads.R 3 | \name{set_threads} 4 | \alias{set_threads} 5 | \alias{set_threads.default} 6 | \alias{set_threads.R6} 7 | \alias{set_threads.list} 8 | \title{Set the Number of Threads} 9 | \usage{ 10 | 
set_threads(x, n = availableCores(), ...) 11 | 12 | \method{set_threads}{default}(x, n = availableCores(), ...) 13 | 14 | \method{set_threads}{R6}(x, n = availableCores(), ...) 15 | 16 | \method{set_threads}{list}(x, n = availableCores(), ...) 17 | } 18 | \arguments{ 19 | \item{x}{(any)\cr 20 | Object to set threads for, e.g. a \link{Learner}. 21 | This object is modified in-place.} 22 | 23 | \item{n}{(\code{integer(1)})\cr 24 | Number of threads to use. Defaults to \code{\link[parallelly:availableCores]{parallelly::availableCores()}}.} 25 | 26 | \item{...}{(any)\cr 27 | Additional arguments.} 28 | } 29 | \value{ 30 | Same object as input \code{x} (changed in-place), 31 | with possibly updated parameter values. 32 | } 33 | \description{ 34 | Control the parallelism via threading while calling external packages from \CRANpkg{mlr3}. 35 | 36 | For example, the random forest implementation in package \CRANpkg{ranger} (connected 37 | via \CRANpkg{mlr3learners}) supports threading via OpenMP. 38 | The number of threads to use can be set via hyperparameter \code{num.threads}, and 39 | defaults to 1. By calling \code{set_threads(x, 4)} with \code{x} being a ranger learner, the 40 | hyperparameter is changed so that 4 cores are used. 41 | 42 | If the object \code{x} does not support threading, \code{x} is returned as-is. 43 | If applied to a list, recurses through all list elements. 44 | 45 | Note that threading is incompatible with other parallelization techniques such as forking 46 | via the \link[future:plan]{future::plan} \link[future:multicore]{future::multicore}. For this reason all learners connected to \CRANpkg{mlr3} 47 | have threading disabled in their defaults. 
48 | } 49 | -------------------------------------------------------------------------------- /R/TaskClassif_german_credit.R: -------------------------------------------------------------------------------- 1 | #' @title German Credit Classification Task 2 | #' 3 | #' @name mlr_tasks_german_credit 4 | #' @format [R6::R6Class] inheriting from [TaskClassif]. 5 | #' @include mlr_tasks.R 6 | #' 7 | #' @description 8 | #' A classification task for the German credit data set. 9 | #' The aim is to predict creditworthiness, labeled as "good" and "bad". 10 | #' Positive class is set to label "good". 11 | #' 12 | #' The example shows how to create a [MeasureClassifCosts] from a matrix of misclassification costs. 13 | #' 14 | #' @templateVar id german_credit 15 | #' @template task 16 | #' 17 | #' @source 18 | #' Data set originally published on [UCI](http://archive.ics.uci.edu/dataset/144/statlog+german+credit+data). 19 | #' This is the preprocessed version taken from package \CRANpkg{rchallenge} with 20 | #' factors instead of dummy variables, and corrected as proposed by Ulrike 21 | #' Grömping. 22 | #' 23 | #' Donor: 24 | #' Professor Dr.
Hans Hofmann\cr 25 | #' Institut für Statistik und Ökonometrie\cr 26 | #' Universität Hamburg\cr 27 | #' FB Wirtschaftswissenschaften\cr 28 | #' Von-Melle-Park 5\cr 29 | #' 2000 Hamburg 13 30 | #' 31 | #' @references 32 | #' `r format_bib("gromping_2019")` 33 | #' 34 | #' @template seealso_task 35 | #' @examples 36 | #' task = tsk("german_credit") 37 | #' costs = matrix(c(0, 1, 5, 0), nrow = 2) 38 | #' dimnames(costs) = list(predicted = task$class_names, truth = task$class_names) 39 | #' measure = msr("classif.costs", id = "german_credit_costs", costs = costs) 40 | #' print(measure) 41 | NULL 42 | 43 | load_task_german_credit = function(id = "german_credit") { 44 | b = as_data_backend(readRDS(system.file("extdata", "german_credit.rds", package = "mlr3"))) 45 | task = TaskClassif$new(id, b, target = "credit_risk", positive = "good", 46 | label = "German Credit") 47 | b$hash = task$man = "mlr3::mlr_tasks_german_credit" 48 | task 49 | } 50 | 51 | #' @include mlr_tasks.R 52 | mlr_tasks$add("german_credit", load_task_german_credit) 53 | -------------------------------------------------------------------------------- /R/as_prediction_data.R: -------------------------------------------------------------------------------- 1 | #' @title PredictionData 2 | #' 3 | #' @description 4 | #' Convert object to a [PredictionData] or a list of [PredictionData]. 5 | #' 6 | #' @inheritParams as_task 7 | #' @param task ([Task]). 8 | #' @template param_row_ids 9 | #' @param check (`logical(1)`)\cr 10 | #' Perform argument checks and type conversions? 11 | #' @param ... (any)\cr 12 | #' Additional arguments. 13 | #' 14 | #' @return [PredictionData]. 15 | #' @export 16 | as_prediction_data = function(x, task, row_ids = task$row_ids, check = TRUE, ...) { 17 | UseMethod("as_prediction_data") 18 | } 19 | 20 | #' @rdname as_prediction_data 21 | #' @export 22 | as_prediction_data.Prediction = function(x, task, row_ids = task$row_ids, check = TRUE, ...) 
{ # nolint 23 | x$data 24 | } 25 | 26 | #' @rdname as_prediction_data 27 | #' @export 28 | as_prediction_data.PredictionData = function(x, task, row_ids = task$row_ids, check = TRUE, ...) { # nolint 29 | x 30 | } 31 | 32 | #' @rdname as_prediction_data 33 | #' @param train_task ([Task])\cr 34 | #' Task used for training the learner. 35 | #' @export 36 | as_prediction_data.list = function(x, task, row_ids = task$row_ids, check = TRUE, ..., train_task) { # nolint 37 | assert_list(x, names = "unique") 38 | predict_types = names(mlr_reflections$learner_predict_types[[task$task_type]]) 39 | if (!is.null(predict_types)) predict_types = c(predict_types, "extra") 40 | assert_names(names(x), subset.of = predict_types) 41 | 42 | x$row_ids = row_ids 43 | if (inherits(task, "TaskSupervised")) { 44 | x$truth = task$truth(row_ids) 45 | } 46 | 47 | if ("weights_measure" %chin% task$properties) { 48 | x$weights = task$weights_measure[list(row_id = row_ids), on = "row_id", "weight"][[1L]] 49 | } 50 | 51 | task = if (task$task_type == "unsupervised") train_task else task 52 | pdata = new_prediction_data(x, task$task_type) 53 | if (check) { 54 | pdata = check_prediction_data(pdata, train_task = task) 55 | } 56 | 57 | pdata 58 | } 59 | -------------------------------------------------------------------------------- /R/as_prediction_classif.R: -------------------------------------------------------------------------------- 1 | #' @title Convert to a Classification Prediction 2 | #' 3 | #' @description 4 | #' Convert object to a [PredictionClassif]. 5 | #' 6 | #' @inheritParams as_prediction 7 | #' 8 | #' @return [PredictionClassif]. 
#' @export
#' @examples
#' # create a prediction object
#' task = tsk("penguins")
#' learner = lrn("classif.rpart", predict_type = "prob")
#' learner$train(task)
#' p = learner$predict(task)
#'
#' # convert to a data.table
#' tab = as.data.table(p)
#'
#' # convert back to a Prediction
#' as_prediction_classif(tab)
#'
#' # split data.table into a list of data.tables
#' tabs = split(tab, tab$truth)
#'
#' # convert back to list of predictions
#' preds = lapply(tabs, as_prediction_classif)
#'
#' # calculate performance in each group
#' sapply(preds, function(p) p$score())
as_prediction_classif = function(x, ...) {
  UseMethod("as_prediction_classif")
}


#' @rdname as_prediction_classif
#' @export
as_prediction_classif.PredictionClassif = function(x, ...) { # nolint
  # Already of the requested class.
  x
}


#' @rdname as_prediction_classif
#' @export
as_prediction_classif.data.frame = function(x, ...) { # nolint
  assert_names(names(x), must.include = c("row_ids", "truth", "response"))

  # Every column that is not one of the fixed columns must hold class probabilities.
  prob_cols = setdiff(names(x), c("row_ids", "truth", "response", "weights"))
  if (any(!startsWith(prob_cols, "prob."))) {
    error_input("Table may only contain columns 'row_ids', 'truth', 'response', 'weights' as well as columns prefixed with 'prob.' for class probabilities")
  }

  x = as.data.table(x)
  prob = NULL
  if (length(prob_cols)) {
    prob = as.matrix(x[, prob_cols, with = FALSE])
    # Strip the 5-character "prob." prefix so that the column names are the class labels.
    colnames(prob) = substring(colnames(prob), 6L)
  }

  # The remaining columns are passed on as named constructor arguments.
  invoke(PredictionClassif$new, prob = prob, .args = x[, -prob_cols, with = FALSE])
}

--------------------------------------------------------------------------------
/R/partition.R:
--------------------------------------------------------------------------------
#' @title Manually Partition into Training, Test and Validation Set
#'
#' @description
#' Creates a split of the row ids of a [Task] into a training and a test set, and optionally a validation set.
#'
#' @param task ([Task])\cr
#'   Task to operate on.
#' @param ratio (`numeric()`)\cr
#'   Ratio of observations to put into the training set.
#'   If a 2 element vector is provided, the first element is the ratio for the training set, the second element is the ratio for the test set.
#'   The validation set will contain the remaining observations.
#' @return Named `list()` with the row ids of the `train`, `test` and `validation` set.
#' @export
#' @examples
#' # regression task partitioned into training and test set
#' task = tsk("california_housing")
#' split = partition(task, ratio = 0.5)
#' data = data.frame(
#'   y = c(task$truth(split$train), task$truth(split$test)),
#'   split = rep(c("train", "predict"), lengths(split[c("train", "test")]))
#' )
#' boxplot(y ~ split, data = data)
#'
#' # classification task partitioned into training, test and validation set
#' task = tsk("pima")
#' split = partition(task, c(0.66, 0.14))
partition = function(task, ratio = 0.67) {
  # Reject missing ratios here: an NA would otherwise reach the `sum(ratio) >= 1`
  # check of the method and abort with "missing value where TRUE/FALSE needed".
  assert_numeric(ratio, any.missing = FALSE, min.len = 1L, max.len = 2L)
  UseMethod("partition")
}

#' @rdname partition
#' @export
partition.Task = function(task, ratio = 0.67) {
  # Work on a deep clone, because the row roles are overwritten below.
  task = task$clone(deep = TRUE)
  if (sum(ratio) >= 1) {
    error_input("Sum of 'ratio' must be smaller than 1")
  }

  # Append the share of the observations that is left over.
  if (length(ratio) == 1L) {
    ratio[2L] = 1 - ratio
  } else {
    ratio[3L] = 1 - (ratio[1L] + ratio[2L])
  }

  # First split: training set versus everything else.
  r1 = rsmp("holdout", ratio = ratio[1L])$instantiate(task)

  # Second split: restrict the task to the rows not used for training and divide
  # them into test and validation set. The test ratio is rescaled so that it is
  # relative to the remaining rows.
  task$row_roles$use = r1$test_set(1L)
  r2 = rsmp("holdout", ratio = ratio[2L] / (1 - ratio[1L]))$instantiate(task)

  list(
    train = r1$train_set(1L),
    test = r2$train_set(1L),
    validation = r2$test_set(1L)
  )
}

--------------------------------------------------------------------------------
/R/TaskRegr.R:
--------------------------------------------------------------------------------
#' @title Regression Task
#'
#' @include TaskSupervised.R
#'
#' @description
#' This task specializes [Task] and [TaskSupervised] for regression problems.
#' The target column is assumed to be numeric.
#' The `task_type` is set to `"regr"`.
#'
#' It is recommended to use [as_task_regr()] for construction.
#' Predefined tasks are stored in the [dictionary][mlr3misc::Dictionary] [mlr_tasks].
#'
#' @template param_rows
#' @template param_id
#' @template param_backend
#'
#' @template seealso_task
#' @export
#' @examples
#' task = as_task_regr(palmerpenguins::penguins, target = "bill_length_mm")
#' task$task_type
#' task$formula()
#' task$truth()
#' task$data(rows = 1:3, cols = task$feature_names[1:2])
TaskRegr = R6Class("TaskRegr",
  inherit = TaskSupervised,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    #' The function [as_task_regr()] provides an alternative way to construct regression tasks.
    #'
    #' @template param_target
    #' @template param_label
    #' @template param_extra_args
    initialize = function(id, backend, target, label = NA_character_, extra_args = list()) {
      assert_string(target)
      super$initialize(
        id = id, task_type = "regr", backend = backend,
        target = target, label = label, extra_args = extra_args)

      # The column type can only be looked up once the parent has populated `col_info`.
      numeric_types = c("integer", "numeric")
      target_type = fget_key(self$col_info, i = target, j = "type", key = "id")
      if (target_type %nin% numeric_types) {
        error_input("Target column '%s' must be numeric", target)
      }
    },

    #' @description
    #' True response for specified `row_ids`. Format depends on the task type.
    #' Defaults to all rows with role "use".
    #' @return `numeric()`.
    truth = function(rows = NULL) {
      # Return the first (and only) target column of the parent's result as a plain vector.
      target_cols = super$truth(rows)
      target_cols[[1L]]
    }
  )
)

--------------------------------------------------------------------------------
/R/as_task_unsupervised.R:
--------------------------------------------------------------------------------
#' @title Convert to an Unsupervised Task
#'
#' @description
#' Convert object to a [TaskUnsupervised] or a list of [TaskUnsupervised].
#'
#' @inheritParams as_task
#'
#' @export
as_task_unsupervised = function(x, ...) {
  UseMethod("as_task_unsupervised")
}

#' @rdname as_task_unsupervised
#' @export
as_task_unsupervised.Task = function(x, clone = FALSE, ...) { # nolint
  # Tasks are passed through; only a copy is made on request.
  if (clone) x$clone() else x
}

#' @rdname as_task_unsupervised
#' @param id (`character(1)`)\cr
#'   Id for the new task.
#'   Defaults to the (deparsed and substituted) name of the data argument.
#' @template param_label
#' @export
as_task_unsupervised.data.frame = function(x, id = deparse1(substitute(x)), label = NA_character_, ...) { # nolint
  # Resolve the lazily evaluated default id before anything else happens.
  force(id)

  # Warn about double columns that contain infinite values.
  ii = which(map_lgl(keep(x, is.double), anyInfinite))
  if (length(ii)) {
    warning_input("Detected columns with unsupported Inf values in data: %s", str_collapse(names(ii)))
  }

  TaskUnsupervised$new(id = id, backend = x, label = label)
}

#' @rdname as_task_unsupervised
#' @export
as_task_unsupervised.DataBackend = function(x, id = deparse1(substitute(x)), label = NA_character_, ...) { # nolint
  # Resolve the lazily evaluated default id before anything else happens.
  force(id)

  TaskUnsupervised$new(id = id, backend = x, label = label)
}

#' @rdname as_task_unsupervised
#' @export
as_tasks_unsupervised = function(x, ...) {
  # Dispatch on this generic's own name. Dispatching on "as_tasks" bypassed the
  # `as_tasks_unsupervised.*` methods defined below.
  UseMethod("as_tasks_unsupervised")
}

#' @rdname as_task_unsupervised
#' @param clone (`logical(1)`)\cr
#'   If `TRUE`, ensures that the returned object is not the same as the input `x`.
#' @export
as_tasks_unsupervised.list = function(x, clone = FALSE, ...) { # nolint
  # Convert element-wise; `clone` and further arguments are forwarded to `as_task()`.
  lapply(x, as_task, clone = clone, ...)
}

#' @rdname as_task_unsupervised
#' @export
as_tasks_unsupervised.Task = function(x, clone = FALSE, ...) { # nolint
  # A single task is wrapped into a list of length one.
  list(if (clone) x$clone() else x)
}

--------------------------------------------------------------------------------
/man/mlr_measures_sim.jaccard.Rd:
--------------------------------------------------------------------------------
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/MeasureSimple.R
\name{mlr_measures_sim.jaccard}
\alias{mlr_measures_sim.jaccard}
\title{Jaccard Similarity Index}
\description{
Measure to compare two or more sets w.r.t. their similarity.
}
\details{
For two sets \eqn{A} and \eqn{B}, the Jaccard Index is defined as
\deqn{
 J(A, B) = \frac{|A \cap B|}{|A \cup B|}.
}{
 J(A, B) = length(intersect(A, B)) / length(union(A, B)).
}
If more than two sets are provided, the mean of all pairwise scores
is calculated.

This measure is undefined if two or more sets are empty.
}
\note{
This measure requires learners with property \code{"selected_features"}.
The extracted feature sets are passed to \code{\link[mlr3measures:jaccard]{mlr3measures::jaccard()}} from
package \CRANpkg{mlr3measures}.

If the measure is undefined for the input, \code{NaN} is returned.
This can be customized by setting the field \code{na_value}.
}
\section{Parameters}{

Empty ParamSet
}

\section{Dictionary}{

This \link{Measure} can be instantiated via the \link[mlr3misc:Dictionary]{dictionary} \link{mlr_measures} or with the associated sugar function \code{\link[=msr]{msr()}}:

\if{html}{\out{
}}\preformatted{mlr_measures$get("sim.jaccard")
msr("sim.jaccard")
}\if{html}{\out{
}}
}

\section{Meta Information}{


\itemize{
\item Type: \code{"similarity"}
\item Range: \eqn{[0, 1]}{[0, 1]}
\item Minimize: \code{FALSE}
}

}

\seealso{
\link[mlr3misc:Dictionary]{Dictionary} of \link[=Measure]{Measures}: \link{mlr_measures}

\code{as.data.table(mlr_measures)} for a complete table of all (also dynamically created) \link{Measure} implementations.

Other similarity measures:
\code{\link{mlr_measures_sim.phi}}
}
\concept{similarity measures}

--------------------------------------------------------------------------------
/R/TaskGenerator2DNormals.R:
--------------------------------------------------------------------------------
#' @title 2D Normals Classification Task Generator
#'
#' @name mlr_task_generators_2dnormals
#' @include TaskGenerator.R
#'
#' @description
#' A [TaskGenerator] for the 2d normals task in [mlbench::mlbench.2dnormals()].
#'
#' @templateVar id 2dnormals
#' @template task_generator
#'
#' @template seealso_task_generator
#' @export
#' @examples
#' generator = tgen("2dnormals")
#' plot(generator, n = 200)
#'
#' task = generator$generate(200)
#' str(task$data())
TaskGenerator2DNormals = R6Class("TaskGenerator2DNormals",
  inherit = TaskGenerator,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      # Hyperparameters forwarded to the mlbench generator on every draw.
      param_set = ps(
        cl = p_int(2L),
        r = p_dbl(1L),
        sd = p_dbl(0L)
      )

      super$initialize(id = "2dnormals", "classif", "mlbench", param_set,
        label = "2D Normals Classification", man = "mlr3::mlr_task_generators_2dnormals")
    },

    #' @description
    #' Creates a simple plot of generated data.
    #' @param n (`integer(1)`)\cr
    #'   Number of samples to draw for the plot. Default is 200.
    #' @param pch (`integer(1)`)\cr
    #'   Point char. Passed to [plot()].
    #' @param ... (any)\cr
    #'   Additional arguments passed to [plot()].
    plot = function(n = 200L, pch = 19L, ...) {
      # Draws a fresh sample and plots the raw mlbench object.
      plot(private$.generate_obj(n), pch = pch, ...)
    }
  ),

  private = list(
    # Draws `n` observations from mlbench using the current parameter values.
    .generate_obj = function(n) {
      invoke(mlbench::mlbench.2dnormals, n = n, .args = self$param_set$values)
    },

    # Wraps a fresh draw into a classification task with the id "<id>_<n>".
    .generate = function(n) {
      sample_obj = private$.generate_obj(n)
      task_id = sprintf("%s_%i", self$id, n)
      TaskClassif$new(task_id, convert_mlbench(sample_obj), target = "y")
    }
  )
)

#' @include mlr_task_generators.R
mlr_task_generators$add("2dnormals", function() TaskGenerator2DNormals$new())

--------------------------------------------------------------------------------