├── R ├── aaa.R ├── helpers.R ├── zzz.R ├── helpers_ranger.R ├── LearnerClassifNaiveBayes.R ├── LearnerClassifQDA.R ├── LearnerClassifLDA.R ├── LearnerRegrKKNN.R ├── LearnerClassifKKNN.R ├── LearnerRegrSVM.R ├── LearnerRegrNnet.R ├── helpers_glmnet.R ├── LearnerClassifNnet.R ├── LearnerClassifMultinom.R ├── LearnerClassifSVM.R ├── LearnerRegrLM.R ├── LearnerClassifLogReg.R ├── LearnerRegrKM.R ├── bibentries.R ├── LearnerRegrGlmnet.R ├── LearnerRegrCVGlmnet.R ├── LearnerClassifGlmnet.R ├── LearnerClassifCVGlmnet.R └── LearnerClassifRanger.R ├── .ignore ├── man ├── figures │ └── logo_navbar.png ├── mlr3learners-package.Rd └── mlr_learners_classif.naive_bayes.Rd ├── pkgdown ├── favicon │ ├── favicon.ico │ ├── favicon-16x16.png │ ├── favicon-32x32.png │ ├── apple-touch-icon.png │ ├── apple-touch-icon-60x60.png │ ├── apple-touch-icon-76x76.png │ ├── apple-touch-icon-120x120.png │ ├── apple-touch-icon-152x152.png │ └── apple-touch-icon-180x180.png └── _pkgdown.yml ├── tests ├── testthat │ ├── teardown.R │ ├── test_register.R │ ├── test_classif_lda.R │ ├── test_classif_naive_bayes.R │ ├── test_classif_qda.R │ ├── test_unload.R │ ├── test_regr_nnet.R │ ├── test_classif_nnet.R │ ├── setup.R │ ├── test_classif_multinom.R │ ├── test_classif_svm.R │ ├── test_regr_km.R │ ├── test_regr_cv_glmnet.R │ ├── test_regr_svm.R │ ├── test_regr_kknn.R │ ├── test_classif_kknn.R │ ├── test_classif_cv_glmnet.R │ ├── test_regr_glmnet.R │ ├── helper.R │ ├── test_regr_lm.R │ ├── test_classif_log_reg.R │ ├── test_classif_glmnet.R │ └── test_classif_ranger.R └── testthat.R ├── .github ├── dependabot.yml ├── workflows │ ├── param-test.yml │ ├── pkgdown.yml │ ├── r-cmd-check.yml │ ├── no-suggest-cmd-check.yml │ └── dev-cmd-check.yml └── ISSUE_TEMPLATE │ └── learner-request.md ├── inst └── paramtest │ ├── helper.R │ ├── test_paramtest_regr.kknn.R │ ├── test_paramtest_classif.kknn.R │ ├── test_paramtest_regr.nnet.R │ ├── test_paramtest_classif.nnet.R │ ├── test_paramtest_classif.multinom.R │ ├── 
test_paramtest_classif.naive_bayes.R │ ├── test_paramtest_regr.km.R │ ├── test_paramtest_classif.svm.R │ ├── test_paramtest_regr.svm.R │ ├── test_paramtest_classif.lda.R │ ├── test_paramtest_classif.qda.R │ ├── test_paramtest_regr.cv_glmnet.R │ ├── test_paramtest_classif.cv_glmnet.R │ ├── test_paramtest_regr.lm.R │ ├── test_paramtest_regr.glmnet.R │ ├── test_paramtest_classif.glmnet.R │ ├── test_paramtest_classif.logreg.R │ ├── test_paramtest_regr.ranger.R │ ├── test_paramtest_classif.ranger.R │ ├── test_paramtest_regr.xgboost.R │ └── test_paramtest_classif.xgboost.R ├── .Rbuildignore ├── .editorconfig ├── man-roxygen ├── section_contrasts.R ├── note_xgboost.R ├── learner.R ├── example_early_stopping.R ├── note_kknn.R ├── example.R ├── seealso_learner.R └── example_dontrun.R ├── mlr3learners.Rproj ├── .lintr ├── src └── init.c ├── NAMESPACE ├── DESCRIPTION ├── .gitignore ├── README.md └── LICENSE /R/aaa.R: -------------------------------------------------------------------------------- 1 | learners = list() 2 | -------------------------------------------------------------------------------- /.ignore: -------------------------------------------------------------------------------- 1 | man/ 2 | docs/ 3 | pkgdown/ 4 | -------------------------------------------------------------------------------- /man/figures/logo_navbar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3learners/HEAD/man/figures/logo_navbar.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3learners/HEAD/pkgdown/favicon/favicon.ico -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-16x16.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mlr-org/mlr3learners/HEAD/pkgdown/favicon/favicon-16x16.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3learners/HEAD/pkgdown/favicon/favicon-32x32.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3learners/HEAD/pkgdown/favicon/apple-touch-icon.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-60x60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3learners/HEAD/pkgdown/favicon/apple-touch-icon-60x60.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-76x76.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3learners/HEAD/pkgdown/favicon/apple-touch-icon-76x76.png -------------------------------------------------------------------------------- /tests/testthat/teardown.R: -------------------------------------------------------------------------------- 1 | options(old_opts) 2 | lg = lgr::get_logger("mlr3") 3 | lg$set_threshold(old_threshold) 4 | future::plan(old_plan) 5 | -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-120x120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3learners/HEAD/pkgdown/favicon/apple-touch-icon-120x120.png 
-------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-152x152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3learners/HEAD/pkgdown/favicon/apple-touch-icon-152x152.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-180x180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3learners/HEAD/pkgdown/favicon/apple-touch-icon-180x180.png -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | if (requireNamespace("testthat", quietly = TRUE)) { 2 | library("testthat") 3 | library("mlr3learners") 4 | test_check("mlr3learners") 5 | } 6 | -------------------------------------------------------------------------------- /inst/paramtest/helper.R: -------------------------------------------------------------------------------- 1 | library(mlr3) 2 | lapply(list.files(system.file("testthat", package = "mlr3"), 3 | pattern = "helper_autotest", full.names = TRUE), source) 4 | -------------------------------------------------------------------------------- /tests/testthat/test_register.R: -------------------------------------------------------------------------------- 1 | test_that("re-populate dictionaries", { 2 | rm("classif.log_reg", envir = mlr_learners$items) 3 | expect_disjunct("classif.log_reg", mlr_learners$keys()) 4 | register_mlr3() 5 | 
expect_subset("classif.log_reg", mlr_learners$keys()) 6 | }) 7 | -------------------------------------------------------------------------------- /tests/testthat/test_classif_lda.R: -------------------------------------------------------------------------------- 1 | skip_if_not_installed("MASS") 2 | 3 | test_that("autotest", { 4 | learner = mlr3::lrn("classif.lda") 5 | expect_learner(learner) 6 | result = run_autotest(learner, exclude = "feat_single") 7 | expect_true(result, info = result$error) 8 | }) 9 | -------------------------------------------------------------------------------- /tests/testthat/test_classif_naive_bayes.R: -------------------------------------------------------------------------------- 1 | skip_if_not_installed("e1071") 2 | 3 | test_that("autotest", { 4 | learner = mlr3::lrn("classif.naive_bayes") 5 | expect_learner(learner) 6 | capture.output({result = run_autotest(learner)}) 7 | expect_true(result, info = result$error) 8 | }) 9 | -------------------------------------------------------------------------------- /tests/testthat/test_classif_qda.R: -------------------------------------------------------------------------------- 1 | skip_if_not_installed("MASS") 2 | 3 | test_that("autotest", { 4 | learner = mlr3::lrn("classif.qda") 5 | expect_learner(learner) 6 | result = run_autotest(learner, N = 100L, exclude = "feat_single") 7 | expect_true(result, info = result$error) 8 | }) 9 | 10 | -------------------------------------------------------------------------------- /tests/testthat/test_unload.R: -------------------------------------------------------------------------------- 1 | test_that("unloading leaves no trace", { 2 | library(mlr3learners) 3 | n_learners = length(learners) 4 | n_total = length(mlr_learners$keys()) 5 | unloadNamespace("mlr3learners") 6 | n_mlr = length(mlr_learners$keys()) 7 | expect_true(n_learners == n_total - n_mlr) 8 | }) 9 | -------------------------------------------------------------------------------- 
/tests/testthat/test_regr_nnet.R: -------------------------------------------------------------------------------- 1 | skip_if_not_installed("nnet") 2 | skip_on_cran() # numerically instable with ATLAS blas 3 | 4 | test_that("autotest", { 5 | learner = mlr3::lrn("regr.nnet") 6 | expect_learner(learner) 7 | capture.output({result = run_autotest(learner)}) 8 | expect_true(result, info = result$error) 9 | }) 10 | -------------------------------------------------------------------------------- /tests/testthat/test_classif_nnet.R: -------------------------------------------------------------------------------- 1 | skip_if_not_installed("nnet") 2 | skip_on_cran() # numerically instable with ATLAS blas 3 | 4 | test_that("autotest", { 5 | learner = mlr3::lrn("classif.nnet") 6 | expect_learner(learner) 7 | capture.output({result = run_autotest(learner)}) 8 | expect_true(result, info = result$error) 9 | }) 10 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^LICENSE$ 2 | .ignore 3 | .editorconfig 4 | .gitignore 5 | ^.*\.Rproj$ 6 | ^\.Rproj\.user$ 7 | ^\.github$ 8 | ^man-roxygen$ 9 | ^pkgdown$ 10 | ^\.lintr$ 11 | ^\.travis\.yml$ 12 | ^\.pre-commit-config\.yaml$ 13 | ^\.vscode$ 14 | ^docs$ 15 | vignettes/learners/ 16 | ^\.ccache$ 17 | ^clang-.* 18 | ^gfortran.* 19 | ^revdep$ 20 | ^cran-comments\.md$ 21 | ^CRAN-SUBMISSION$ 22 | .clangd 23 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # See http://editorconfig.org 2 | root = true 3 | 4 | [*] 5 | charset = utf-8 6 | end_of_line = lf 7 | insert_final_newline = true 8 | indent_style = space 9 | trim_trailing_whitespace = true 10 | 11 | [*.{r,R,md,Rmd}] 12 | indent_size = 2 13 | 14 | [*.{c,h}] 15 | indent_size = 4 16 | 17 | [*.{cpp,hpp}] 18 | indent_size = 4 19 | 
20 | [{NEWS.md,DESCRIPTION,LICENSE}] 21 | max_line_length = 80 22 | -------------------------------------------------------------------------------- /man-roxygen/section_contrasts.R: -------------------------------------------------------------------------------- 1 | #' @section Contrasts: 2 | #' To ensure reproducibility, this learner always uses the default contrasts: 3 | #' 4 | #' * [contr.treatment()] for unordered factors, and 5 | #' * [contr.poly()] for ordered factors. 6 | #' 7 | #' Setting the option `"contrasts"` does not have any effect. 8 | #' Instead, set the respective hyperparameter or use \CRANpkg{mlr3pipelines} to create dummy features. 9 | -------------------------------------------------------------------------------- /man-roxygen/note_xgboost.R: -------------------------------------------------------------------------------- 1 | #' @note 2 | #' To compute on GPUs, you first need to compile \CRANpkg{xgboost} yourself and link 3 | #' against CUDA. 4 | #' See \url{https://xgboost.readthedocs.io/en/stable/build.html#building-with-gpu-support}. 5 | #' 6 | #' The `outputmargin`, `predcontrib`, `predinteraction`, and `predleaf` parameters are not supported. 7 | #' You can still call e.g. `predict(learner$model, newdata = newdata, outputmargin = TRUE)` to get these predictions. 
8 | 9 | -------------------------------------------------------------------------------- /mlr3learners.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | LineEndingConversion: Posix 18 | 19 | BuildType: Package 20 | PackageUseDevtools: Yes 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /man-roxygen/learner.R: -------------------------------------------------------------------------------- 1 | #' @section Dictionary: 2 | #' This [mlr3::Learner] can be instantiated via the [dictionary][mlr3misc::Dictionary] [mlr3::mlr_learners] or with the associated sugar function [mlr3::lrn()]: 3 | #' ``` 4 | #' mlr_learners$get("<%= id %>") 5 | #' lrn("<%= id %>") 6 | #' ``` 7 | #' 8 | #' @section Meta Information: 9 | #' `r mlr3misc::rd_info(mlr3::lrn("<%= id %>"))` 10 | #' @md 11 | #' 12 | #' @section Parameters: 13 | #' `r mlr3misc::rd_info(mlr3::lrn("<%= id %>")$param_set)` 14 | #' @md 15 | -------------------------------------------------------------------------------- /tests/testthat/setup.R: -------------------------------------------------------------------------------- 1 | old_opts = options( 2 | warnPartialMatchArgs = TRUE, 3 | warnPartialMatchAttr = TRUE, 4 | warnPartialMatchDollar = TRUE 5 | ) 6 | 7 | # https://github.com/HenrikBengtsson/Wishlist-for-R/issues/88 8 | old_opts = lapply(old_opts, function(x) if (is.null(x)) FALSE else x) 9 | 10 | lg = lgr::get_logger("mlr3") 11 | old_threshold = lg$threshold 12 | old_plan = future::plan() 13 | lg$set_threshold(0) 14 | future::plan("sequential") 15 | 16 | mirai::daemons(1, .compute = 
"mlr3_encapsulation") 17 | 18 | -------------------------------------------------------------------------------- /.lintr: -------------------------------------------------------------------------------- 1 | linters: linters_with_defaults( 2 | # lintr defaults: https://github.com/jimhester/lintr#available-linters 3 | # the following setup changes/removes certain linters 4 | assignment_linter = NULL, # do not force using <- for assignments 5 | object_name_linter = object_name_linter(c("snake_case", "CamelCase")), # only allow snake case and camel case object names 6 | cyclocomp_linter = NULL, # do not check function complexity 7 | commented_code_linter = NULL, # allow code in comments 8 | line_length_linter = line_length_linter(300L) 9 | ) -------------------------------------------------------------------------------- /man-roxygen/example_early_stopping.R: -------------------------------------------------------------------------------- 1 | #' @examples 2 | #' 3 | #' # Train learner with early stopping on spam data set 4 | #' task = tsk("spam") 5 | #' 6 | #' # Split task into training and test set 7 | #' split = partition(task, ratio = 0.8) 8 | #' task$set_row_roles(split$test, "validation") 9 | #' 10 | #' # Set early stopping parameter 11 | #' learner = lrn("<%= id %>", 12 | #' nrounds = 1000, 13 | #' early_stopping_rounds = 100, 14 | #' early_stopping = TRUE 15 | #' ) 16 | #' 17 | #' # Train learner with early stopping 18 | #' learner$train(task) 19 | -------------------------------------------------------------------------------- /tests/testthat/test_classif_multinom.R: -------------------------------------------------------------------------------- 1 | skip_if_not_installed("nnet") 2 | 3 | test_that("autotest", { 4 | learner = mlr3::lrn("classif.multinom") 5 | expect_learner(learner) 6 | capture.output({result = run_autotest(learner)}) 7 | expect_true(result, info = result$error) 8 | }) 9 | 10 | 11 | test_that("predict single obs", { # 
https://github.com/mlr-org/mlr3/issues/883 12 | task = tsk("iris") 13 | lrn = lrn("classif.multinom", predict_type = "prob") 14 | capture.output({lrn$train(task)}) 15 | 16 | newdata = iris[1, ] 17 | expect_prediction(lrn$predict_newdata(newdata)) 18 | }) 19 | -------------------------------------------------------------------------------- /tests/testthat/test_classif_svm.R: -------------------------------------------------------------------------------- 1 | skip_if_not_installed("e1071") 2 | 3 | test_that("autotest", { 4 | learner = mlr3::lrn("classif.svm") 5 | expect_learner(learner) 6 | result = run_autotest(learner) 7 | expect_true(result, info = result$error) 8 | }) 9 | 10 | test_that("default_values", { 11 | learner = lrn("classif.svm") 12 | search_space = ps( 13 | cost = p_dbl(1e-2, 100), 14 | gamma = p_dbl(0, 1) 15 | ) 16 | task = tsk("pima") 17 | 18 | values = default_values(learner, search_space, task) 19 | expect_names(names(values), permutation.of = c("cost", "gamma")) 20 | }) 21 | -------------------------------------------------------------------------------- /src/init.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include // for NULL 4 | #include 5 | 6 | /* .Call calls */ 7 | extern SEXP c_ranger_mu_sigma(SEXP, SEXP, SEXP); 8 | extern SEXP c_ranger_var(SEXP, SEXP, SEXP); 9 | 10 | static const R_CallMethodDef CallEntries[] = { 11 | {"c_ranger_mu_sigma", (DL_FUNC) &c_ranger_mu_sigma, 3}, 12 | {"c_ranger_var", (DL_FUNC) &c_ranger_var, 3}, 13 | {NULL, NULL, 0} 14 | }; 15 | 16 | void R_init_mlr3learners(DllInfo *dll) { 17 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 18 | R_useDynamicSymbols(dll, FALSE); 19 | } 20 | -------------------------------------------------------------------------------- /tests/testthat/test_regr_km.R: -------------------------------------------------------------------------------- 1 | skip_if_not_installed("DiceKriging") 2 | 3 | test_that("autotest", { 
4 | learner = mlr3::lrn("regr.km", nugget.stability = 1e-8) 5 | expect_learner(learner) 6 | capture.output({result = run_autotest(learner, exclude = "feat_single", N = 50)}) 7 | expect_true(result, info = result$error) 8 | }) 9 | 10 | test_that("autotest w/ jitter", { 11 | learner = mlr3::lrn("regr.km", nugget.stability = 1e-8, jitter = 1e-12) 12 | expect_learner(learner) 13 | capture.output({result = run_autotest(learner, exclude = "feat_single|reordered", N = 50)}) 14 | expect_true(result, info = result$error) 15 | }) 16 | -------------------------------------------------------------------------------- /man-roxygen/note_kknn.R: -------------------------------------------------------------------------------- 1 | #' @note 2 | #' There is no training step for k-NN models, just storing the training data to 3 | #' process it during the predict step. 4 | #' Therefore, `$model` returns a list with the following elements: 5 | #' 6 | #' * `formula`: Formula for calling [kknn::kknn()] during `$predict()`. 7 | #' * `data`: Training data for calling [kknn::kknn()] during `$predict()`. 8 | #' * `pv`: Training parameters for calling [kknn::kknn()] during `$predict()`. 9 | #' * `kknn`: Model as returned by [kknn::kknn()], only available **after** `$predict()` has been called. 10 | #' This is not stored by default, you must set hyperparameter `store_model` to `TRUE`. 
11 | 12 | -------------------------------------------------------------------------------- /tests/testthat/test_regr_cv_glmnet.R: -------------------------------------------------------------------------------- 1 | skip_on_os("solaris") # glmnet not working properly on solaris 2 | skip_if_not_installed("glmnet") 3 | 4 | options(warnPartialMatchArgs = FALSE) 5 | on.exit(options(warnPartialMatchArgs = TRUE)) 6 | 7 | test_that("autotest", { 8 | learner = mlr3::lrn("regr.cv_glmnet") 9 | expect_learner(learner) 10 | 11 | skip_on_os("solaris") 12 | result = run_autotest(learner, exclude = "feat_single") 13 | expect_true(result, info = result$error) 14 | }) 15 | 16 | test_that("selected_features", { 17 | task = tsk("mtcars") 18 | learner = lrn("regr.cv_glmnet") 19 | learner$train(task) 20 | 21 | expect_equal( 22 | learner$selected_features(0), 23 | task$feature_names 24 | ) 25 | 26 | expect_equal( 27 | learner$selected_features(Inf), 28 | character() 29 | ) 30 | }) 31 | -------------------------------------------------------------------------------- /tests/testthat/test_regr_svm.R: -------------------------------------------------------------------------------- 1 | skip_if_not_installed("e1071") 2 | 3 | test_that("autotest", { 4 | learner = mlr3::lrn("regr.svm") 5 | expect_learner(learner) 6 | result = run_autotest(learner) 7 | expect_true(result, info = result$error) 8 | }) 9 | 10 | 11 | test_that("autotest with type nu-regression (#209)", { 12 | learner = mlr3::lrn("regr.svm", type = "nu-regression") 13 | expect_learner(learner) 14 | result = run_autotest(learner) 15 | expect_true(result, info = result$error) 16 | }) 17 | 18 | test_that("default_values", { 19 | learner = lrn("regr.svm") 20 | search_space = ps( 21 | cost = p_dbl(1e-2, 100), 22 | gamma = p_dbl(0, 1) 23 | ) 24 | task = tsk("pima") 25 | 26 | values = default_values(learner, search_space, task) 27 | expect_names(names(values), permutation.of = c("cost", "gamma")) 28 | }) 29 | 
-------------------------------------------------------------------------------- /.github/workflows/param-test.yml: -------------------------------------------------------------------------------- 1 | on: 2 | workflow_dispatch: 3 | push: 4 | branches: 5 | - main 6 | pull_request: 7 | branches: 8 | - main 9 | schedule: 10 | - cron: "0 13 * * 2" 11 | 12 | name: param-test 13 | 14 | jobs: 15 | param-test: 16 | runs-on: ubuntu-latest 17 | 18 | env: 19 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 20 | 21 | steps: 22 | - uses: actions/checkout@v5 23 | 24 | - uses: r-lib/actions/setup-r@v2 25 | 26 | - uses: r-lib/actions/setup-r-dependencies@v2 27 | with: 28 | extra-packages: | 29 | any::rvest 30 | any::magrittr 31 | 32 | - name: Run param test 33 | run: | 34 | pak::pkg_install(".") 35 | testthat::test_dir("inst/paramtest", stop_on_failure = TRUE) 36 | shell: Rscript {0} 37 | -------------------------------------------------------------------------------- /inst/paramtest/test_paramtest_regr.kknn.R: -------------------------------------------------------------------------------- 1 | library(mlr3learners) 2 | 3 | test_that("regr.kknn", { 4 | learner = lrn("regr.kknn") 5 | fun = kknn::kknn 6 | exclude = c( 7 | "train", # handled via mlr3 8 | "test", # handled via mlr3 9 | "na.action", # handled via mlr3 10 | "formula", # handled via mlr3 11 | "contrasts", # causes lots of troubles just when setting the default 12 | "store_model" # our parameter 13 | ) 14 | 15 | ParamTest = run_paramtest(learner, fun, exclude, tag = "train") 16 | expect_true(ParamTest, info = paste0( 17 | "\nMissing parameters in mlr3 param set:\n", 18 | paste0("- ", ParamTest$missing, "\n", collapse = ""), 19 | "\nOutdated param or param defined in additional control function not included in list of function definitions:\n", 20 | paste0("- ", ParamTest$extra, "\n", collapse = "")) 21 | ) 22 | }) 23 | -------------------------------------------------------------------------------- 
/inst/paramtest/test_paramtest_classif.kknn.R: -------------------------------------------------------------------------------- 1 | library(mlr3learners) 2 | 3 | test_that("classif.kknn", { 4 | learner = lrn("classif.kknn") 5 | fun = kknn::kknn 6 | exclude = c( 7 | "train", # handled via mlr3 8 | "test", # handled via mlr3 9 | "na.action", # handled via mlr3 10 | "formula", # handled via mlr3 11 | "contrasts", # causes lots of troubles just when setting the default, 12 | "store_model" # our parameter 13 | ) 14 | 15 | ParamTest = run_paramtest(learner, fun, exclude, tag = "train") 16 | expect_true(ParamTest, info = paste0( 17 | "\nMissing parameters in mlr3 param set:\n", 18 | paste0("- ", ParamTest$missing, "\n", collapse = ""), 19 | "\nOutdated param or param defined in additional control function not included in list of function definitions:\n", 20 | paste0("- ", ParamTest$extra, "\n", collapse = "")) 21 | ) 22 | }) 23 | -------------------------------------------------------------------------------- /inst/paramtest/test_paramtest_regr.nnet.R: -------------------------------------------------------------------------------- 1 | library(mlr3learners) 2 | 3 | test_that("regr.nnet", { 4 | learner = lrn("regr.nnet") 5 | fun = list(nnet::nnet.default, nnet::nnet.formula) 6 | exclude = c( 7 | "x", # handled via mlr3 8 | "y", # handled via mlr3 9 | "weights", # handled via mlr3 10 | "data", # handled via mlr3 11 | "linout", # automatically set to TRUE, since it's the regression learner 12 | "entropy", # mutually exclusive with linout 13 | "softmax" # mutually exclusive with linout 14 | ) 15 | 16 | ParamTest = run_paramtest(learner, fun, exclude, tag = "train") 17 | expect_true(ParamTest, info = paste0( 18 | "\nMissing parameters in mlr3 param set:\n", 19 | paste0("- ", ParamTest$missing, "\n", collapse = ""), 20 | "\nOutdated param or param defined in additional control function not included in list of function definitions:\n", 21 | paste0("- ", ParamTest$extra, "\n", 
collapse = "")) 22 | ) 23 | }) 24 | -------------------------------------------------------------------------------- /man-roxygen/example.R: -------------------------------------------------------------------------------- 1 | <% 2 | learner = mlr3::lrn(id) 3 | task_id = if ("LearnerClassif" %in% class(learner)) "sonar" else "mtcars" 4 | %> 5 | #' <%= sprintf("@examplesIf mlr3misc::require_namespaces(lrn(\"%s\")$packages, quietly = TRUE)", id) %> 6 | #' # Define the Learner and set parameter values 7 | #' <%= sprintf("learner = lrn(\"%s\")", id) %> 8 | #' print(learner) 9 | #' 10 | #' # Define a Task 11 | #' <%= sprintf("task = tsk(\"%s\")", task_id) %> 12 | #' 13 | #' # Create train and test set 14 | #' ids = partition(task) 15 | #' 16 | #' # Train the learner on the training ids 17 | #' learner$train(task, row_ids = ids$train) 18 | #' 19 | #' # Print the model 20 | #' print(learner$model) 21 | #' 22 | #' # Importance method 23 | #' if ("importance" %in% learner$properties) print(learner$importance) 24 | #' 25 | #' # Make predictions for the test rows 26 | #' predictions = learner$predict(task, row_ids = ids$test) 27 | #' 28 | #' # Score the predictions 29 | #' predictions$score() 30 | -------------------------------------------------------------------------------- /inst/paramtest/test_paramtest_classif.nnet.R: -------------------------------------------------------------------------------- 1 | library(mlr3learners) 2 | 3 | test_that("classif.nnet", { 4 | learner = lrn("classif.nnet") 5 | fun = list(nnet::nnet.default, nnet::nnet.formula) 6 | exclude = c( 7 | "x", # handled via mlr3 8 | "y", # handled via mlr3 9 | "weights", # handled via mlr3 10 | "data", # handled via mlr3 11 | "entropy", # automatically set to TRUE if two-class task 12 | "softmax", # automatically set to TRUE if multi-class task 13 | "linout" # automatically set to FALSE if two-class and TRUE if multi-class 14 | ) 15 | 16 | ParamTest = run_paramtest(learner, fun, exclude, tag = "train") 17 | 
expect_true(ParamTest, info = paste0( 18 | "\nMissing parameters in mlr3 param set:\n", 19 | paste0("- ", ParamTest$missing, "\n", collapse = ""), 20 | "\nOutdated param or param defined in additional control function not included in list of function definitions:\n", 21 | paste0("- ", ParamTest$extra, "\n", collapse = "")) 22 | ) 23 | }) 24 | -------------------------------------------------------------------------------- /man-roxygen/seealso_learner.R: -------------------------------------------------------------------------------- 1 | #' @seealso 2 | #' 3 | #' * Chapter in the [mlr3book](https://mlr3book.mlr-org.com/): 4 | #' \url{https://mlr3book.mlr-org.com/chapters/chapter2/data_and_basic_modeling.html#sec-learners} 5 | #' * Package [mlr3extralearners](https://github.com/mlr-org/mlr3extralearners) for more learners. 6 | #' * [Dictionary][mlr3misc::Dictionary] of [Learners][mlr3::Learner]: [mlr3::mlr_learners] 7 | #' * `as.data.table(mlr_learners)` for a table of available [Learners][mlr3::Learner] in the running session (depending on the loaded packages). 8 | #' * \CRANpkg{mlr3pipelines} to combine learners with pre- and postprocessing steps. 9 | #' * Extension packages for additional task types: 10 | #' * \CRANpkg{mlr3proba} for probabilistic supervised regression and survival analysis. 11 | #' * \CRANpkg{mlr3cluster} for unsupervised clustering. 12 | #' * \CRANpkg{mlr3tuning} for tuning of hyperparameters, \CRANpkg{mlr3tuningspaces} 13 | #' for established default tuning spaces. 
14 | #' 15 | #' @family Learner 16 | -------------------------------------------------------------------------------- /man-roxygen/example_dontrun.R: -------------------------------------------------------------------------------- 1 | <% 2 | lrn = mlr3::lrn(id) 3 | pkgs = setdiff(lrn$packages, c("mlr3", "mlr3learners")) 4 | %> 5 | #' <% task_id = if ("LearnerClassif" %in% class(lrn(id))) "sonar" else "mtcars" %> 6 | #' 7 | #' @examples 8 | #' \dontrun{ 9 | #' if (<%= paste0("requireNamespace(\"", pkgs, "\", quietly = TRUE)", collapse = " && ") %>) { 10 | #' # Define the Learner and set parameter values 11 | #' <%= sprintf("learner = lrn(\"%s\")", id)%> 12 | #' print(learner) 13 | #' 14 | #' # Define a Task 15 | #' <%= sprintf("task = tsk(\"%s\")", task_id)%> 16 | #' 17 | #' # Create train and test set 18 | #' <%= sprintf("ids = partition(task)")%> 19 | #' 20 | #' # Train the learner on the training ids 21 | #' <%= sprintf("learner$train(task, row_ids = ids$train)")%> 22 | #' 23 | #' # print the model 24 | #' print(learner$model) 25 | #' 26 | #' # importance method 27 | #' if("importance" %in% learner$properties) print(learner$importance) 28 | #' 29 | #' # Make predictions for the test rows 30 | #' <%= sprintf("predictions = learner$predict(task, row_ids = ids$test)")%> 31 | #' 32 | #' # Score the predictions 33 | #' predictions$score() 34 | #' } 35 | #' } 36 | -------------------------------------------------------------------------------- /R/helpers.R: -------------------------------------------------------------------------------- 1 | # p = probability for levs[2] => matrix with probs for levs[1] and levs[2] 2 | pvec2mat = function(p, levs) { 3 | stopifnot(is.numeric(p)) 4 | y = matrix(c(1 - p, p), ncol = 2L, nrow = length(p)) 5 | colnames(y) = levs 6 | y 7 | } 8 | 9 | 10 | ordered_features = function(task, learner) { 11 | cols = names(learner$state$data_prototype) %??% learner$state$feature_names 12 | task$data(cols = intersect(cols, task$feature_names)) 13 | } 
14 | 15 | 16 | as_numeric_matrix = function(x) { # for svm / #181 17 | x = as.matrix(x) 18 | if (is.logical(x)) { 19 | storage.mode(x) = "double" 20 | } 21 | x 22 | } 23 | 24 | 25 | swap_levels = function(x) { 26 | factor(x, levels = rev(levels(x))) 27 | } 28 | 29 | 30 | rename = function(x, old, new) { 31 | if (length(x)) { 32 | ii = match(names(x), old, nomatch = 0L) 33 | names(x)[ii > 0L] = new[ii] 34 | } 35 | x 36 | } 37 | 38 | opts_default_contrasts = list(contrasts = c("contr.treatment", "contr.poly")) 39 | 40 | get_weights = function(task, private) { 41 | if (packageVersion("mlr3") > "0.23.0") { 42 | private$.get_weights(task) 43 | } else { 44 | task$weights$weight 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /tests/testthat/test_regr_kknn.R: -------------------------------------------------------------------------------- 1 | skip_if_not_installed("kknn") 2 | 3 | test_that("autotest", { 4 | learner = mlr3::lrn("regr.kknn") 5 | expect_learner(learner) 6 | result = run_autotest(learner) 7 | expect_true(result, info = result$error) 8 | }) 9 | 10 | test_that("custom model", { 11 | task = tsk("mtcars") 12 | learner = mlr3::lrn("regr.kknn", store_model = TRUE) 13 | expect_null(learner$model) 14 | 15 | learner$train(task) 16 | mod = learner$model 17 | expect_list(mod, names = "unique", len = 4L) 18 | expect_null(mod$kknn) 19 | expect_formula(mod$formula) 20 | expect_data_table(mod$data) 21 | expect_list(mod$pv, names = "unique") 22 | 23 | learner$predict(task) 24 | mod = learner$model 25 | expect_list(mod, names = "unique", len = 4L) 26 | expect_s3_class(mod$kknn, "kknn") 27 | expect_formula(mod$formula) 28 | expect_data_table(mod$data) 29 | expect_list(mod$pv, names = "unique") 30 | 31 | learner$param_set$set_values(k = 3, store_model = FALSE) 32 | learner$train(task)$predict(task) 33 | expect_null(learner$model$kknn) 34 | }) 35 | 36 | test_that("error for k >= n", { 37 | task = tsk("mtcars")$filter(1:3) 38 | 
learner = mlr3::lrn("regr.kknn", k = 4) 39 | 40 | expect_error(learner$train(task)) 41 | }) 42 | -------------------------------------------------------------------------------- /tests/testthat/test_classif_kknn.R: -------------------------------------------------------------------------------- 1 | skip_if_not_installed("kknn") 2 | 3 | test_that("autotest", { 4 | learner = mlr3::lrn("classif.kknn") 5 | expect_learner(learner) 6 | result = run_autotest(learner) 7 | expect_true(result, info = result$error) 8 | }) 9 | 10 | test_that("custom model", { 11 | task = tsk("iris") 12 | learner = mlr3::lrn("classif.kknn", store_model = TRUE) 13 | expect_null(learner$model) 14 | 15 | learner$train(task) 16 | mod = learner$model 17 | expect_list(mod, names = "unique", len = 4L) 18 | expect_null(mod$kknn) 19 | expect_formula(mod$formula) 20 | expect_data_table(mod$data) 21 | expect_list(mod$pv, names = "unique") 22 | 23 | learner$predict(task) 24 | mod = learner$model 25 | expect_list(mod, names = "unique", len = 4L) 26 | expect_s3_class(mod$kknn, "kknn") 27 | expect_formula(mod$formula) 28 | expect_data_table(mod$data) 29 | expect_list(mod$pv, names = "unique") 30 | 31 | learner$param_set$set_values(k = 3, store_model = FALSE) 32 | learner$train(task)$predict(task) 33 | expect_null(learner$model$kknn) 34 | }) 35 | 36 | test_that("error for k >= n", { 37 | task = tsk("iris")$filter(1:3) 38 | learner = mlr3::lrn("classif.kknn", k = 4) 39 | 40 | expect_error(learner$train(task)) 41 | }) 42 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | #' @import data.table 2 | #' @import paradox 3 | #' @import mlr3misc 4 | #' @import checkmate 5 | #' @importFrom R6 R6Class 6 | #' @importFrom mlr3 mlr_learners LearnerClassif LearnerRegr assert_validate assert_quantiles 7 | #' @importFrom stats predict reformulate 8 | #' @importFrom utils packageVersion 9 | #' 
@importFrom methods formalArgs 10 | #' @useDynLib mlr3learners c_ranger_mu_sigma c_ranger_var 11 | #' 12 | #' @description 13 | #' More learners are implemented in the [mlr3extralearners package](https://github.com/mlr-org/mlr3extralearners). 14 | #' A guide on how to create custom learners is covered in the book: 15 | #' \url{https://mlr3book.mlr-org.com}. 16 | #' Feel invited to contribute a missing learner to the \CRANpkg{mlr3} ecosystem! 17 | "_PACKAGE" 18 | 19 | utils::globalVariables(c("patterns")) 20 | 21 | #' @include aaa.R 22 | register_mlr3 = function() { 23 | x = utils::getFromNamespace("mlr_learners", ns = "mlr3") 24 | 25 | iwalk(learners, function(obj, nm) x$add(nm, obj)) 26 | } 27 | 28 | .onLoad = function(libname, pkgname) { # nolint 29 | register_namespace_callback(pkgname, "mlr3", register_mlr3) 30 | } # nocov end 31 | 32 | .onUnload = function(libpaths) { # nolint 33 | mlr_learners = mlr3::mlr_learners 34 | 35 | walk(names(learners), function(id) mlr_learners$remove(id)) 36 | } 37 | 38 | leanify_package() 39 | -------------------------------------------------------------------------------- /inst/paramtest/test_paramtest_classif.multinom.R: -------------------------------------------------------------------------------- 1 | library(mlr3learners) 2 | 3 | test_that("classif.multinom", { 4 | learner = lrn("classif.multinom") 5 | fun = list(nnet::multinom, nnet::nnet.default) 6 | exclude = c( 7 | "x", # handled via mlr3 8 | "y", # handled via mlr3 9 | "formula", # handled via mlr3 10 | "data", # handled via mlr3 11 | "weights", # handled via mlr3 12 | "subset", # handled via mlr3 13 | "na.action", # handled via mlr3 14 | "contrasts" # handled via mlr3 15 | ) 16 | 17 | ParamTest = run_paramtest(learner, fun, exclude, tag = "train") 18 | expect_true(ParamTest, info = paste0( 19 | "\nMissing parameters in mlr3 param set:\n", 20 | paste0("- ", ParamTest$missing, "\n", collapse = ""), 21 | "\nOutdated param or param defined in additional control function 
not included in list of function definitions:\n", 22 | paste0("- ", ParamTest$extra, "\n", collapse = "")) 23 | ) 24 | }) 25 | 26 | # no extra args for predict.multinom 27 | # test_that("predict classif.multinom", { 28 | # learner = lrn("classif.multinom") 29 | # fun = nnet:::predict.multinom 30 | # exclude = c( 31 | # ) 32 | 33 | # ParamTest = run_paramtest(learner, fun, exclude) 34 | # expect_true(ParamTest, info = paste0( 35 | # "Missing parameters:", 36 | # paste0("- '", ParamTest$missing, "'", collapse = ","))) 37 | # }) 38 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yml: -------------------------------------------------------------------------------- 1 | # pkgdown workflow of the mlr3 ecosystem v0.1.0 2 | # https://github.com/mlr-org/actions 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | release: 11 | types: 12 | - published 13 | workflow_dispatch: 14 | 15 | name: pkgdown 16 | 17 | jobs: 18 | pkgdown: 19 | runs-on: ubuntu-latest 20 | 21 | concurrency: 22 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} 23 | env: 24 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 25 | steps: 26 | - uses: actions/checkout@v5 27 | 28 | - uses: r-lib/actions/setup-pandoc@v2 29 | 30 | - uses: r-lib/actions/setup-r@v2 31 | 32 | - uses: r-lib/actions/setup-r-dependencies@v2 33 | with: 34 | extra-packages: any::pkgdown, local::. 
35 | needs: website 36 | 37 | - name: Install template 38 | run: pak::pkg_install("mlr-org/mlr3pkgdowntemplate") 39 | shell: Rscript {0} 40 | 41 | - name: Build site 42 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) 43 | shell: Rscript {0} 44 | 45 | - name: Deploy 46 | if: github.event_name != 'pull_request' 47 | uses: JamesIves/github-pages-deploy-action@v4.7.3 48 | with: 49 | clean: false 50 | branch: gh-pages 51 | folder: docs 52 | -------------------------------------------------------------------------------- /inst/paramtest/test_paramtest_classif.naive_bayes.R: -------------------------------------------------------------------------------- 1 | library(mlr3learners) 2 | 3 | test_that("classif.naive_bayes", { 4 | learner = lrn("classif.naive_bayes") 5 | fun = list(e1071::naiveBayes, e1071:::naiveBayes.default) 6 | exclude = c( 7 | "x", # handled via mlr3 8 | "y" # handled via mlr3 9 | ) 10 | 11 | ParamTest = run_paramtest(learner, fun, exclude, tag = "train") 12 | expect_true(ParamTest, info = paste0( 13 | "\nMissing parameters in mlr3 param set:\n", 14 | paste0("- ", ParamTest$missing, "\n", collapse = ""), 15 | "\nOutdated param or param defined in additional control function not included in list of function definitions:\n", 16 | paste0("- ", ParamTest$extra, "\n", collapse = "")) 17 | ) 18 | }) 19 | 20 | test_that("predict classif.naive_bayes", { 21 | learner = lrn("classif.naive_bayes") 22 | fun = e1071:::predict.naiveBayes 23 | exclude = c( 24 | "object", # handled via mlr3 25 | "newdata", # handled via mlr3 26 | "type" # handled via mlr3 27 | ) 28 | 29 | ParamTest = run_paramtest(learner, fun, exclude, tag = "predict") 30 | expect_true(ParamTest, info = paste0( 31 | "\nMissing parameters in mlr3 param set:\n", 32 | paste0("- ", ParamTest$missing, "\n", collapse = ""), 33 | "\nOutdated param or param defined in additional control function not included in list of function definitions:\n", 34 | paste0("- ", ParamTest$extra, 
"\n", collapse = "")) 35 | ) 36 | }) 37 | -------------------------------------------------------------------------------- /inst/paramtest/test_paramtest_regr.km.R: -------------------------------------------------------------------------------- 1 | library(mlr3learners) 2 | 3 | test_that("regr.km", { 4 | learner = lrn("regr.km") 5 | fun = DiceKriging::km 6 | exclude = c( 7 | "formula", # handled via mlr3 8 | "design", # handled via mlr3 9 | "response", # handled via mlr3 10 | "nugget.stability" # custom param from mlr3, see help page 11 | ) 12 | 13 | ParamTest = run_paramtest(learner, fun, exclude, tag = "train") 14 | expect_true(ParamTest, info = paste0( 15 | "\nMissing parameters in mlr3 param set:\n", 16 | paste0("- ", ParamTest$missing, "\n", collapse = ""), 17 | "\nOutdated param or param defined in additional control function not included in list of function definitions:\n", 18 | paste0("- ", ParamTest$extra, "\n", collapse = "")) 19 | ) 20 | }) 21 | 22 | test_that("predict regr.km", { 23 | learner = lrn("regr.km") 24 | fun = DiceKriging::predict.km 25 | exclude = c( 26 | "object", # handled via mlr3 27 | "newdata", # handled via mlr3 28 | "jitter" # custom param from mlr3, see help page 29 | ) 30 | 31 | ParamTest = run_paramtest(learner, fun, exclude, tag = "predict") 32 | expect_true(ParamTest, info = paste0( 33 | "\nMissing parameters in mlr3 param set:\n", 34 | paste0("- ", ParamTest$missing, "\n", collapse = ""), 35 | "\nOutdated param or param defined in additional control function not included in list of function definitions:\n", 36 | paste0("- ", ParamTest$extra, "\n", collapse = "")) 37 | ) 38 | }) 39 | -------------------------------------------------------------------------------- /inst/paramtest/test_paramtest_classif.svm.R: -------------------------------------------------------------------------------- 1 | library(mlr3learners) 2 | 3 | test_that("classif.svm", { 4 | learner = lrn("classif.svm") 5 | fun = list(e1071:::svm.default) 6 | exclude = 
c( 7 | "x", # handled by mlr3 8 | "y", # handled by mlr3 9 | "probability", # handled by mlr3 10 | "subset", # handled by mlr3 11 | "na.action" # handled by mlr3 12 | ) 13 | 14 | ParamTest = run_paramtest(learner, fun, exclude, tag = "train") 15 | expect_true(ParamTest, info = paste0( 16 | "\nMissing parameters in mlr3 param set:\n", 17 | paste0("- ", ParamTest$missing, "\n", collapse = ""), 18 | "\nOutdated param or param defined in additional control function not included in list of function definitions:\n", 19 | paste0("- ", ParamTest$extra, "\n", collapse = "")) 20 | ) 21 | }) 22 | 23 | test_that("classif.svm", { 24 | learner = lrn("classif.svm") 25 | fun = e1071:::predict.svm 26 | exclude = c( 27 | "object", # handled by mlr3 28 | "newdata", # handled by mlr3 29 | "probability", # handled by mlr3 30 | "na.action" # handled by mlr3 31 | ) 32 | 33 | ParamTest = run_paramtest(learner, fun, exclude, tag = "predict") 34 | expect_true(ParamTest, info = paste0( 35 | "\nMissing parameters in mlr3 param set:\n", 36 | paste0("- ", ParamTest$missing, "\n", collapse = ""), 37 | "\nOutdated param or param defined in additional control function not included in list of function definitions:\n", 38 | paste0("- ", ParamTest$extra, "\n", collapse = "")) 39 | ) 40 | }) 41 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(default_values,LearnerClassifRanger) 4 | S3method(default_values,LearnerClassifSVM) 5 | S3method(default_values,LearnerClassifXgboost) 6 | S3method(default_values,LearnerRegrRanger) 7 | S3method(default_values,LearnerRegrSVM) 8 | S3method(default_values,LearnerRegrXgboost) 9 | export(LearnerClassifCVGlmnet) 10 | export(LearnerClassifGlmnet) 11 | export(LearnerClassifKKNN) 12 | export(LearnerClassifLDA) 13 | export(LearnerClassifLogReg) 14 | export(LearnerClassifMultinom) 15 
| export(LearnerClassifNaiveBayes) 16 | export(LearnerClassifNnet) 17 | export(LearnerClassifQDA) 18 | export(LearnerClassifRanger) 19 | export(LearnerClassifSVM) 20 | export(LearnerClassifXgboost) 21 | export(LearnerRegrCVGlmnet) 22 | export(LearnerRegrGlmnet) 23 | export(LearnerRegrKKNN) 24 | export(LearnerRegrKM) 25 | export(LearnerRegrLM) 26 | export(LearnerRegrNnet) 27 | export(LearnerRegrRanger) 28 | export(LearnerRegrSVM) 29 | export(LearnerRegrXgboost) 30 | import(checkmate) 31 | import(data.table) 32 | import(mlr3misc) 33 | import(paradox) 34 | importFrom(R6,R6Class) 35 | importFrom(methods,formalArgs) 36 | importFrom(mlr3,LearnerClassif) 37 | importFrom(mlr3,LearnerRegr) 38 | importFrom(mlr3,assert_quantiles) 39 | importFrom(mlr3,assert_validate) 40 | importFrom(mlr3,mlr_learners) 41 | importFrom(stats,predict) 42 | importFrom(stats,reformulate) 43 | importFrom(utils,bibentry) 44 | importFrom(utils,packageVersion) 45 | useDynLib(mlr3learners,c_ranger_mu_sigma) 46 | useDynLib(mlr3learners,c_ranger_var) 47 | -------------------------------------------------------------------------------- /man/mlr3learners-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/zzz.R 3 | \docType{package} 4 | \name{mlr3learners-package} 5 | \alias{mlr3learners} 6 | \alias{mlr3learners-package} 7 | \title{mlr3learners: Recommended Learners for 'mlr3'} 8 | \description{ 9 | More learners are implemented in the \href{https://github.com/mlr-org/mlr3extralearners}{mlr3extralearners package}. 10 | A guide on how to create custom learners is covered in the book: 11 | \url{https://mlr3book.mlr-org.com}. 12 | Feel invited to contribute a missing learner to the \CRANpkg{mlr3} ecosystem! 
13 | } 14 | \seealso{ 15 | Useful links: 16 | \itemize{ 17 | \item \url{https://mlr3learners.mlr-org.com} 18 | \item \url{https://github.com/mlr-org/mlr3learners} 19 | \item Report bugs at \url{https://github.com/mlr-org/mlr3learners/issues} 20 | } 21 | 22 | } 23 | \author{ 24 | \strong{Maintainer}: Marc Becker \email{marcbecker@posteo.de} (\href{https://orcid.org/0000-0002-8115-0400}{ORCID}) 25 | 26 | Authors: 27 | \itemize{ 28 | \item Michel Lang \email{michellang@gmail.com} (\href{https://orcid.org/0000-0001-9754-0393}{ORCID}) 29 | \item Quay Au \email{quayau@gmail.com} (\href{https://orcid.org/0000-0002-5252-8902}{ORCID}) 30 | \item Stefan Coors \email{mail@stefancoors.de} (\href{https://orcid.org/0000-0002-7465-2146}{ORCID}) 31 | \item Patrick Schratz \email{patrick.schratz@gmail.com} (\href{https://orcid.org/0000-0003-0748-6624}{ORCID}) 32 | \item John Zobolas \email{bblodfon@gmail.com} (\href{https://orcid.org/0000-0002-3609-8674}{ORCID}) 33 | } 34 | 35 | } 36 | -------------------------------------------------------------------------------- /pkgdown/_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://mlr3learners.mlr-org.com 2 | 3 | template: 4 | bootstrap: 5 5 | light-switch: true 6 | math-rendering: mathjax 7 | package: mlr3pkgdowntemplate 8 | 9 | development: 10 | mode: auto 11 | version_label: default 12 | version_tooltip: "Version" 13 | 14 | toc: 15 | depth: 3 16 | 17 | navbar: 18 | structure: 19 | left: [reference, news, book] 20 | right: [search, github, mattermost, stackoverflow, rss, lightswitch] 21 | components: 22 | home: ~ 23 | reference: 24 | icon: fa fa-file-alt 25 | text: Reference 26 | href: reference/index.html 27 | mattermost: 28 | icon: fa fa-comments 29 | href: https://lmmisld-lmu-stats-slds.srv.mwn.de/mlr_invite/ 30 | book: 31 | text: mlr3book 32 | icon: fa fa-link 33 | href: https://mlr3book.mlr-org.com 34 | stackoverflow: 35 | icon: fab fa-stack-overflow 36 | href: 
https://stackoverflow.com/questions/tagged/mlr 37 | rss: 38 | icon: fa-rss 39 | href: https://mlr-org.com/ 40 | learners: 41 | text: Additional Learners 42 | href: https://mlr3extralearners.mlr-org.com/articles/learners/list_learners.html 43 | 44 | reference: 45 | - title: Classification Learners 46 | contents: 47 | - starts_with("mlr_learners_classif") 48 | - title: Regression Learners 49 | contents: 50 | - starts_with("mlr_learners_regr") 51 | - title: Survival Learners 52 | contents: 53 | - starts_with("mlr_learners_surv") 54 | - title: General 55 | contents: 56 | - mlr3learners-package 57 | -------------------------------------------------------------------------------- /inst/paramtest/test_paramtest_regr.svm.R: -------------------------------------------------------------------------------- 1 | library(mlr3learners) 2 | 3 | test_that("regr.svm", { 4 | learner = lrn("regr.svm") 5 | fun = list(e1071:::svm.default) 6 | exclude = c( 7 | "x", # handled by mlr3 8 | "y", # handled by mlr3 9 | "probability", # handled by mlr3 10 | "subset", # handled by mlr3 11 | "na.action", # handled by mlr3 12 | "class.weights" # not defined in regr 13 | ) 14 | 15 | ParamTest = run_paramtest(learner, fun, exclude, tag = "train") 16 | expect_true(ParamTest, info = paste0( 17 | "\nMissing parameters in mlr3 param set:\n", 18 | paste0("- ", ParamTest$missing, "\n", collapse = ""), 19 | "\nOutdated param or param defined in additional control function not included in list of function definitions:\n", 20 | paste0("- ", ParamTest$extra, "\n", collapse = "")) 21 | ) 22 | }) 23 | 24 | test_that("regr.svm", { 25 | learner = lrn("regr.svm") 26 | fun = e1071:::predict.svm 27 | exclude = c( 28 | "object", # handled by mlr3 29 | "newdata", # handled by mlr3 30 | "probability", # handled by mlr3 31 | "na.action", # handled by mlr3 32 | "decision.values" # classif only 33 | ) 34 | 35 | ParamTest = run_paramtest(learner, fun, exclude, tag = "predict") 36 | expect_true(ParamTest, info = paste0(
"\nMissing parameters in mlr3 param set:\n", 38 | paste0("- ", ParamTest$missing, "\n", collapse = ""), 39 | "\nOutdated param or param defined in additional control function not included in list of function definitions:\n", 40 | paste0("- ", ParamTest$extra, "\n", collapse = "")) 41 | ) 42 | }) 43 | -------------------------------------------------------------------------------- /.github/workflows/r-cmd-check.yml: -------------------------------------------------------------------------------- 1 | # r cmd check workflow of the mlr3 ecosystem v0.4.0 2 | # https://github.com/mlr-org/actions 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | debug_enabled: 7 | type: boolean 8 | description: 'Run the build with tmate debugging enabled' 9 | required: false 10 | default: false 11 | push: 12 | branches: 13 | - main 14 | pull_request: 15 | branches: 16 | - main 17 | 18 | name: r-cmd-check 19 | 20 | jobs: 21 | r-cmd-check: 22 | runs-on: ${{ matrix.config.os }} 23 | 24 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 25 | 26 | env: 27 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 28 | 29 | strategy: 30 | fail-fast: false 31 | matrix: 32 | config: 33 | - {os: ubuntu-latest, r: 'devel'} 34 | - {os: ubuntu-latest, r: 'release'} 35 | 36 | steps: 37 | - uses: actions/checkout@v5 38 | 39 | - uses: r-lib/actions/setup-pandoc@v2 40 | 41 | - uses: r-lib/actions/setup-r@v2 42 | with: 43 | r-version: ${{ matrix.config.r }} 44 | 45 | - uses: r-lib/actions/setup-r-dependencies@v2 46 | with: 47 | extra-packages: any::rcmdcheck 48 | needs: check 49 | 50 | - uses: mxschmitt/action-tmate@v3 51 | if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }} 52 | with: 53 | limit-access-to-actor: true 54 | 55 | - uses: r-lib/actions/check-r-package@v2 56 | with: 57 | args: 'c("--no-manual", "--as-cran")' 58 | error-on: '"note"' 59 | -------------------------------------------------------------------------------- /inst/paramtest/test_paramtest_classif.lda.R: 
-------------------------------------------------------------------------------- 1 | library(mlr3learners) 2 | 3 | test_that("classif.lda", { 4 | learner = lrn("classif.lda") 5 | fun = list(MASS::lda, MASS:::lda.default) 6 | exclude = c( 7 | "x", # handled by mlr3 8 | "grouping", # handled by mlr3 9 | "CV" # handled by mlr3 10 | ) 11 | 12 | ParamTest = run_paramtest(learner, fun, exclude, tag = "train") 13 | expect_true(ParamTest, info = paste0( 14 | "\nMissing parameters in mlr3 param set:\n", 15 | paste0("- ", ParamTest$missing, "\n", collapse = ""), 16 | "\nOutdated param or param defined in additional control function not included in list of function definitions:\n", 17 | paste0("- ", ParamTest$extra, "\n", collapse = "")) 18 | ) 19 | }) 20 | 21 | test_that("predict classif.lda", { 22 | learner = lrn("classif.lda") 23 | fun = MASS:::predict.lda 24 | exclude = c( 25 | "object", # handled via mlr3 26 | "newdata", # handled via mlr3 27 | "method", # renamed to predict.method, see help page 28 | "predict.method", # renamed from method, see help page 29 | "prior", # renamed from predict.prior, see help page 30 | "predict.prior" # renamed from prior, see help page 31 | ) 32 | 33 | ParamTest = run_paramtest(learner, fun, exclude, tag = "predict") 34 | expect_true(ParamTest, info = paste0( 35 | "\nMissing parameters in mlr3 param set:\n", 36 | paste0("- ", ParamTest$missing, "\n", collapse = ""), 37 | "\nOutdated param or param defined in additional control function not included in list of function definitions:\n", 38 | paste0("- ", ParamTest$extra, "\n", collapse = "")) 39 | ) 40 | }) 41 | -------------------------------------------------------------------------------- /inst/paramtest/test_paramtest_classif.qda.R: -------------------------------------------------------------------------------- 1 | library(mlr3learners) 2 | 3 | test_that("classif.qda", { 4 | learner = lrn("classif.qda") 5 | fun = list(MASS::qda, MASS:::qda.default) 6 | exclude = c( 7 | "x", # handled 
via mlr3 8 | "grouping", # handled via mlr3 9 | "CV" # handled via mlr3 10 | ) 11 | 12 | ParamTest = run_paramtest(learner, fun, exclude, tag = "train") 13 | expect_true(ParamTest, info = paste0( 14 | "\nMissing parameters in mlr3 param set:\n", 15 | paste0("- ", ParamTest$missing, "\n", collapse = ""), 16 | "\nOutdated param or param defined in additional control function not included in list of function definitions:\n", 17 | paste0("- ", ParamTest$extra, "\n", collapse = "")) 18 | ) 19 | }) 20 | 21 | test_that("predict classif.qda", { 22 | learner = lrn("classif.qda") 23 | fun = MASS:::predict.qda 24 | exclude = c( 25 | "object", # handled via mlr3 26 | "newdata", # handled via mlr3 27 | "method", # renamed to predict.method, see help page 28 | "predict.method", # renamed from method, see help page 29 | "prior", # renamed from predict.prior, see help page 30 | "predict.prior" # renamed from prior, see help page 31 | ) 32 | 33 | ParamTest = run_paramtest(learner, fun, exclude, tag = "predict") 34 | expect_true(ParamTest, info = paste0( 35 | "\nMissing parameters in mlr3 param set:\n", 36 | paste0("- ", ParamTest$missing, "\n", collapse = ""), 37 | "\nOutdated param or param defined in additional control function not included in list of function definitions:\n", 38 | paste0("- ", ParamTest$extra, "\n", collapse = "")) 39 | ) 40 | }) 41 | -------------------------------------------------------------------------------- /R/helpers_ranger.R: -------------------------------------------------------------------------------- 1 | #' @title Convert a Ratio Hyperparameter 2 | #' 3 | #' @description 4 | #' Given the named list `pv` (values of a [ParamSet]), converts a possibly provided hyperparameter 5 | #' called `ratio` to an integer hyperparameter `target`. 6 | #' If both are found in `pv`, an exception is thrown. 7 | #' 8 | #' @param pv (named `list()`). 9 | #' @param target (`character(1)`)\cr 10 | #' Name of the integer hyperparameter. 
11 | #' @param ratio (`character(1)`)\cr 12 | #' Name of the ratio hyperparameter. 13 | #' @param n (`integer(1)`)\cr 14 | #' Ratio of what? 15 | #' 16 | #' @return (named `list()`) with new hyperparameter settings. 17 | #' @noRd 18 | convert_ratio = function(pv, target, ratio, n) { 19 | switch(to_decimal(c(target, ratio) %in% names(pv)) + 1L, 20 | # !mtry && !mtry.ratio 21 | pv, 22 | 23 | # !mtry && mtry.ratio 24 | { 25 | pv[[target]] = max(ceiling(pv[[ratio]] * n), 1) 26 | remove_named(pv, ratio) 27 | }, 28 | 29 | 30 | # mtry && !mtry.ratio 31 | pv, 32 | 33 | # mtry && mtry.ratio 34 | stopf("Hyperparameters '%s' and '%s' are mutually exclusive", target, ratio) 35 | ) 36 | } 37 | 38 | ranger_selected_features = function(model, feature_names) { 39 | if (is.null(model)) { 40 | stopf("No model stored") 41 | } 42 | 43 | splitvars = ranger::treeInfo(object = model, tree = 1)$splitvarName 44 | i = 2 45 | while (i <= model$num.trees && !all(feature_names %in% splitvars)) { 46 | sv = ranger::treeInfo(object = model, tree = i)$splitvarName 47 | splitvars = union(splitvars, sv) 48 | i = i + 1 49 | } 50 | 51 | splitvars[!is.na(splitvars)] 52 | } 53 | -------------------------------------------------------------------------------- /inst/paramtest/test_paramtest_regr.cv_glmnet.R: -------------------------------------------------------------------------------- 1 | library(mlr3learners) 2 | 3 | skip_on_os("solaris") 4 | 5 | test_that("regr.cv_glmnet", { 6 | learner = lrn("regr.cv_glmnet") 7 | fun = list(glmnet::cv.glmnet, glmnet::glmnet.control, glmnet::glmnet) 8 | exclude = c( 9 | "x", # handled by mlr3 10 | "y", # handled by mlr3 11 | "weights", # handled by mlr3 12 | "itrace", # supported via param trace.it 13 | "factory", # only used in scripts, no effect within mlr3 14 | "offset" # handled by mlr3 15 | ) 16 | 17 | ParamTest = run_paramtest(learner, fun, exclude, tag = "train") 18 | expect_true(ParamTest, info = paste0( 19 | "\nMissing parameters in mlr3 param set:\n", 20 | 
paste0("- ", ParamTest$missing, "\n", collapse = ""), 21 | "\nOutdated param or param defined in additional control function not included in list of function definitions:\n", 22 | paste0("- ", ParamTest$extra, "\n", collapse = "")) 23 | ) 24 | }) 25 | 26 | test_that("predict regr.cv_glmnet", { 27 | learner = lrn("regr.cv_glmnet") 28 | fun = glmnet:::predict.cv.glmnet 29 | exclude = c( 30 | "object", # handled via mlr3 31 | "newx", # handled via mlr3 32 | "predict.gamma", # renamed from gamma 33 | "use_pred_offset" # handled by mlr3 34 | ) 35 | 36 | ParamTest = run_paramtest(learner, fun, exclude, tag = "predict") 37 | expect_true(ParamTest, info = paste0( 38 | "\nMissing parameters in mlr3 param set:\n", 39 | paste0("- ", ParamTest$missing, "\n", collapse = ""), 40 | "\nOutdated param or param defined in additional control function not included in list of function definitions:\n", 41 | paste0("- ", ParamTest$extra, "\n", collapse = "")) 42 | ) 43 | }) 44 | -------------------------------------------------------------------------------- /inst/paramtest/test_paramtest_classif.cv_glmnet.R: -------------------------------------------------------------------------------- 1 | library(mlr3learners) 2 | 3 | test_that("classif.cv_glmnet", { 4 | learner = lrn("classif.cv_glmnet") 5 | fun = list(glmnet::cv.glmnet, glmnet::glmnet.control, glmnet::glmnet) 6 | exclude = c( 7 | "x", # handled by mlr3 8 | "y", # handled by mlr3 9 | "weights", # handled by mlr3 10 | "itrace", # supported via param trace.it 11 | "factory", # only used in scripts, no effect within mlr3 12 | "family", # handled by mlr3 13 | "offset" # handled by mlr3 14 | ) 15 | 16 | ParamTest = run_paramtest(learner, fun, exclude, tag = "train") 17 | expect_true(ParamTest, info = paste0( 18 | "\nMissing parameters in mlr3 param set:\n", 19 | paste0("- ", ParamTest$missing, "\n", collapse = ""), 20 | "\nOutdated param or param defined in additional control function not included in list of function definitions:\n", 21 
| paste0("- ", ParamTest$extra, "\n", collapse = "")) 22 | ) 23 | }) 24 | 25 | test_that("predict classif.cv_glmnet", { 26 | learner = lrn("classif.cv_glmnet") 27 | fun = glmnet:::predict.cv.glmnet 28 | exclude = c( 29 | "object", # handled via mlr3 30 | "newx", # handled via mlr3 31 | "predict.gamma", # renamed from gamma 32 | "use_pred_offset" # handled by mlr3 33 | ) 34 | 35 | ParamTest = run_paramtest(learner, fun, exclude, tag = "predict") 36 | expect_true(ParamTest, info = paste0( 37 | "\nMissing parameters in mlr3 param set:\n", 38 | paste0("- ", ParamTest$missing, "\n", collapse = ""), 39 | "\nOutdated param or param defined in additional control function not included in list of function definitions:\n", 40 | paste0("- ", ParamTest$extra, "\n", collapse = "")) 41 | ) 42 | }) 43 | -------------------------------------------------------------------------------- /.github/workflows/no-suggest-cmd-check.yml: -------------------------------------------------------------------------------- 1 | # r cmd check workflow without suggests of the mlr3 ecosystem v0.3.1 2 | # https://github.com/mlr-org/actions 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | debug_enabled: 7 | type: boolean 8 | description: 'Run the build with tmate debugging enabled' 9 | required: false 10 | default: false 11 | push: 12 | branches: 13 | - main 14 | pull_request: 15 | branches: 16 | - main 17 | 18 | name: no-suggest-cmd-check 19 | 20 | jobs: 21 | no-suggest-cmd-check: 22 | runs-on: ${{ matrix.config.os }} 23 | 24 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 25 | 26 | env: 27 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 28 | 29 | strategy: 30 | fail-fast: false 31 | matrix: 32 | config: 33 | - {os: ubuntu-latest, r: 'release'} 34 | 35 | steps: 36 | - uses: actions/checkout@v5 37 | 38 | - uses: r-lib/actions/setup-pandoc@v2 39 | 40 | - uses: r-lib/actions/setup-r@v2 41 | with: 42 | r-version: ${{ matrix.config.r }} 43 | 44 | - uses: r-lib/actions/setup-r-dependencies@v2 45 | with: 46 
| extra-packages: | 47 | any::rcmdcheck 48 | any::testthat 49 | any::knitr 50 | any::rmarkdown 51 | needs: check 52 | dependencies: '"hard"' 53 | cache: false 54 | 55 | - uses: mxschmitt/action-tmate@v3 56 | if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }} 57 | with: 58 | limit-access-to-actor: true 59 | 60 | - uses: r-lib/actions/check-r-package@v2 61 | with: 62 | args: 'c("--no-manual", "--as-cran")' 63 | -------------------------------------------------------------------------------- /tests/testthat/test_classif_cv_glmnet.R: -------------------------------------------------------------------------------- 1 | skip_on_os("solaris") # glmnet not working properly on solaris 2 | skip_if_not_installed("glmnet") 3 | 4 | options(warnPartialMatchArgs = FALSE) 5 | on.exit(options(warnPartialMatchArgs = TRUE)) 6 | 7 | test_that("autotest", { 8 | learner = mlr3::lrn("classif.cv_glmnet") 9 | expect_learner(learner) 10 | 11 | skip_on_os("solaris") 12 | result = run_autotest(learner, exclude = "feat_single", N = 100L) 13 | expect_true(result, info = result$error) 14 | }) 15 | 16 | test_that("prob column reordering (#155)", { 17 | task = tsk("sonar") 18 | learner = mlr3::lrn("classif.cv_glmnet", predict_type = "prob") 19 | 20 | task$positive = "M" 21 | learner$train(task) 22 | p = learner$predict(task) 23 | expect_gt(p$score(msr("classif.acc")), 0.6) 24 | 25 | task$positive = "R" 26 | learner$train(task) 27 | p = learner$predict(task) 28 | expect_gt(p$score(msr("classif.acc")), 0.6) 29 | }) 30 | 31 | test_that("same label ordering as in glm() / log_reg", { 32 | task = with_seed(123, tgen("2dnormals")$generate(100)) 33 | for (pos in task$class_names) { 34 | task$positive = pos 35 | 36 | l1 = lrn("classif.log_reg") 37 | l2 = lrn("classif.cv_glmnet") 38 | l1$train(task) 39 | l2$train(task) 40 | 41 | expect_equal(sign(as.numeric(coef(l1$model))), sign(as.numeric(coef(l2$model, s = 0))), 42 | info = sprintf("positive label = %s", pos)) 43 | } 44 | }) 45 | 
46 | 47 | test_that("selected_features", { 48 | task = tsk("iris") 49 | learner = lrn("classif.cv_glmnet") 50 | learner$train(task) 51 | 52 | expect_equal( 53 | learner$selected_features(0), 54 | task$feature_names 55 | ) 56 | 57 | expect_equal( 58 | learner$selected_features(Inf), 59 | character() 60 | ) 61 | }) 62 | -------------------------------------------------------------------------------- /inst/paramtest/test_paramtest_regr.lm.R: -------------------------------------------------------------------------------- 1 | library(mlr3learners) 2 | 3 | test_that("regr.lm", { 4 | learner = lrn("regr.lm") 5 | fun = stats::lm 6 | exclude = c( 7 | "formula", # handled via mlr3 8 | "data", # handled via mlr3 9 | "weights", # handled via mlr3 10 | "na.action", # handled via mlr3 11 | "method", # handled via mlr3 12 | "subset", # handled via mlr3 13 | "contrasts", # handled via mlr3 14 | "offset" # handled via mlr3 15 | ) 16 | 17 | ParamTest = run_paramtest(learner, fun, exclude, tag = "train") 18 | expect_true(ParamTest, info = paste0( 19 | "\nMissing parameters in mlr3 param set:\n", 20 | paste0("- ", ParamTest$missing, "\n", collapse = ""), 21 | "\nOutdated param or param defined in additional control function not included in list of function definitions:\n", 22 | paste0("- ", ParamTest$extra, "\n", collapse = "")) 23 | ) 24 | }) 25 | 26 | test_that("predict regr.lm", { 27 | learner = lrn("regr.lm") 28 | fun = stats::predict.lm 29 | exclude = c( 30 | "object", # handled via mlr3 31 | "newdata", # handled via mlr3 32 | "type", # handled via mlr3 33 | "na.action", # handled via mlr3 34 | "terms", # not supported by mlr3 learner 35 | "weights", # handled via mlr3 36 | "se.fit", # controlled via predict type 37 | "use_pred_offset" # handled via mlr3 38 | ) 39 | 40 | ParamTest = run_paramtest(learner, fun, exclude, tag = "predict") 41 | expect_true(ParamTest, info = paste0( 42 | "\nMissing parameters in mlr3 param set:\n", 43 | paste0("- ", ParamTest$missing, "\n", collapse 
= ""), 44 | "\nOutdated param or param defined in additional control function not included in list of function definitions:\n", 45 | paste0("- ", ParamTest$extra, "\n", collapse = "")) 46 | ) 47 | }) 48 | -------------------------------------------------------------------------------- /inst/paramtest/test_paramtest_regr.glmnet.R: -------------------------------------------------------------------------------- 1 | library(mlr3learners) 2 | 3 | test_that("regr.glmnet", { 4 | learner = lrn("regr.glmnet") 5 | fun = list(glmnet::glmnet, glmnet::cv.glmnet, glmnet::glmnet.control) 6 | exclude = c( 7 | "x", # handled by mlr3 8 | "y", # handled by mlr3 9 | "weights", # handled by mlr3 10 | "nfolds", # not used by learner 11 | "foldid", # not used by learner 12 | "type.measure", # only used by cv.glmnet 13 | "itrace", # supported via param trace.it 14 | "factory", # only used in scripts, no effect within mlr3 15 | "offset" # handled by mlr3 16 | ) 17 | 18 | ParamTest = run_paramtest(learner, fun, exclude, tag = "train") 19 | expect_true(ParamTest, info = paste0( 20 | "\nMissing parameters in mlr3 param set:\n", 21 | paste0("- ", ParamTest$missing, "\n", collapse = ""), 22 | "\nOutdated param or param defined in additional control function not included in list of function definitions:\n", 23 | paste0("- ", ParamTest$extra, "\n", collapse = "")) 24 | ) 25 | }) 26 | 27 | test_that("predict regr.glmnet", { 28 | learner = lrn("regr.glmnet") 29 | fun = glmnet::predict.glmnet 30 | exclude = c( 31 | "object", # handled via mlr3 32 | "newx", # handled via mlr3 33 | "type", # handled via mlr3 34 | "newoffset", # handled via mlr3 35 | "use_pred_offset" # handled via mlr3 36 | ) 37 | 38 | ParamTest = run_paramtest(learner, fun, exclude, tag = "predict") 39 | expect_true(ParamTest, info = paste0( 40 | "\nMissing parameters in mlr3 param set:\n", 41 | paste0("- ", ParamTest$missing, "\n", collapse = ""), 42 | "\nOutdated param or param defined in additional control function not included 
in list of function definitions:\n", 43 | paste0("- ", ParamTest$extra, "\n", collapse = "")) 44 | ) 45 | }) 46 | 47 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/learner-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Learner Request 3 | about: Request the implementation of a new learner 4 | title: Connect learner [LRN] from package [PKG] 5 | labels: 'Status: Pending, Type: Learner Request' 6 | assignees: '' 7 | 8 | --- 9 | 10 | Before making this request, make sure that 11 | 12 | 1. The learner is not maintained in a third party repository (listed in the [Table of all additional Learners](https://mlr3learners.mlr-org.com/dev/articles/learners/additional-learners.html)), and 13 | 2. There is no other open or closed issue on this learner in this tracker. 14 | 15 | ## Checklist before requesting a review 16 | 17 | - [ ] Run `styler::style_pkg(style = styler::mlr_style)` (install `pat-s/styler@mlr-style` if not yet done) 18 | 19 | - [ ] Run `lintr::lint_package()` and fix all issues. 20 | 21 | - [ ] Run `usethis::use_tidy_description()` to format the `DESCRIPTION` file. 22 | 23 | - [ ] Check that the learner package name is all lower case, e.g. `mlr3learners.partykit`. 24 | 25 | - [ ] Ensure that there are not leftover of ``, `` or `` within the learner repo. 26 | 27 | - [ ] Ensure that the "Parameter Check" passed in the CI (both for the train **and** predict functions) 28 | 29 | - [ ] Ensure that "R CMD check" passed in the CI. 30 | 31 | - [ ] Check that your learners upstream package is **not** listed in the "Imports" but in the "Suggests" section within the `DESCRIPTION` file. 32 | 33 | - [ ] If you changed any parameter defaults: Did you document the change (reason and new default) in the help page of the respective learner? 
34 | 35 | - [ ] Open a Pull Request in the mlr3learners repo to add your learner to the list of ["In Progress" learners](https://mlr3learners.mlr-org.com/dev/articles/learners/additional-learners.html#in-progress-1). Once approved, it will be moved to the "Approved" section. 36 | -------------------------------------------------------------------------------- /.github/workflows/dev-cmd-check.yml: -------------------------------------------------------------------------------- 1 | # dev cmd check workflow of the mlr3 ecosystem v0.4.0 2 | # https://github.com/mlr-org/actions 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | debug_enabled: 7 | type: boolean 8 | description: 'Run the build with tmate debugging enabled' 9 | required: false 10 | default: false 11 | push: 12 | branches: 13 | - main 14 | pull_request: 15 | branches: 16 | - main 17 | 18 | name: dev-check 19 | 20 | jobs: 21 | check-package: 22 | runs-on: ${{ matrix.config.os }} 23 | 24 | name: ${{ matrix.config.dev-package }} 25 | 26 | env: 27 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 28 | 29 | strategy: 30 | fail-fast: false 31 | matrix: 32 | config: 33 | - {os: ubuntu-latest, r: 'release', dev-package: 'mlr-org/mlr3'} 34 | - {os: ubuntu-latest, r: 'release', dev-package: 'mlr-org/mlr3misc'} 35 | - {os: ubuntu-latest, r: 'release', dev-package: 'mlr-org/paradox'} 36 | 37 | steps: 38 | - uses: actions/checkout@v5 39 | 40 | - uses: r-lib/actions/setup-pandoc@v2 41 | 42 | - uses: r-lib/actions/setup-r@v2 43 | with: 44 | r-version: ${{ matrix.config.r }} 45 | 46 | - uses: r-lib/actions/setup-r-dependencies@v2 47 | with: 48 | extra-packages: any::rcmdcheck 49 | needs: check 50 | 51 | - name: Install dev versions 52 | run: pak::pkg_install('${{ matrix.config.dev-package }}') 53 | shell: Rscript {0} 54 | 55 | - uses: mxschmitt/action-tmate@v3 56 | if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }} 57 | with: 58 | limit-access-to-actor: true 59 | 60 | - uses: r-lib/actions/check-r-package@v2 
61 | with: 62 | args: 'c("--no-manual", "--as-cran")' 63 | error-on: '"note"' 64 | -------------------------------------------------------------------------------- /inst/paramtest/test_paramtest_classif.glmnet.R: -------------------------------------------------------------------------------- 1 | library(mlr3learners) 2 | 3 | skip_on_os("solaris") 4 | 5 | test_that("classif.glmnet", { 6 | learner = lrn("classif.glmnet") 7 | fun = list(glmnet::glmnet, glmnet::glmnet.control) 8 | exclude = c( 9 | "x", # handled by mlr3 10 | "y", # handled by mlr3 11 | "weights", # handled by mlr3 12 | "nfolds", # not used by learner 13 | "foldid", # not used by learner 14 | "type.measure", # only used by cv.glmnet 15 | "family", # handled by mlr3 16 | "itrace", # supported via param trace.it 17 | "factory", # only used in scripts, no effect within mlr3 18 | "offset" # handled by mlr3 19 | ) 20 | 21 | ParamTest = run_paramtest(learner, fun, exclude, tag = "train") 22 | expect_true(ParamTest, info = paste0( 23 | "\nMissing parameters in mlr3 param set:\n", 24 | paste0("- ", ParamTest$missing, "\n", collapse = ""), 25 | "\nOutdated param or param defined in additional control function not included in list of function definitions:\n", 26 | paste0("- ", ParamTest$extra, "\n", collapse = "")) 27 | ) 28 | }) 29 | 30 | test_that("predict classif.glmnet", { 31 | learner = lrn("classif.glmnet") 32 | fun = list(glmnet::predict.glmnet, glmnet::predict.relaxed) 33 | exclude = c( 34 | "object", # handled via mlr3 35 | "newx", # handled via mlr3 36 | "type", # handled via mlr3 37 | "newoffset", # handled via mlr3 38 | "use_pred_offset" # handled via mlr3 39 | ) 40 | 41 | ParamTest = run_paramtest(learner, fun, exclude, tag = "predict") 42 | expect_true(ParamTest, info = paste0( 43 | "\nMissing parameters in mlr3 param set:\n", 44 | paste0("- ", ParamTest$missing, "\n", collapse = ""), 45 | "\nOutdated param or param defined in additional control function not included in list of function 
definitions:\n", 46 | paste0("- ", ParamTest$extra, "\n", collapse = "")) 47 | ) 48 | }) 49 | -------------------------------------------------------------------------------- /inst/paramtest/test_paramtest_classif.logreg.R: -------------------------------------------------------------------------------- 1 | library(mlr3learners) 2 | 3 | test_that("classif.logreg", { 4 | learner = lrn("classif.log_reg") 5 | fun = list(stats::glm, stats::glm.control) 6 | exclude = c( 7 | "x", # handled by mlr3 8 | "formula", # handled by mlr3 9 | "family", # handled by mlr3 10 | "data", # handled by mlr3 11 | "weights", # handled by mlr3 12 | "subset", # handled by mlr3 13 | "na.action", # handled by mlr3 14 | "y", # handled by mlr3 15 | "method", # we always use glm() 16 | "control", # handled by glm.control 17 | "contrasts", # causes lots of troubles just when setting the default 18 | "offset" # handled via mlr3 19 | ) 20 | 21 | ParamTest = run_paramtest(learner, fun, exclude, tag = "train") 22 | expect_true(ParamTest, info = paste0( 23 | "\nMissing parameters in mlr3 param set:\n", 24 | paste0("- ", ParamTest$missing, "\n", collapse = ""), 25 | "\nOutdated param or param defined in additional control function not included in list of function definitions:\n", 26 | paste0("- ", ParamTest$extra, "\n", collapse = "")) 27 | ) 28 | }) 29 | 30 | test_that("predict classif.log_reg", { 31 | learner = lrn("classif.log_reg") 32 | fun = stats::predict.glm 33 | exclude = c( 34 | "object", # handled via mlr3 35 | "newdata", # handled via mlr3 36 | "type", # handled via mlr3 37 | "terms", # handled via mlr3 type arg 38 | "na.action", # handled via mlr3 39 | "se.fit", # not supported for log reg 40 | "use_pred_offset" # handled via mlr3 41 | ) 42 | 43 | ParamTest = run_paramtest(learner, fun, exclude, tag = "predict") 44 | expect_true(ParamTest, info = paste0( 45 | "\nMissing parameters in mlr3 param set:\n", 46 | paste0("- ", ParamTest$missing, "\n", collapse = ""), 47 | "\nOutdated param or 
#' @title Naive Bayes Classification Learner
#'
#' @name mlr_learners_classif.naive_bayes
#'
#' @description
#' Naive Bayes classification.
#' Calls [e1071::naiveBayes()] from package \CRANpkg{e1071}.
#'
#' @templateVar id classif.naive_bayes
#' @template learner
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClassifNaiveBayes = R6Class("LearnerClassifNaiveBayes",
  inherit = LearnerClassif,

  public = list(

    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      param_set = ps(
        eps = p_dbl(default = 0, tags = "predict"),
        laplace = p_dbl(0, default = 0, tags = "train"),
        threshold = p_dbl(default = 0.001, tags = "predict")
      )

      super$initialize(
        id = "classif.naive_bayes",
        param_set = param_set,
        predict_types = c("response", "prob"),
        properties = c("twoclass", "multiclass"),
        feature_types = c("logical", "integer", "numeric", "factor"),
        packages = c("mlr3learners", "e1071"),
        label = "Naive Bayes",
        man = "mlr3learners::mlr_learners_classif.naive_bayes"
      )
    }
  ),

  private = list(
    # Fit e1071::naiveBayes() on the task's feature columns and target.
    .train = function(task) {
      invoke(e1071::naiveBayes,
        x = task$data(cols = task$feature_names),
        y = task$truth(),
        .args = self$param_set$get_values(tags = "train"))
    },

    # Predict either hard classes ("class") or posterior probabilities
    # ("raw"), depending on the configured predict type.
    .predict = function(task) {
      pv = self$param_set$get_values(tags = "predict")
      newdata = ordered_features(task, self)
      type = if (self$predict_type == "response") "class" else "raw"

      pred = invoke(predict, self$model,
        newdata = newdata,
        type = type, .args = pv)

      if (self$predict_type == "response") {
        list(response = pred)
      } else {
        list(prob = pred)
      }
    }
  )
)

#' @include aaa.R
learners[["classif.naive_bayes"]] = LearnerClassifNaiveBayes
40 | "quantiles", # handled via mlr3 41 | "what", # not supported in mlr3 42 | "predict.all", # not supported in mlr3 43 | "formula", # handled via mlr3 44 | "object", # handled via mlr3 45 | "data", # handled via mlr3 46 | "type", # handled via mlr3 47 | "sigma2.threshold" # added by mlr3 48 | ) 49 | 50 | ParamTest = run_paramtest(learner, fun, exclude, tag = "predict") 51 | expect_true(ParamTest, info = paste0( 52 | "\nMissing parameters in mlr3 param set:\n", 53 | paste0("- ", ParamTest$missing, "\n", collapse = ""), 54 | "\nOutdated param or param defined in additional control function not included in list of function definitions:\n", 55 | paste0("- ", ParamTest$extra, "\n", collapse = "")) 56 | ) 57 | }) 58 | -------------------------------------------------------------------------------- /inst/paramtest/test_paramtest_classif.ranger.R: -------------------------------------------------------------------------------- 1 | library(mlr3learners) 2 | 3 | test_that("classif.ranger", { 4 | learner = lrn("classif.ranger") 5 | fun = ranger::ranger 6 | exclude = c( 7 | "formula", # handled via mlr3 8 | "y", # handled via mlr3 9 | "x", # handled via mlr3 10 | "data", # handled via mlr3 11 | "probability", # handled via mlr3 12 | "case.weights", # handled via mlr3 13 | "local.importance", # handled via importance() method 14 | "class.weights", # handled via mlr3 15 | "inbag", # handled via mlr3 stratification 16 | "quantreg", # regression only 17 | "poisson.tau", # regression only 18 | "dependent.variable.name", # handled via mlr3 19 | "status.variable.name", # handled via mlr3 20 | "classification", # handled via mlr3 21 | "mtry.ratio", # custom hyperpar 22 | "time.interest", # survival only 23 | "alpha", # survival only 24 | "minprop" # survival only 25 | ) 26 | 27 | ParamTest = run_paramtest(learner, fun, exclude, tag = "train") 28 | expect_true(ParamTest, info = paste0( 29 | "\nMissing parameters in mlr3 param set:\n", 30 | paste0("- ", ParamTest$missing, "\n", 
collapse = ""), 31 | "\nOutdated param or param defined in additional control function not included in list of function definitions:\n", 32 | paste0("- ", ParamTest$extra, "\n", collapse = "")) 33 | ) 34 | }) 35 | 36 | test_that("predict classif.ranger", { 37 | learner = lrn("classif.ranger") 38 | fun = ranger:::predict.ranger 39 | exclude = c( 40 | "quantiles", # not supported by classification 41 | "what", # not supported by classification 42 | "predict.all", # not supported in mlr3 43 | "formula", # handled via mlr3 44 | "object", # handled via mlr3 45 | "data", # handled via mlr3 46 | "type", # handled via mlr3 47 | "se.method" # regression only 48 | ) 49 | 50 | ParamTest = run_paramtest(learner, fun, exclude, tag = "predict") 51 | expect_true(ParamTest, info = paste0( 52 | "\nMissing parameters in mlr3 param set:\n", 53 | paste0("- ", ParamTest$missing, "\n", collapse = ""), 54 | "\nOutdated param or param defined in additional control function not included in list of function definitions:\n", 55 | paste0("- ", ParamTest$extra, "\n", collapse = "")) 56 | ) 57 | }) 58 | -------------------------------------------------------------------------------- /inst/paramtest/test_paramtest_regr.xgboost.R: -------------------------------------------------------------------------------- 1 | library(mlr3learners) 2 | 3 | test_that("regr.xgboost", { 4 | learner = lrn("regr.xgboost", nrounds = 1L) 5 | fun = list(xgboost::xgb.train, xgboost::xgb.params) 6 | exclude = c( 7 | "data", # handled by mlr3 8 | "params", # handled by mlr3 9 | "custom_metric", # handled by eval_metric parameter 10 | "learning_rate", # handled by eta parameter 11 | "min_split_loss", # handled by gamma parameter 12 | "reg_alpha", # handled by alpha parameter 13 | "reg_lambda", # handled by lambda parameter 14 | "multi_strategy", # not supported 15 | "num_class", # handled by mlr3 16 | "quantile_alpha", # handled by mlr3 17 | "aft_loss_distribution", # survival only 18 | "lambdarank_pair_method", # rank 
# File-level skips: they apply to every test_that() block below.
skip_on_os("solaris") # glmnet not working properly on solaris
skip_if_not_installed("glmnet")

test_that("autotest", {
  learner = mlr3::lrn("regr.glmnet", lambda = 0.1)
  expect_learner(learner)

  # NOTE: the duplicate skip_on_os("solaris") that used to live here was
  # redundant — the file-level skip above already skips this test on Solaris.
  result = run_autotest(learner, exclude = "feat_single")
  expect_true(result, info = result$error)
})

test_that("selected_features", {
  task = tsk("mtcars")
  learner = lrn("regr.glmnet")
  learner$train(task)

  # With a zero penalty threshold every feature is selected.
  expect_equal(
    learner$selected_features(0),
    task$feature_names
  )

  # With an infinite penalty threshold no feature survives.
  expect_equal(
    learner$selected_features(Inf),
    character()
  )
})

test_that("offset works", {
  with_seed(7832, {
    data = data.table(x = 1:50, z = runif(50), y = stats::rpois(50, lambda = 5))
    offset_col = runif(50)
    data_with_offset = cbind(data, offset_col)
  })

  task = as_task_regr(x = data, target = "y")
  task_with_offset = as_task_regr(x = data_with_offset, target = "y")
  task_with_offset$set_col_roles(cols = "offset_col", roles = "offset")
  part = partition(task)

  # train learner
  learner = lrn("regr.glmnet", lambda = 0.01, family = "poisson")
  learner$train(task, part$train) # no offset
  learner_offset = lrn("regr.glmnet", lambda = 0.01, family = "poisson")
  learner_offset$train(task_with_offset, part$train) # with offset (during training)

  # trained models are different
  expect_true(learner_offset$model$offset) # offset is used
  expect_false(learner$model$offset) # offset not used
  expect_false(all(learner$model$beta == learner_offset$model$beta))

  # predict on test set (offset is used by default)
  p1 = learner_offset$predict(task_with_offset, part$test)
  # no offset during predict
  learner_offset$param_set$set_values(.values = list(use_pred_offset = FALSE))
  p2 = learner_offset$predict(task_with_offset, part$test)
  # predictions are different: for a poisson family the offset acts
  # multiplicatively on the response scale (exp of the linear offset)
  expect_true(all(p1$response != p2$response))
  expect_equal(p2$response * exp(offset_col[part$test]), p1$response)

  # using a task with offset on a learner that didn't use offset during training
  # results in the same prediction: offset is completely ignored
  p3 = learner$predict(task, part$test)
  p4 = learner$predict(task_with_offset, part$test)
  expect_equal(p3$response, p4$response)
})
lrn("classif.xgboost") 39 | fun = xgboost:::predict.xgb.Booster 40 | exclude = c( 41 | "object", # handled by mlr3 42 | "newdata", # handled by mlr3o 43 | "outputmargin", # not supported 44 | "predcontrib", # not supported 45 | "predinteraction", # not supported 46 | "predleaf", # not supported 47 | "avoid_transpose", # not supported 48 | "base_margin", # not supported 49 | "objective", # use by mlr3 not xgboost 50 | "strict_shape" # destroys prediction format 51 | ) 52 | 53 | ParamTest = run_paramtest(learner, fun, exclude, tag = "predict") 54 | expect_true(ParamTest, info = paste0( 55 | "\nMissing parameters in mlr3 param set:\n", 56 | paste0("- ", ParamTest$missing, "\n", collapse = ""), 57 | "\nOutdated param or param defined in additional control function not included in list of function definitions:\n", 58 | paste0("- ", ParamTest$extra, "\n", collapse = "")) 59 | ) 60 | }) 61 | 62 | -------------------------------------------------------------------------------- /R/LearnerClassifQDA.R: -------------------------------------------------------------------------------- 1 | #' @title Quadratic Discriminant Analysis Classification Learner 2 | #' 3 | #' @name mlr_learners_classif.qda 4 | #' 5 | #' @description 6 | #' Quadratic discriminant analysis. 7 | #' Calls [MASS::qda()] from package \CRANpkg{MASS}. 8 | #' 9 | #' @details 10 | #' Parameters `method` and `prior` exist for training and prediction but 11 | #' accept different values for each. Therefore, arguments for 12 | #' the predict stage have been renamed to `predict.method` and `predict.prior`, 13 | #' respectively. 
14 | #' 15 | #' @templateVar id classif.qda 16 | #' @template learner 17 | #' 18 | #' @references 19 | #' `r format_bib("venables_2002")` 20 | #' 21 | #' @export 22 | #' @template seealso_learner 23 | #' @template example 24 | LearnerClassifQDA = R6Class("LearnerClassifQDA", 25 | inherit = LearnerClassif, 26 | 27 | public = list( 28 | 29 | #' @description 30 | #' Creates a new instance of this [R6][R6::R6Class] class. 31 | initialize = function() { 32 | ps = ps( 33 | method = p_fct(c("moment", "mle", "mve", "t"), default = "moment", tags = "train"), 34 | nu = p_int(tags = "train", depends = quote(method == "t")), 35 | predict.method = p_fct(c("plug-in", "predictive", "debiased"), default = "plug-in", tags = "predict"), 36 | predict.prior = p_uty(tags = "predict"), 37 | prior = p_uty(tags = "train") 38 | ) 39 | 40 | super$initialize( 41 | id = "classif.qda", 42 | param_set = ps, 43 | predict_types = c("response", "prob"), 44 | feature_types = c("logical", "integer", "numeric", "factor", "ordered"), 45 | properties = c("twoclass", "multiclass"), 46 | packages = c("mlr3learners", "MASS"), 47 | label = "Quadratic Discriminant Analysis", 48 | man = "mlr3learners::mlr_learners_classif.qda" 49 | ) 50 | } 51 | ), 52 | 53 | private = list( 54 | .train = function(task) { 55 | invoke(MASS::qda, task$formula(), 56 | data = task$data(), 57 | .args = self$param_set$get_values(tags = "train")) 58 | }, 59 | 60 | .predict = function(task) { 61 | pv = self$param_set$get_values(tags = "predict") 62 | pv = rename(pv, c("predict.method", "predict.prior"), c("method", "prior")) 63 | 64 | newdata = ordered_features(task, self) 65 | p = invoke(predict, self$model, newdata = newdata, .args = pv) 66 | 67 | if (self$predict_type == "response") { 68 | list(response = p$class) 69 | } else { 70 | list(prob = p$posterior) 71 | } 72 | } 73 | ) 74 | ) 75 | 76 | #' @include aaa.R 77 | learners[["classif.qda"]] = LearnerClassifQDA 78 | 
library(checkmate)
library(mlr3)

# Source all testthat helper files shipped with mlr3 so that shared testing
# utilities (e.g. run_autotest(), expect_learner()) are available here.
lapply(list.files(system.file("testthat", package = "mlr3"),
  pattern = "^helper.*\\.[rR]$", full.names = TRUE), source)

# For each tree of a fitted ranger model, compute per-terminal-node statistics
# of the training target: the mean (mu) and variance (sigma2) of the target
# values that fall into that node. Returns these per-tree node statistics
# together with the terminal-node assignments of the training data.
compute_mu_sigma2 = function(model, task) {
  # Terminal-node id of every training observation in every tree.
  prediction_nodes = predict(model, data = task$data(), type = "terminalNodes", predict.all = TRUE)
  y = task$truth()
  observation_node_table = prediction_nodes$predictions
  n_trees = NCOL(observation_node_table)
  # Distinct terminal nodes reached per tree (one column per tree).
  unique_nodes_per_tree = apply(observation_node_table, MARGIN = 2L, FUN = unique, simplify = FALSE)
  mu_sigma2_per_node_per_tree = lapply(seq_len(n_trees), function(tree) {
    nodes = unique_nodes_per_tree[[tree]]
    setNames(lapply(nodes, function(node) {
      y_tmp = y[observation_node_table[, tree] == node]
      # A single-observation node gets variance 0 rather than NA.
      c(mu = mean(y_tmp), sigma2 = if (length(y_tmp) > 1L) var(y_tmp) else 0)
    }), nm = nodes)
  })
  list(mu_sigma2_per_node_per_tree = mu_sigma2_per_node_per_tree, prediction_nodes = prediction_nodes)
}

# Reference implementation of the "simple" standard-error estimate: for each
# new observation, look up the node mean of every tree; the response is the
# mean of those per-tree means and se is their standard deviation.
simple_var = function(learner, newdata, mu_sigma2_per_node_per_tree) {
  prediction_nodes = predict(learner$model$model, data = newdata, type = "terminalNodes", predict.all = TRUE)
  n_observations = NROW(prediction_nodes$predictions)
  n_trees = length(mu_sigma2_per_node_per_tree)
  response = numeric(n_observations)
  se = numeric(n_observations)
  for (i in seq_len(n_observations)) {
    # Per-tree (mu, sigma2) of the node this observation lands in.
    mu_sigma2_per_tree = lapply(seq_len(n_trees), function(tree) {
      mu_sigma2_per_node_per_tree[[tree]][[as.character(prediction_nodes$predictions[i, tree])]]
    })
    mus = sapply(mu_sigma2_per_tree, "[[", 1)
    response[i] = mean(mus)
    # With a single tree there is no between-tree spread, so se is 0.
    se[i] = if (length(mus) > 1) sqrt(var(mus)) else 0
  }
  list(response = response, se = se)
}

# Reference implementation of the law-of-total-variance standard-error
# estimate: combines the between-tree variance of node means with the mean
# within-node variance.
ltv = function(learner, newdata, mu_sigma2_per_node_per_tree) {
  prediction_nodes = predict(learner$model$model, data = newdata, type = "terminalNodes", predict.all = TRUE)
  n_observations = NROW(prediction_nodes$predictions)
  n_trees = length(mu_sigma2_per_node_per_tree)
  response = numeric(n_observations)
  se = numeric(n_observations)
  for (i in seq_len(n_observations)) {
    # Per-tree (mu, sigma2) of the node this observation lands in.
    mu_sigma2_per_tree = lapply(seq_len(n_trees), function(tree) {
      mu_sigma2_per_node_per_tree[[tree]][[as.character(prediction_nodes$predictions[i, tree])]]
    })
    mus = sapply(mu_sigma2_per_tree, "[[", 1)
    sigmas2 = sapply(mu_sigma2_per_tree, "[[", 2)
    response[i] = mean(mus)
    # law of total variance assuming a mixture of normal distributions for each tree
    se[i] = sqrt(mean((mus ^ 2) + sigmas2) - (response[i] ^ 2))
  }
  list(response = response, se = se)
}
31 | initialize = function() { 32 | ps = ps( 33 | dimen = p_uty(tags = "predict"), 34 | method = p_fct(c("moment", "mle", "mve", "t"), default = "moment", tags = "train"), 35 | nu = p_int(tags = "train", depends = quote(method == "t")), 36 | predict.method = p_fct(c("plug-in", "predictive", "debiased"), default = "plug-in", tags = "predict"), 37 | predict.prior = p_uty(tags = "predict"), 38 | prior = p_uty(tags = "train"), 39 | tol = p_dbl(tags = "train") 40 | ) 41 | 42 | super$initialize( 43 | id = "classif.lda", 44 | param_set = ps, 45 | predict_types = c("response", "prob"), 46 | feature_types = c("logical", "integer", "numeric", "factor", "ordered"), 47 | properties = c("twoclass", "multiclass"), 48 | packages = c("mlr3learners", "MASS"), 49 | label = "Linear Discriminant Analysis", 50 | man = "mlr3learners::mlr_learners_classif.lda" 51 | ) 52 | } 53 | ), 54 | 55 | private = list( 56 | .train = function(task) { 57 | pv = self$param_set$get_values(tags = "train") 58 | formula = task$formula() 59 | invoke(MASS::lda, formula, data = task$data(), .args = pv) 60 | }, 61 | 62 | .predict = function(task) { 63 | pv = self$param_set$get_values(tags = "predict") 64 | pv = rename(pv, c("predict.method", "predict.prior"), c("method", "prior")) 65 | newdata = ordered_features(task, self) 66 | 67 | p = invoke(predict, self$model, newdata = newdata, .args = pv) 68 | 69 | if (self$predict_type == "response") { 70 | list(response = p[["class"]]) 71 | } else { 72 | list(response = p[["class"]], prob = p[["posterior"]]) 73 | } 74 | } 75 | ) 76 | ) 77 | 78 | #' @include aaa.R 79 | learners[["classif.lda"]] = LearnerClassifLDA 80 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: mlr3learners 2 | Title: Recommended Learners for 'mlr3' 3 | Version: 0.14.0.9000 4 | Authors@R: c( 5 | person("Michel", "Lang", , "michellang@gmail.com", role = "aut", 
6 | comment = c(ORCID = "0000-0001-9754-0393")), 7 | person("Quay", "Au", , "quayau@gmail.com", role = "aut", 8 | comment = c(ORCID = "0000-0002-5252-8902")), 9 | person("Stefan", "Coors", , "mail@stefancoors.de", role = "aut", 10 | comment = c(ORCID = "0000-0002-7465-2146")), 11 | person("Patrick", "Schratz", , "patrick.schratz@gmail.com", role = "aut", 12 | comment = c(ORCID = "0000-0003-0748-6624")), 13 | person("Marc", "Becker", , "marcbecker@posteo.de", role = c("cre", "aut"), 14 | comment = c(ORCID = "0000-0002-8115-0400")), 15 | person("John", "Zobolas", , "bblodfon@gmail.com", role = "aut", 16 | comment = c(ORCID = "0000-0002-3609-8674")) 17 | ) 18 | Description: Recommended Learners for 'mlr3'. Extends 'mlr3' with 19 | interfaces to essential machine learning packages on CRAN. This 20 | includes, but is not limited to: (penalized) linear and logistic 21 | regression, linear and quadratic discriminant analysis, k-nearest 22 | neighbors, naive Bayes, support vector machines, and gradient 23 | boosting. 
License: LGPL-3
URL: https://mlr3learners.mlr-org.com,
    https://github.com/mlr-org/mlr3learners
BugReports: https://github.com/mlr-org/mlr3learners/issues
Depends:
    mlr3 (>= 1.2.0),
    R (>= 3.3.0)
Imports:
    checkmate,
    data.table,
    methods,
    mlr3misc (>= 0.9.4),
    paradox (>= 1.0.0),
    R6
Suggests:
    DiceKriging,
    e1071,
    future,
    glmnet,
    kknn,
    knitr,
    lgr,
    MASS,
    mirai,
    nnet,
    pracma,
    ranger,
    rgenoud,
    rmarkdown,
    testthat (>= 3.0.0),
    xgboost (>= 3.1.2.1)
Config/testthat/edition: 3
Encoding: UTF-8
NeedsCompilation: yes
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.3
Collate:
    'aaa.R'
    'LearnerClassifCVGlmnet.R'
    'LearnerClassifGlmnet.R'
    'LearnerClassifKKNN.R'
    'LearnerClassifLDA.R'
    'LearnerClassifLogReg.R'
    'LearnerClassifMultinom.R'
    'LearnerClassifNaiveBayes.R'
    'LearnerClassifNnet.R'
    'LearnerClassifQDA.R'
    'LearnerClassifRanger.R'
    'LearnerClassifSVM.R'
    'LearnerClassifXgboost.R'
    'LearnerRegrCVGlmnet.R'
    'LearnerRegrGlmnet.R'
    'LearnerRegrKKNN.R'
    'LearnerRegrKM.R'
    'LearnerRegrLM.R'
    'LearnerRegrNnet.R'
    'LearnerRegrRanger.R'
    'LearnerRegrSVM.R'
    'LearnerRegrXgboost.R'
    'bibentries.R'
    'helpers.R'
    'helpers_glmnet.R'
    'helpers_ranger.R'
    'zzz.R'
--------------------------------------------------------------------------------
/tests/testthat/test_regr_lm.R:
--------------------------------------------------------------------------------
test_that("autotest", {
  learner = mlr3::lrn("regr.lm")
  expect_learner(learner)
  result = run_autotest(learner)
  expect_true(result, info = result$error)
})

test_that("contrasts", {
  task = tsk("mtcars")
  learner = mlr3::lrn("regr.lm")

  learner$train(task)
  coefs1 = coef(learner$model)
  # was a bare `names(coefs1)` statement whose value was discarded;
  # assert instead that the fitted coefficients are named
  expect_named(coefs1)

  opts = list(contrasts = c(ordered = "contr.poly", unordered = "contr.poly"))
  old_opts = options(opts)
  on.exit(options(old_opts))

  # changing the global contrasts option must not change the coefficient names
  learner$train(task)
  coefs2 = coef(learner$model)

  expect_setequal(names(coefs1), names(coefs2))
})

test_that("offset works", {
  with_seed(7832, {
    data = data.table(x = 1:10, y = stats::rpois(10, lambda = 5))
    offset_col = runif(10)
    data_with_offset = cbind(data, offset_col)
  })

  task = as_task_regr(x = data, target = "y")
  task_with_offset = as_task_regr(x = data_with_offset, target = "y")
  task_with_offset$set_col_roles(cols = "offset_col", roles = "offset")
  part = partition(task)

  # train learner
  learner = lrn("regr.lm")
  learner$train(task, part$train) # no offset
  learner_offset = lrn("regr.lm")
  learner_offset$train(task_with_offset, part$train) # with offset (during training)

  # trained models are different
  expect_numeric(learner_offset$model$offset) # offset is used
  expect_null(learner$model$offset) # offset not used
  expect_false(all(learner$model$coefficients == learner_offset$model$coefficients))

  # check: we get same trained model manually using the formula interface
  model = stats::lm(y ~ x + offset(offset_col), data = data_with_offset, subset = part$train)
  expect_equal(model$coefficients, learner_offset$model$coefficients)

  # predict on test set (offset is used by default)
  p1 = learner_offset$predict(task_with_offset, part$test)
  # same thing manually
  res = unname(predict(model, newdata = data_with_offset[part$test, ]))
  expect_equal(p1$response, res)
  # use offset during predict
  learner_offset$param_set$set_values(.values = list(use_pred_offset = FALSE))
  p2 = learner_offset$predict(task_with_offset, part$test)
  # predictions are different
  expect_true(all(p1$response != p2$response))
  # offset was added to
the response 64 | expect_equal(p2$response + offset_col[part$test], p1$response) 65 | # verify predictions manually 66 | res = unname(predict(model, newdata = cbind(data[part$test, ], offset_col = 0))) 67 | expect_equal(p2$response, res) 68 | 69 | # using a task with offset on a learner that didn't use offset during training 70 | # results in the same prediction: offset is completely ignored 71 | p3 = learner$predict(task, part$test) 72 | p4 = learner$predict(task_with_offset, part$test) 73 | expect_equal(p3$response, p4$response) 74 | }) 75 | -------------------------------------------------------------------------------- /R/LearnerRegrKKNN.R: -------------------------------------------------------------------------------- 1 | #' @title k-Nearest-Neighbor Regression Learner 2 | #' 3 | #' @name mlr_learners_regr.kknn 4 | #' 5 | #' @description 6 | #' k-Nearest-Neighbor regression. 7 | #' Calls [kknn::kknn()] from package \CRANpkg{kknn}. 8 | #' 9 | #' @section Initial parameter values: 10 | #' - `store_model`: 11 | #' - See note. 12 | #' 13 | #' @template note_kknn 14 | #' 15 | #' @templateVar id regr.kknn 16 | #' @template learner 17 | #' 18 | #' @references 19 | #' `r format_bib("hechenbichler_2004", "samworth_2012", "cover_1967")` 20 | #' 21 | #' @export 22 | #' @template seealso_learner 23 | #' @template example 24 | LearnerRegrKKNN = R6Class("LearnerRegrKKNN", 25 | inherit = LearnerRegr, 26 | public = list( 27 | 28 | #' @description 29 | #' Creates a new instance of this [R6][R6::R6Class] class. 
30 | initialize = function() { 31 | ps = ps( 32 | k = p_int(default = 7L, lower = 1L, tags = "train"), 33 | distance = p_dbl(0, default = 2, tags = "train"), 34 | kernel = p_fct(c("rectangular", "triangular", "epanechnikov", "biweight", "triweight", "cos", "inv", "gaussian", "rank", "optimal"), default = "optimal", tags = "train"), 35 | scale = p_lgl(default = TRUE, tags = "train"), 36 | ykernel = p_uty(default = NULL, tags = "train"), 37 | store_model = p_lgl(default = FALSE, tags = "train") 38 | ) 39 | 40 | ps$set_values(k = 7L) 41 | 42 | super$initialize( 43 | id = "regr.kknn", 44 | param_set = ps, 45 | feature_types = c("logical", "integer", "numeric", "factor", "ordered"), 46 | packages = c("mlr3learners", "kknn"), 47 | label = "k-Nearest-Neighbor", 48 | man = "mlr3learners::mlr_learners_regr.kknn" 49 | ) 50 | } 51 | ), 52 | 53 | private = list( 54 | .train = function(task) { 55 | # https://github.com/mlr-org/mlr3learners/issues/191 56 | pv = self$param_set$get_values(tags = "train") 57 | if (pv$k >= task$nrow) { 58 | stopf("Parameter k = %i must be smaller than the number of observations n = %i", 59 | pv$k, task$nrow) 60 | } 61 | 62 | list( 63 | formula = task$formula(), 64 | data = task$data(), 65 | pv = pv, 66 | kknn = NULL 67 | ) 68 | }, 69 | 70 | .predict = function(task) { 71 | model = self$model 72 | newdata = ordered_features(task, self) 73 | pv = insert_named(model$pv, self$param_set$get_values(tags = "predict")) 74 | 75 | with_package("kknn", { # https://github.com/KlausVigo/kknn/issues/16 76 | p = invoke(kknn::kknn, 77 | formula = model$formula, train = model$data, 78 | test = newdata, .args = remove_named(pv, "store_model")) 79 | }) 80 | 81 | if (isTRUE(pv$store_model)) { 82 | self$state$model$kknn = p 83 | } 84 | 85 | list(response = p$fitted.values) 86 | } 87 | ) 88 | ) 89 | 90 | #' @include aaa.R 91 | learners[["regr.kknn"]] = LearnerRegrKKNN 92 | -------------------------------------------------------------------------------- 
/R/LearnerClassifKKNN.R:
--------------------------------------------------------------------------------
#' @title k-Nearest-Neighbor Classification Learner
#'
#' @name mlr_learners_classif.kknn
#'
#' @description
#' k-Nearest-Neighbor classification.
#' Calls [kknn::kknn()] from package \CRANpkg{kknn}.
#'
#' @section Initial parameter values:
#' - `store_model`:
#'   - See note.
#'
#' @template note_kknn
#'
#' @templateVar id classif.kknn
#' @template learner
#'
#' @references
#' `r format_bib("hechenbichler_2004", "samworth_2012", "cover_1967")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClassifKKNN = R6Class("LearnerClassifKKNN",
  inherit = LearnerClassif,
  public = list(

    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      param_set = ps(
        k = p_int(default = 7L, lower = 1L, tags = "train"),
        distance = p_dbl(0, default = 2, tags = "train"),
        kernel = p_fct(c("rectangular", "triangular", "epanechnikov", "biweight", "triweight", "cos", "inv", "gaussian", "rank", "optimal"), default = "optimal", tags = "train"),
        scale = p_lgl(default = TRUE, tags = "train"),
        ykernel = p_uty(default = NULL, tags = "train"),
        store_model = p_lgl(default = FALSE, tags = "train")
      )
      param_set$set_values(k = 7L)

      super$initialize(
        id = "classif.kknn",
        param_set = param_set,
        predict_types = c("response", "prob"),
        feature_types = c("logical", "integer", "numeric", "factor", "ordered"),
        properties = c("twoclass", "multiclass"),
        packages = c("mlr3learners", "kknn"),
        label = "k-Nearest-Neighbor",
        man = "mlr3learners::mlr_learners_classif.kknn"
      )
    }
  ),

  private = list(
    .train = function(task) {
      # kknn is a lazy learner: nothing is fit here, training only validates
      # `k` and stores the data for use at predict time
      # https://github.com/mlr-org/mlr3learners/issues/191
      pars = self$param_set$get_values(tags = "train")
      if (pars$k >= task$nrow) {
        stopf("Parameter k = %i must be smaller than the number of observations (n = %i)",
          pars$k, task$nrow)
      }

      list(
        formula = task$formula(),
        data = task$data(),
        pv = pars,
        kknn = NULL
      )
    },

    .predict = function(task) {
      model = self$state$model
      test_data = ordered_features(task, self)
      pars = insert_named(model$pv, self$param_set$get_values(tags = "predict"))
      fit_args = remove_named(pars, "store_model")

      with_package("kknn", { # https://github.com/KlausVigo/kknn/issues/16
        fit = invoke(kknn::kknn,
          formula = model$formula, train = model$data,
          test = test_data, .args = fit_args)
      })

      # optionally keep the full kknn fit object in the learner's state
      if (isTRUE(pars$store_model)) {
        self$state$model$kknn = fit
      }

      if (self$predict_type == "prob") {
        list(prob = fit$prob)
      } else {
        list(response = fit$fitted.values)
      }
    }
  )
)

#' @include aaa.R
learners[["classif.kknn"]] = LearnerClassifKKNN
--------------------------------------------------------------------------------
/tests/testthat/test_classif_log_reg.R:
--------------------------------------------------------------------------------
test_that("autotest", {
  learner = mlr3::lrn("classif.log_reg")
  expect_learner(learner)
  result = run_autotest(learner)
  expect_true(result, info = result$error)
})

test_that("class labels are correctly encoded", {
  task = tsk("sonar")
  learner = lrn("classif.log_reg")

  task$positive = "M"
  suppressWarnings(learner$train(task))
  expect_equal(unname(learner$model$y), rep(0:1, c(97, 111)))

  task$positive = "R"
  suppressWarnings(learner$train(task))
  expect_equal(unname(learner$model$y), rep(1:0, c(97, 111)))
})

test_that("offset works", {
  with_seed(7832, {
    data = data.table(x = 1:30, y = stats::rbinom(30, size = 1, prob = 0.5))
    offset_col =
runif(30) 25 | data_with_offset = cbind(data, offset_col) 26 | }) 27 | 28 | task = as_task_classif(x = data, target = "y", positive = "1") 29 | task_with_offset = as_task_classif(x = data_with_offset, target = "y", positive = "1") 30 | task_with_offset$set_col_roles(cols = "offset_col", roles = "offset") 31 | part = partition(task) 32 | 33 | # train learner 34 | learner = lrn("classif.log_reg", predict_type = "prob") 35 | learner$train(task, part$train) # no offset 36 | learner_offset = lrn("classif.log_reg", predict_type = "prob") 37 | learner_offset$train(task_with_offset, part$train) # with offset (during training) 38 | 39 | # trained models are different 40 | expect_numeric(learner_offset$model$offset) # offset is used 41 | expect_null(learner$model$offset) # offset not used 42 | expect_false(all(learner$model$coefficients == learner_offset$model$coefficients)) 43 | 44 | # check: we get same trained model manually using the formula interface 45 | model = stats::glm(y ~ x + offset(offset_col), family = "binomial", 46 | data = data_with_offset, subset = part$train) 47 | expect_equal(model$coefficients, learner_offset$model$coefficients) 48 | 49 | # predict on test set (offset is used by default) 50 | p1 = learner_offset$predict(task_with_offset, part$test) 51 | # same thing manually 52 | res = unname(predict(model, type = "response", 53 | newdata = data_with_offset[part$test, ])) 54 | prob_offset = p1$prob[, "1"] 55 | expect_equal(prob_offset, res) 56 | # no offset during predict 57 | learner_offset$param_set$set_values(.values = list(use_pred_offset = FALSE)) 58 | p2 = learner_offset$predict(task_with_offset, part$test) 59 | prob = p2$prob[, "1"] 60 | off = offset_col[part$test] 61 | # predictions are different 62 | expect_true(all(prob_offset != prob)) 63 | # but connected via: 64 | expect_equal(log(prob_offset/(1 - prob_offset)), log(prob/(1 - prob)) + off) 65 | 66 | # verify predictions manually 67 | res = unname(predict(model, type = "response", 68 | newdata 
= cbind(data[part$test, ], offset_col = 0)))
  expect_equal(prob, res)

  # using a task with offset on a learner that didn't use offset during training
  # results in the same prediction: offset is completely ignored
  p3 = learner$predict(task, part$test)
  p4 = learner$predict(task_with_offset, part$test)
  expect_equal(p3$prob, p4$prob)
})
--------------------------------------------------------------------------------
/R/LearnerRegrSVM.R:
--------------------------------------------------------------------------------
#' @title Support Vector Machine
#'
#' @name mlr_learners_regr.svm
#'
#' @description
#' Support vector machine for regression.
#' Calls [e1071::svm()] from package \CRANpkg{e1071}.
#'
#' @templateVar id regr.svm
#' @template learner
#'
#' @references
#' `r format_bib("cortes_1995")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerRegrSVM = R6Class("LearnerRegrSVM",
  inherit = LearnerRegr,
  public = list(

    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      # parameter set mirroring e1071::svm(); `depends` encodes which
      # parameters are only meaningful for certain kernels / SVM types
      param_set = ps(
        cachesize = p_dbl(default = 40L, tags = "train"),
        coef0 = p_dbl(default = 0, tags = "train", depends = quote(kernel %in% c("polynomial", "sigmoid"))),
        cost = p_dbl(0, default = 1, tags = "train", depends = quote(type %in% c("eps-regression", "nu-regression"))),
        cross = p_int(0L, default = 0L, tags = "train"), # tunable = FALSE),
        degree = p_int(1L, default = 3L, tags = "train", depends = quote(kernel == "polynomial")),
        epsilon = p_dbl(0, default = 0.1, tags = "train", depends = quote(type == "eps-regression")),
        fitted = p_lgl(default = TRUE, tags = "train"), # tunable = FALSE),
        gamma = p_dbl(0, tags = "train", depends = quote(kernel %in% c("polynomial", "radial", "sigmoid"))),
        kernel = p_fct(c("linear", "polynomial", "radial", "sigmoid"), default = "radial", tags = "train"),
        nu = p_dbl(default = 0.5, tags = "train", depends = quote(type == "nu-regression")),
        scale = p_uty(default = TRUE, tags = "train"),
        shrinking = p_lgl(default = TRUE, tags = "train"),
        tolerance = p_dbl(0, default = 0.001, tags = "train"),
        type = p_fct(c("eps-regression", "nu-regression"), default = "eps-regression", tags = "train")
      )

      super$initialize(
        id = "regr.svm",
        param_set = param_set,
        feature_types = c("logical", "integer", "numeric"),
        packages = c("mlr3learners", "e1071"),
        label = "Support Vector Machine",
        man = "mlr3learners::mlr_learners_regr.svm"
      )
    }
  ),

  private = list(
    .train = function(task) {
      pars = self$param_set$get_values(tags = "train")
      # e1071::svm() requires a numeric feature matrix
      x = as_numeric_matrix(task$data(cols = task$feature_names))
      invoke(e1071::svm, x = x, y = task$truth(), .args = pars)
    },

    .predict = function(task) {
      pars = self$param_set$get_values(tags = "predict")
      newdata = as_numeric_matrix(ordered_features(task, self))
      pred = invoke(predict, self$model, newdata = newdata, type = "response", .args = pars)
      list(response = pred)
    }
  )
)

#' @export
default_values.LearnerRegrSVM = function(x, search_space, task, ...) { # nolint
  # gamma's default depends on the task: 1 / number of features
  defaults = insert_named(
    default_values(x$param_set),
    list(gamma = 1 / length(task$feature_names))
  )
  # degree has no task-independent default
  defaults[["degree"]] = NULL
  defaults[search_space$ids()]
}

#' @include aaa.R
learners[["regr.svm"]] = LearnerRegrSVM
--------------------------------------------------------------------------------
/R/LearnerRegrNnet.R:
--------------------------------------------------------------------------------
#' @title Neural Network Regression Learner
#'
#' @name mlr_learners_regr.nnet
#'
#' @description
#' Single Layer Neural Network.
#' Calls [nnet::nnet.formula()] from package \CRANpkg{nnet}.
#'
#' Note that modern neural networks with multiple layers are connected
#' via package [mlr3torch](https://github.com/mlr-org/mlr3torch).
#'
#' @templateVar id regr.nnet
#' @template learner
#'
#' @section Initial parameter values:
#' - `size`:
#'   - Adjusted default: 3L.
#'   - Reason for change: no default in `nnet()`.
#'
#' @section Custom mlr3 parameters:
#' - `formula`: if not provided, the formula is set to `task$formula()`.
#'
#' @references
#' `r format_bib("ripley_1996")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerRegrNnet = R6Class("LearnerRegrNnet",
  inherit = LearnerRegr,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
initialize = function() {
      param_set = ps(
        Hess = p_lgl(default = FALSE, tags = "train"),
        MaxNWts = p_int(1L, default = 1000L, tags = "train"),
        Wts = p_uty(tags = "train"),
        abstol = p_dbl(default = 1.0e-4, tags = "train"),
        censored = p_lgl(default = FALSE, tags = "train"),
        contrasts = p_uty(default = NULL, tags = "train"),
        decay = p_dbl(default = 0, tags = "train"),
        mask = p_uty(tags = "train"),
        maxit = p_int(1L, default = 100L, tags = "train"),
        na.action = p_uty(tags = "train"),
        rang = p_dbl(default = 0.7, tags = "train"),
        reltol = p_dbl(default = 1.0e-8, tags = "train"),
        size = p_int(0L, default = 3L, tags = "train"),
        skip = p_lgl(default = FALSE, tags = "train"),
        subset = p_uty(tags = "train"),
        trace = p_lgl(default = TRUE, tags = "train"),
        formula = p_uty(tags = "train")
      )
      # nnet() has no default for size; pick a small sensible one
      param_set$set_values(size = 3L)

      super$initialize(
        id = "regr.nnet",
        packages = c("mlr3learners", "nnet"),
        feature_types = c("logical", "numeric", "factor", "ordered", "integer"),
        predict_types = "response",
        param_set = param_set,
        properties = "weights",
        label = "Single Layer Neural Network",
        man = "mlr3learners::mlr_learners_regr.nnet"
      )
    }
  ),

  private = list(
    .train = function(task) {
      pars = self$param_set$get_values(tags = "train")
      pars$weights = get_weights(task, private)

      # fall back to the task formula when the user did not supply one
      pars$formula = pars$formula %??% task$formula()

      # linout = TRUE forces a linear output unit, as required for regression
      invoke(nnet::nnet.formula, data = task$data(), linout = TRUE, .args = pars)
    },

    .predict = function(task) {
      pars = self$param_set$get_values(tags = "predict")
      pred = invoke(predict, self$model,
        newdata = ordered_features(task, self), .args = pars)
      list(response = as.numeric(pred))
    }
  )
)

#' @include aaa.R
learners[["regr.nnet"]] = LearnerRegrNnet
--------------------------------------------------------------------------------
/R/helpers_glmnet.R:
--------------------------------------------------------------------------------
glmnet_get_lambda = function(self, pv) {
  if (is.null(self$model)) {
    stopf("Learner '%s' has no model stored", self$id)
  }

  pv = pv %??% self$param_set$get_values(tags = "predict")
  s = pv$s

  if (is.character(s)) {
    self$model[[s]]
  } else if (is.numeric(s)) {
    s
  } else { # null / missing
    if (inherits(self$model, "cv.glmnet")) {
      self$model[["lambda.1se"]]
    } else if (length(self$model$lambda) == 1L) {
      self$model$lambda
    } else {
      default = self$param_set$default$s
      warningf("Multiple lambdas have been fit. Lambda will be set to %s (see parameter 's').", default)
      default
    }
  }
}


glmnet_feature_names = function(model) {
  beta = model$beta
  if (is.null(beta)) {
    beta = model$glmnet.fit$beta
  }

  rownames(if (is.list(beta)) beta[[1L]] else beta)
}


glmnet_selected_features = function(self, lambda = NULL) {
  if (is.null(self$model)) {
    stopf("No model stored")
  }

  assert_number(lambda, null.ok = TRUE, lower = 0)
  lambda = lambda %??% glmnet_get_lambda(self)
  nonzero = predict(self$model, type = "nonzero", s = lambda)
  if (is.data.frame(nonzero)) {
    nonzero = nonzero[[1L]]
  } else {
    nonzero = unlist(map(nonzero, 1L), use.names = FALSE)
    nonzero = if (length(nonzero)) sort(unique(nonzero)) else integer()
  }

  glmnet_feature_names(self$model)[nonzero]
}


glmnet_invoke = function(data, target, pv, cv = FALSE) {
  # glmnet for some reasons sets these parameters globally, therefore
  # we reset them afterwards
  saved_ctrl = glmnet::glmnet.control()
  on.exit(invoke(glmnet::glmnet.control, .args =
saved_ctrl))
  glmnet::glmnet.control(factory = TRUE)
  is_ctrl_pars = names(pv) %in% names(saved_ctrl)

  if (any(is_ctrl_pars)) {
    invoke(glmnet::glmnet.control, .args = pv[is_ctrl_pars])
    pv = pv[!is_ctrl_pars]
  }

  invoke(
    if (cv) glmnet::cv.glmnet else glmnet::glmnet,
    x = data, y = target, .args = pv
  )
}

glmnet_set_offset = function(task, phase = "train", pv) {
  assert_choice(phase, c("train", "predict"))

  # nothing to do for tasks without an offset column role
  if ("offset" %nin% task$properties) {
    return(pv)
  }
  # offsets are only handled for regression and classification tasks
  if (task$task_type %nin% c("regr", "classif")) {
    return(pv)
  }

  is_train = phase == "train"
  # glmnet expects the offset under a different argument name at predict time
  key = if (is_train) "offset" else "newoffset"
  # at predict time the offset is only applied when requested via use_pred_offset
  apply_offset = is_train || isTRUE(pv$use_pred_offset)

  multiclass = task$task_type == "classif" && length(task$class_names) > 2L

  if (multiclass) {
    # multiclass glmnet needs one offset column per class
    offset_cols = paste0("offset_", task$class_names)
    pv[[key]] = if (apply_offset) {
      as_numeric_matrix(task$offset)[, offset_cols]
    } else {
      matrix(0, nrow = task$nrow, ncol = length(task$class_names))
    }
  } else {
    # regression and twoclass classification use a single offset vector
    pv[[key]] = if (apply_offset) task$offset$offset else rep(0, task$nrow)
  }

  pv
}
--------------------------------------------------------------------------------
/tests/testthat/test_classif_glmnet.R:
--------------------------------------------------------------------------------
skip_on_os("solaris") # glmnet not working properly on solaris
skip_if_not_installed("glmnet")

test_that("autotest", {
  learner = mlr3::lrn("classif.glmnet", lambda = 0.1)
  expect_learner(learner)

  skip_on_os("solaris")
  result = run_autotest(learner, exclude = "feat_single")
  expect_true(result, info = result$error)
})
test_that("prob column reordering (#155)", {
  task = tsk("sonar")
  learner = mlr3::lrn("classif.glmnet", predict_type = "prob", lambda = 0.1)

  # accuracy must hold regardless of which class is marked positive
  for (pos in c("M", "R")) {
    task$positive = pos
    learner$train(task)
    p = learner$predict(task)
    expect_gt(p$score(msr("classif.acc")), 0.6)
  }
})

test_that("same label ordering as in glm() / log_reg", {
  task = with_seed(123, tgen("2dnormals")$generate(100))
  for (pos in task$class_names) {
    task$positive = pos

    # an unpenalized glmnet fit must agree in sign with glm()
    l1 = lrn("classif.log_reg")$train(task)
    l2 = lrn("classif.glmnet", lambda = 0)$train(task)

    expect_equal(sign(as.numeric(coef(l1$model))), sign(as.numeric(coef(l2$model))),
      info = sprintf("positive label = %s", pos))
  }
})

test_that("selected_features", {
  task = tsk("iris")
  learner = lrn("classif.glmnet")
  learner$train(task)

  # lambda = 0: no shrinkage, every feature stays in the model
  expect_equal(learner$selected_features(0), task$feature_names)

  # lambda = Inf: everything is shrunk away
  expect_equal(learner$selected_features(Inf), character())
})

test_that("offset works", {
  with_seed(7832, {
    data = data.table(x = 1:50, z = runif(50), y = stats::rbinom(50, size = 1, prob = 0.5))
    offset_col = runif(50)
    data_with_offset = cbind(data, offset_col)
  })

  task = as_task_classif(x = data, target = "y", positive = "1")
  task_with_offset = as_task_classif(x = data_with_offset, target = "y", positive = "1")
  task_with_offset$set_col_roles(cols = "offset_col", roles = "offset")
  part = partition(task)

  # train learner
  learner = lrn("classif.glmnet", predict_type = "prob", lambda = 0.01)
  learner$train(task, part$train) # no offset
  learner_offset = lrn("classif.glmnet", predict_type = "prob", lambda = 0.01)
  learner_offset$train(task_with_offset,
part$train) # with offset (during training)

  # trained models are different
  expect_true(learner_offset$model$offset) # offset is used
  expect_false(learner$model$offset) # offset not used
  expect_false(all(learner$model$beta == learner_offset$model$beta))

  # predict on test set (offset is used by default)
  p1 = learner_offset$predict(task_with_offset, part$test)
  # no offset during predict
  learner_offset$param_set$set_values(.values = list(use_pred_offset = FALSE))
  p2 = learner_offset$predict(task_with_offset, part$test)
  prob_offset = p1$prob[, "1"]
  prob = p2$prob[, "1"]
  off = offset_col[part$test]
  # predictions are different
  expect_true(all(prob != prob_offset))
  # but connected via:
  expect_equal(log(prob_offset/(1 - prob_offset)), log(prob/(1 - prob)) + off)

  # using a task with offset on a learner that didn't use offset during training
  # results in the same prediction: offset is completely ignored
  p3 = learner$predict(task, part$test)
  p4 = learner$predict(task_with_offset, part$test)
  expect_equal(p3$prob, p4$prob)
})
--------------------------------------------------------------------------------
/tests/testthat/test_classif_ranger.R:
--------------------------------------------------------------------------------
skip_if_not_installed("ranger")

test_that("autotest", {
  learner = mlr3::lrn("classif.ranger")
  expect_learner(learner)
  learner$param_set$set_values(num.trees = 30L, importance = "impurity")
  result = run_autotest(learner)
  expect_true(result, info = result$error)
})

test_that("hotstart", {
  task = tsk("iris")

  base = lrn("classif.ranger", num.trees = 1000L)
  base$train(task)
  expect_equal(base$state$param_vals$num.trees, 1000L)
  expect_equal(base$model$num.trees, 1000L)

  hot = HotstartStack$new(base)

  # fewer trees: restart from the stored model at zero cost
  smaller = lrn("classif.ranger", num.trees = 500L)
  expect_equal(hot$start_cost(smaller, task$hash), 0L)
  smaller$train(task)
  expect_equal(smaller$model$num.trees, 500L)
  expect_equal(smaller$param_set$values$num.trees, 500L)
  expect_equal(smaller$state$param_vals$num.trees, 500L)

  # more trees: ranger cannot hotstart forward
  bigger = lrn("classif.ranger", num.trees = 1500L)
  expect_equal(hot$start_cost(bigger, task$hash), NA_real_)

  # identical size: reuse the stored model directly
  same = lrn("classif.ranger", num.trees = 1000L)
  expect_equal(hot$start_cost(same, task$hash), -1L)
  same$train(task)
  expect_equal(same$model$num.trees, 1000L)
  expect_equal(same$param_set$values$num.trees, 1000L)
  expect_equal(same$state$param_vals$num.trees, 1000L)
})

test_that("mtry.ratio", {
  task = mlr3::tsk("sonar")
  learner = mlr3::lrn("classif.ranger", mtry.ratio = 0.5)

  # mtry.ratio is translated to mtry = ratio * p before training
  res = convert_ratio(learner$param_set$values, "mtry", "mtry.ratio", length(task$feature_names))
  expect_equal(res$mtry, 30)
  expect_null(res$mtry.ratio)

  learner$train(task)
  expect_equal(learner$model$mtry, 30)
})

test_that("convert_ratio", {
  task = tsk("sonar")
  learner = lrn("classif.ranger", num.trees = 5, mtry.ratio = 0.5)
  expect_equal(learner$train(task)$model$mtry, 30)

  # ratio of 0 is clamped to at least one feature
  learner$param_set$values$mtry.ratio = 0
  expect_equal(learner$train(task)$model$mtry, 1)

  learner$param_set$values$mtry.ratio = 1
  expect_equal(learner$train(task)$model$mtry, 60)

  # mtry and mtry.ratio are mutually exclusive
  learner$param_set$values$mtry = 10
  expect_error(learner$train(task), "exclusive")

  learner$param_set$values$mtry.ratio = NULL
  expect_equal(learner$train(task)$model$mtry, 10)

  learner$param_set$values$mtry = 10
  expect_equal(learner$train(task)$model$mtry, 10)
})

test_that("default_values", {
  learner = lrn("classif.ranger")
  search_space = ps(
    replace = p_lgl(),
    sample.fraction = p_dbl(0.1, 1),
    num.trees = p_int(1, 2000),
    mtry.ratio = p_dbl(0, 1)
  )
  task = tsk("pima")

  values = default_values(learner, search_space, task)
  expect_names(names(values), permutation.of = c("replace", "sample.fraction", "num.trees", "mtry.ratio"))
})

test_that("selected_features", {
  learner = lrn("classif.ranger")
  # requires a trained model
  expect_error(learner$selected_features())

  task = tsk("iris")
  learner$train(task)
  expect_set_equal(learner$selected_features(), c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width"))
})

test_that("oob_error available without stored model", {
  task = tsk("pima")
  learner = lrn("classif.ranger")

  # oob_error must be extractable even when models are discarded
  rr = resample(task, learner, rsmp("holdout"), store_models = FALSE)
  expect_number(rr$aggregate(msr("oob_error")))
})
--------------------------------------------------------------------------------
/R/LearnerClassifNnet.R:
--------------------------------------------------------------------------------
#' @title Classification Neural Network Learner
#'
#' @name mlr_learners_classif.nnet
#'
#' @description
#' Single Layer Neural Network.
#' Calls [nnet::nnet.formula()] from package \CRANpkg{nnet}.
#'
#' Note that modern neural networks with multiple layers are connected
#' via package [mlr3torch](https://github.com/mlr-org/mlr3torch).
#'
#' @templateVar id classif.nnet
#' @template learner
#'
#' @section Initial parameter values:
#' - `size`:
#'   - Adjusted default: 3L.
#'   - Reason for change: no default in `nnet()`.
#'
#' @section Custom mlr3 parameters:
#' - `formula`: if not provided, the formula is set to `task$formula()`.
#'
#' @references
#' `r format_bib("ripley_1996")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClassifNnet = R6Class("LearnerClassifNnet",
  inherit = LearnerClassif,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      # Hyperparameter space of nnet::nnet.formula(), plus the custom
      # `formula` parameter (falls back to task$formula() when unset).
      param_set = ps(
        Hess = p_lgl(default = FALSE, tags = "train"),
        MaxNWts = p_int(1L, default = 1000L, tags = "train"),
        Wts = p_uty(tags = "train"),
        abstol = p_dbl(default = 1.0e-4, tags = "train"),
        censored = p_lgl(default = FALSE, tags = "train"),
        contrasts = p_uty(default = NULL, tags = "train"),
        decay = p_dbl(default = 0, tags = "train"),
        mask = p_uty(tags = "train"),
        maxit = p_int(1L, default = 100L, tags = "train"),
        na.action = p_uty(tags = "train"),
        rang = p_dbl(default = 0.7, tags = "train"),
        reltol = p_dbl(default = 1.0e-8, tags = "train"),
        size = p_int(0L, default = 3L, tags = "train"),
        skip = p_lgl(default = FALSE, tags = "train"),
        subset = p_uty(tags = "train"),
        trace = p_lgl(default = TRUE, tags = "train"),
        formula = p_uty(tags = "train")
      )
      # nnet() has no default for `size`, so set an explicit initial value
      param_set$set_values(size = 3L)

      super$initialize(
        id = "classif.nnet",
        packages = c("mlr3learners", "nnet"),
        feature_types = c("logical", "numeric", "factor", "ordered", "integer"),
        predict_types = c("prob", "response"),
        param_set = param_set,
        properties = c("twoclass", "multiclass", "weights"),
        label = "Single Layer Neural Network",
        man = "mlr3learners::mlr_learners_classif.nnet"
      )
    }
  ),

  private = list(
    # Fits the network via nnet's formula interface on the task data.
    .train = function(task) {
      pars = self$param_set$get_values(tags = "train")
      pars$weights = get_weights(task, private)

      if (is.null(pars$formula)) {
        pars$formula = task$formula()
      }
      invoke(nnet::nnet.formula, data = task$data(), .args = pars)
    },

    # Predicts either class labels or a probability matrix.
    .predict = function(task) {
      pars = self$param_set$get_values(tags = "predict")
      newdata = ordered_features(task, self)

      if (self$predict_type != "response") {
        prob = invoke(predict, self$model, newdata = newdata, type = "raw", .args = pars)
        lvls = self$model$lev
        if (length(lvls) == 2L) {
          # binary case: nnet returns a single column, expand to two columns
          prob = pvec2mat(prob[, 1L], lvls)
        }
        return(list(prob = prob))
      }

      response = invoke(predict, self$model, newdata = newdata, type = "class", .args = pars)
      list(response = response)
    }
  )
)

#' @include aaa.R
learners[["classif.nnet"]] = LearnerClassifNnet
AFP share 43 | .AppleDB 44 | .AppleDesktop 45 | Network Trash Folder 46 | Temporary Items 47 | .apdisk 48 | 49 | ### macOS Patch ### 50 | # iCloud generated files 51 | *.icloud 52 | 53 | ### R ### 54 | # History files 55 | .Rhistory 56 | .Rapp.history 57 | 58 | # Session Data files 59 | .RData 60 | .RDataTmp 61 | 62 | # User-specific files 63 | .Ruserdata 64 | 65 | # Example code in package build process 66 | *-Ex.R 67 | 68 | # Output files from R CMD build 69 | /*.tar.gz 70 | 71 | # Output files from R CMD check 72 | /*.Rcheck/ 73 | 74 | # RStudio files 75 | .Rproj.user/ 76 | 77 | # produced vignettes 78 | vignettes/*.html 79 | vignettes/*.pdf 80 | 81 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 82 | .httr-oauth 83 | 84 | # knitr and R markdown default cache directories 85 | *_cache/ 86 | /cache/ 87 | 88 | # Temporary files created by R markdown 89 | *.utf8.md 90 | *.knit.md 91 | 92 | # R Environment Variables 93 | .Renviron 94 | 95 | # pkgdown site 96 | docs/ 97 | 98 | # translation temp files 99 | po/*~ 100 | 101 | # RStudio Connect folder 102 | rsconnect/ 103 | 104 | ### R.Bookdown Stack ### 105 | # R package: bookdown caching files 106 | /*_files/ 107 | 108 | ### VisualStudioCode ### 109 | .vscode/* 110 | !.vscode/settings.json 111 | !.vscode/tasks.json 112 | !.vscode/launch.json 113 | !.vscode/extensions.json 114 | !.vscode/*.code-snippets 115 | 116 | # Local History for Visual Studio Code 117 | .history/ 118 | 119 | # Built Visual Studio Code Extensions 120 | *.vsix 121 | 122 | ### VisualStudioCode Patch ### 123 | # Ignore all local history of files 124 | .history 125 | .ionide 126 | 127 | ### Windows ### 128 | # Windows thumbnail cache files 129 | Thumbs.db 130 | Thumbs.db:encryptable 131 | ehthumbs.db 132 | ehthumbs_vista.db 133 | 134 | # Dump file 135 | *.stackdump 136 | 137 | # Folder config file 138 | [Dd]esktop.ini 139 | 140 | # Recycle Bin used on file shares 141 | $RECYCLE.BIN/ 142 | 143 | # Windows Installer files 144 | 
#' @title Multinomial log-linear learner via neural networks
#'
#' @name mlr_learners_classif.multinom
#'
#' @description
#' Multinomial log-linear models via neural networks.
#' Calls [nnet::multinom()] from package \CRANpkg{nnet}.
#'
#' @templateVar id classif.multinom
#' @template learner
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClassifMultinom = R6Class("LearnerClassifMultinom",
  inherit = LearnerClassif,

  public = list(

    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      # Hyperparameters of nnet::multinom() and the underlying nnet().
      param_set = ps(
        Hess = p_lgl(default = FALSE, tags = "train"),
        abstol = p_dbl(default = 1.0e-4, tags = "train"),
        censored = p_lgl(default = FALSE, tags = "train"),
        decay = p_dbl(default = 0, tags = "train"),
        entropy = p_lgl(default = FALSE, tags = "train"),
        mask = p_uty(tags = "train"),
        maxit = p_int(1L, default = 100L, tags = "train"),
        MaxNWts = p_int(1L, default = 1000L, tags = "train"),
        model = p_lgl(default = FALSE, tags = "train"),
        linout = p_lgl(default = FALSE, tags = "train"),
        rang = p_dbl(default = 0.7, tags = "train"),
        reltol = p_dbl(default = 1.0e-8, tags = "train"),
        size = p_int(1L, tags = "train"),
        skip = p_lgl(default = FALSE, tags = "train"),
        softmax = p_lgl(default = FALSE, tags = "train"),
        summ = p_fct(c("0", "1", "2", "3"), default = "0", tags = "train"),
        trace = p_lgl(default = TRUE, tags = "train"),
        Wts = p_uty(tags = "train")
      )

      super$initialize(
        id = "classif.multinom",
        param_set = param_set,
        predict_types = c("response", "prob"),
        feature_types = c("logical", "integer", "numeric", "factor"),
        properties = c("weights", "twoclass", "multiclass"),
        packages = c("mlr3learners", "nnet"),
        label = "Multinomial Log-Linear Model",
        man = "mlr3learners::mlr_learners_classif.multinom"
      )
    }
  ),

  private = list(
    .train = function(task) {
      pars = self$param_set$get_values(tags = "train")
      pars$weights = get_weights(task, private)

      # `summ` is declared as a factor-valued parameter; multinom() wants an integer
      if (!is.null(pars$summ)) {
        pars$summ = as.integer(pars$summ)
      }

      # nnet does not handle formulas without an environment; build one
      # here so that `summary()` works on the fitted model.
      pars$formula = reformulate(".", response = task$target_names)

      invoke(nnet::multinom, data = task$data(), .args = pars)
    },

    .predict = function(task) {
      pars = self$param_set$get_values(tags = "predict")
      newdata = ordered_features(task, self)

      if (self$predict_type == "response") {
        pred = invoke(predict, self$model, newdata = newdata, type = "class", .args = pars)
        return(list(response = drop(pred)))
      }

      lvls = self$model$lev
      prob = unname(invoke(predict, self$model, newdata = newdata, type = "probs", .args = pars))

      # dimensions are dropped for n == 1 (https://github.com/mlr-org/mlr3/issues/883)
      if (task$nrow == 1L) {
        prob = matrix(prob, nrow = 1L)
      }

      if (length(lvls) == 2L) {
        prob = pvec2mat(prob, lvls)
      } else {
        colnames(prob) = lvls
      }

      list(prob = prob)
    }
  )
)

#' @include aaa.R
learners[["classif.multinom"]] = LearnerClassifMultinom
25 | initialize = function() { 26 | ps = ps( 27 | cachesize = p_dbl(default = 40L, tags = "train"), 28 | class.weights = p_uty(default = NULL, tags = "train"), 29 | coef0 = p_dbl(default = 0, tags = "train", depends = quote(kernel %in% c("polynomial", "sigmoid"))), 30 | cost = p_dbl(0, default = 1, tags = "train", depends = quote(type == "C-classification")), 31 | cross = p_int(0L, default = 0L, tags = "train"), 32 | decision.values = p_lgl(default = FALSE, tags = "predict"), 33 | degree = p_int(1L, default = 3L, tags = "train", depends = quote(kernel == "polynomial")), 34 | epsilon = p_dbl(0, default = 0.1, tags = "train"), 35 | fitted = p_lgl(default = TRUE, tags = "train"), 36 | gamma = p_dbl(0, tags = "train", depends = quote(kernel %in% c("polynomial", "radial", "sigmoid"))), 37 | kernel = p_fct(c("linear", "polynomial", "radial", "sigmoid"), default = "radial", tags = "train"), 38 | nu = p_dbl(default = 0.5, tags = "train", depends = quote(type == "nu-classification")), 39 | scale = p_uty(default = TRUE, tags = "train"), 40 | shrinking = p_lgl(default = TRUE, tags = "train"), 41 | tolerance = p_dbl(0, default = 0.001, tags = "train"), 42 | type = p_fct(c("C-classification", "nu-classification"), default = "C-classification", tags = "train") 43 | ) 44 | 45 | super$initialize( 46 | id = "classif.svm", 47 | param_set = ps, 48 | predict_types = c("response", "prob"), 49 | feature_types = c("logical", "integer", "numeric"), 50 | properties = c("twoclass", "multiclass"), 51 | packages = c("mlr3learners", "e1071"), 52 | label = "Support Vector Machine", 53 | man = "mlr3learners::mlr_learners_classif.svm" 54 | ) 55 | } 56 | ), 57 | 58 | private = list( 59 | .train = function(task) { 60 | pv = self$param_set$get_values(tags = "train") 61 | data = as_numeric_matrix(task$data(cols = task$feature_names)) 62 | 63 | invoke(e1071::svm, 64 | x = data, 65 | y = task$truth(), 66 | probability = (self$predict_type == "prob"), 67 | .args = pv 68 | ) 69 | }, 70 | 71 | .predict = 
function(task) { 72 | pv = self$param_set$get_values(tags = "predict") 73 | newdata = as_numeric_matrix(ordered_features(task, self)) 74 | p = invoke(predict, self$model, 75 | newdata = newdata, 76 | probability = (self$predict_type == "prob"), .args = pv) 77 | 78 | list( 79 | response = as.character(p), 80 | prob = attr(p, "probabilities") # is NULL if not requested during predict 81 | ) 82 | } 83 | ) 84 | ) 85 | 86 | #' @export 87 | default_values.LearnerClassifSVM = function(x, search_space, task, ...) { # nolint 88 | special_defaults = list( 89 | gamma = 1 / length(task$feature_names) 90 | ) 91 | defaults = insert_named(default_values(x$param_set), special_defaults) 92 | # defaults[["degree"]] = NULL 93 | defaults = defaults[search_space$ids()] 94 | 95 | # fix dependencies 96 | if (!is.null(defaults[["degree"]])) defaults[["degree"]] = NA_real_ 97 | if (!is.null(defaults[["coef0"]])) defaults[["coef0"]] = NA_real_ 98 | 99 | defaults 100 | } 101 | 102 | #' @include aaa.R 103 | learners[["classif.svm"]] = LearnerClassifSVM 104 | -------------------------------------------------------------------------------- /R/LearnerRegrLM.R: -------------------------------------------------------------------------------- 1 | #' @title Linear Model Regression Learner 2 | #' 3 | #' @name mlr_learners_regr.lm 4 | #' 5 | #' @description 6 | #' Ordinary linear regression. 7 | #' Calls [stats::lm()]. 8 | #' 9 | #' @section Offset: 10 | #' If a `Task` has a column with the role `offset`, it will automatically be used during training. 11 | #' The offset is incorporated through the formula interface to ensure compatibility with [stats::lm()]. 12 | #' We add it to the model formula as `offset()` and also include it in the training data. 13 | #' During prediction, the default behavior is to use the offset column from the test set (enabled by `use_pred_offset = TRUE`). 
#' Otherwise, if the user sets `use_pred_offset = FALSE`, a zero offset is applied, effectively disabling the offset adjustment during prediction.
#'
#' @templateVar id regr.lm
#' @template learner
#'
#' @template section_contrasts
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerRegrLM = R6Class("LearnerRegrLM",
  inherit = LearnerRegr,

  public = list(

    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      # stats::lm() / predict.lm() parameters plus the custom
      # `use_pred_offset` switch (see the Offset section above).
      param_set = ps(
        df = p_dbl(default = Inf, tags = "predict"),
        interval = p_fct(c("none", "confidence", "prediction"), tags = "predict"),
        level = p_dbl(default = 0.95, tags = "predict"),
        model = p_lgl(default = TRUE, tags = "train"),
        pred.var = p_uty(tags = "predict"),
        qr = p_lgl(default = TRUE, tags = "train"),
        scale = p_dbl(default = NULL, special_vals = list(NULL), tags = "predict"),
        singular.ok = p_lgl(default = TRUE, tags = "train"),
        x = p_lgl(default = FALSE, tags = "train"),
        y = p_lgl(default = FALSE, tags = "train"),
        rankdeficient = p_fct(c("warnif", "simple", "non-estim", "NA", "NAwarn"), tags = "predict"),
        tol = p_dbl(default = 1e-07, tags = "predict"),
        verbose = p_lgl(default = FALSE, tags = "predict"),
        use_pred_offset = p_lgl(default = TRUE, tags = "predict")
      )

      param_set$set_values(use_pred_offset = TRUE)

      super$initialize(
        id = "regr.lm",
        param_set = param_set,
        predict_types = c("response", "se"),
        feature_types = c("logical", "integer", "numeric", "factor", "character"),
        properties = c("weights", "offset"),
        packages = c("mlr3learners", "stats"),
        label = "Linear Model",
        man = "mlr3learners::mlr_learners_regr.lm"
      )
    }
  ),

  private = list(
    .train = function(task) {
      pars = self$param_set$get_values(tags = "train")
      pars$weights = get_weights(task, private)

      form = task$formula()
      data = task$data()

      if ("offset" %in% task$properties) {
        # we use the formula interface as `offset` = ... doesn't work during prediction
        offset_colname = task$col_roles$offset
        # re-write the formula with an offset() term
        rhs_terms = c(task$feature_names, paste0("offset(", offset_colname, ")"))
        # needs both `env = ...` and `quote = "left"` args to work
        form = mlr3misc::formulate(lhs = task$target_names, rhs = rhs_terms,
          env = environment(), quote = "left")
        # add offset column to the training data
        data = data[, (offset_colname) := task$offset$offset][]
      }

      invoke(stats::lm,
        formula = form, data = data,
        .args = pars, .opts = opts_default_contrasts)
    },

    .predict = function(task) {
      pars = self$param_set$get_values(tags = "predict")
      newdata = ordered_features(task, self)
      se_fit = self$predict_type == "se"

      if ("offset" %in% task$properties) {
        # add offset to the test data; zero disables the offset adjustment
        offset_colname = task$col_roles$offset
        newdata[, (offset_colname) := if (isTRUE(pars$use_pred_offset)) task$offset$offset else 0]
      }

      prediction = invoke(predict, object = self$model, newdata = newdata, se.fit = se_fit, .args = pars)

      # need to remove NAs for this crazy replication that using offset in lm does
      if ("offset" %in% task$properties) {
        prediction = prediction[!is.na(prediction)]
      }

      if (se_fit) {
        list(response = unname(prediction$fit), se = unname(prediction$se.fit))
      } else {
        list(response = unname(prediction))
      }
    }
  )
)

#' @include aaa.R
learners[["regr.lm"]] = LearnerRegrLM
mlr_learners_classif.log_reg
#'
#' @description
#' Classification via logistic regression.
#' Calls [stats::glm()] with `family` set to `"binomial"`.
#'
#' @section Internal Encoding:
#' Starting with \CRANpkg{mlr3} v0.5.0, the order of class labels is reversed prior to
#' model fitting to comply to the [stats::glm()] convention that the negative class is provided
#' as the first factor level.
#'
#' @section Initial parameter values:
#' - `model`:
#'   - Actual default: `TRUE`.
#'   - Adjusted default: `FALSE`.
#'   - Reason for change: Save some memory.
#'
#' @section Offset:
#' If a `Task` has a column with the role `offset`, it will automatically be used during training.
#' The offset is incorporated through the formula interface to ensure compatibility with [stats::glm()].
#' We add it to the model formula as `offset()` and also include it in the training data.
#' During prediction, the default behavior is to use the offset column from the test set (enabled by `use_pred_offset = TRUE`).
#' Otherwise, if the user sets `use_pred_offset = FALSE`, a zero offset is applied, effectively disabling the offset adjustment during prediction.
#'
#' @templateVar id classif.log_reg
#' @template learner
#'
#' @template section_contrasts
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClassifLogReg = R6Class("LearnerClassifLogReg",
  inherit = LearnerClassif,

  public = list(

    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      ps = ps(
        dispersion = p_uty(default = NULL, tags = "predict"),
        epsilon = p_dbl(default = 1e-8, tags = c("train", "control")),
        etastart = p_uty(tags = "train"),
        maxit = p_dbl(default = 25, tags = c("train", "control")),
        model = p_lgl(default = TRUE, tags = "train"),
        mustart = p_uty(tags = "train"),
        singular.ok = p_lgl(default = TRUE, tags = "train"),
        start = p_uty(default = NULL, tags = "train"),
        trace = p_lgl(default = FALSE, tags = c("train", "control")),
        x = p_lgl(default = FALSE, tags = "train"),
        y = p_lgl(default = TRUE, tags = "train"),
        use_pred_offset = p_lgl(default = TRUE, tags = "predict")
      )

      ps$set_values(use_pred_offset = TRUE)

      super$initialize(
        id = "classif.log_reg",
        param_set = ps,
        predict_types = c("response", "prob"),
        feature_types = c("logical", "integer", "numeric", "character", "factor", "ordered"),
        properties = c("weights", "twoclass", "offset"),
        packages = c("mlr3learners", "stats"),
        label = "Logistic Regression",
        man = "mlr3learners::mlr_learners_classif.log_reg"
      )
    }
  ),

  private = list(
    # Fits a binomial glm(); see the Offset and Internal Encoding sections
    # in the roxygen header for the two non-obvious transformations below.
    .train = function(task) {
      pv = self$param_set$get_values(tags = "train")
      pv$weights = get_weights(task, private)

      form = task$formula()
      data = task$data()

      if ("offset" %in% task$properties) {
        # we use the formula interface as `offset` = ... doesn't work during prediction
        offset_colname = task$col_roles$offset
        # re-write formula
        formula_terms = c(task$feature_names, paste0("offset(", offset_colname, ")"))
        # needs both `env = ...` and `quote = "left"` args to work
        form = mlr3misc::formulate(lhs = task$target_names, rhs = formula_terms, env = environment(), quote = "left")
        # add offset column to the data
        data = data[, (offset_colname) := task$offset$offset][]
      }

      # logreg expects the first label to be the negative class, contrary
      # to the mlr3 convention that the positive class comes first.
      tn = task$target_names
      data[[tn]] = swap_levels(data[[tn]])

      # Adjusted default: `model = FALSE` saves memory. Fold it into `pv`
      # instead of passing `model = FALSE` alongside `.args = pv`: the
      # latter supplied the argument twice whenever the user set `model`,
      # making glm() fail with "matched by multiple actual arguments".
      if (is.null(pv$model)) {
        pv$model = FALSE
      }

      invoke(stats::glm,
        formula = form, data = data,
        family = "binomial", .args = pv, .opts = opts_default_contrasts)
    },

    # Returns either hard labels (0.5 threshold) or a two-column
    # probability matrix ordered (negative, positive).
    .predict = function(task) {
      pv = self$param_set$get_values(tags = "predict")
      lvls = c(task$negative, task$positive)
      newdata = ordered_features(task, self)

      if ("offset" %in% task$properties) {
        # add offset to the test data; zero disables the adjustment
        offset_colname = task$col_roles$offset
        newdata[, (offset_colname) := if (isTRUE(pv$use_pred_offset)) task$offset$offset else 0]
      }

      p = unname(invoke(predict, object = self$model, newdata = newdata, type = "response", .args = pv))

      if (self$predict_type == "response") {
        list(response = ifelse(p < 0.5, lvls[1L], lvls[2L]))
      } else {
        list(prob = pvec2mat(p, lvls))
      }
    }
  )
)

#' @include aaa.R
learners[["classif.log_reg"]] = LearnerClassifLogReg
#' Calls [DiceKriging::km()] from package \CRANpkg{DiceKriging}.
#'
#' * The predict type hyperparameter "type" defaults to "SK" (simple kriging).
#' * The additional hyperparameter `nugget.stability` is used to overwrite the
#'   hyperparameter `nugget` with `nugget.stability * var(y)` before training to
#'   improve the numerical stability. We recommend a value of `1e-8`.
#' * The additional hyperparameter `jitter` can be set to add
#'   `N(0, [jitter])`-distributed noise to the data before prediction to avoid
#'   perfect interpolation. We recommend a value of `1e-12`.
#'
#' @templateVar id regr.km
#' @template learner
#'
#' @references
#' `r format_bib("roustant_2012")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerRegrKM = R6Class("LearnerRegrKM",
  inherit = LearnerRegr,

  public = list(

    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      ps = ps(
        bias.correct = p_lgl(default = FALSE, tags = "predict"),
        checkNames = p_lgl(default = TRUE, tags = "predict"),
        coef.cov = p_uty(default = NULL, tags = "train"),
        coef.trend = p_uty(default = NULL, tags = "train"),
        coef.var = p_uty(default = NULL, tags = "train"),
        control = p_uty(default = NULL, tags = "train"),
        cov.compute = p_lgl(default = TRUE, tags = "predict"),
        covtype = p_fct(c("gauss", "matern5_2", "matern3_2", "exp", "powexp"), default = "matern5_2", tags = "train"),
        estim.method = p_fct(c("MLE", "LOO"), default = "MLE", tags = "train"),
        gr = p_lgl(default = TRUE, tags = "train"),
        iso = p_lgl(default = FALSE, tags = "train"),
        jitter = p_dbl(0, default = 0, tags = "predict"),
        kernel = p_uty(default = NULL, tags = "train"),
        knots = p_uty(default = NULL, tags = "train", depends = quote(scaling == TRUE)),
        light.return = p_lgl(default = FALSE, tags = "predict"),
        lower = p_uty(default = NULL, tags = "train"),
        multistart = p_int(default = 1, tags = "train", depends = quote(optim.method == "BFGS")),
        noise.var = p_uty(default = NULL, tags = "train"),
        nugget = p_dbl(tags = "train"),
        nugget.estim = p_lgl(default = FALSE, tags = "train"),
        nugget.stability = p_dbl(0, default = 0, tags = "train"),
        optim.method = p_fct(c("BFGS", "gen"), default = "BFGS", tags = "train"),
        parinit = p_uty(default = NULL, tags = "train"),
        penalty = p_uty(default = NULL, tags = "train"),
        scaling = p_lgl(default = FALSE, tags = "train"),
        se.compute = p_lgl(default = TRUE, tags = "predict"),
        type = p_fct(c("SK", "UK"), default = "SK", tags = "predict"),
        upper = p_uty(default = NULL, tags = "train")
      )

      super$initialize(
        id = "regr.km",
        param_set = ps,
        predict_types = c("response", "se"),
        feature_types = c("logical", "integer", "numeric"),
        packages = c("mlr3learners", "DiceKriging"),
        label = "Kriging",
        man = "mlr3learners::mlr_learners_regr.km"
      )
    }
  ),

  private = list(
    .train = function(task) {

      pv = self$param_set$get_values(tags = "train")
      data = as_numeric_matrix(task$data(cols = task$feature_names))
      truth = task$truth()

      if (!is.null(pv$optim.method) && pv$optim.method == "gen" && !requireNamespace("rgenoud", quietly = TRUE)) {
        stopf("The 'rgenoud' package is required for optimization method 'gen'.")
      }

      # translate the custom `nugget.stability` parameter into `nugget`
      ns = pv$nugget.stability
      if (!is.null(ns)) {
        pv$nugget = if (ns == 0) 0 else ns * stats::var(truth)
      }

      invoke(DiceKriging::km,
        response = truth,
        design = data,
        control = pv$control,
        .args = remove_named(pv, c("control", "nugget.stability"))
      )
    },

    .predict = function(task) {
      pv = self$param_set$get_values(tags = "predict")
      newdata = as_numeric_matrix(ordered_features(task, self))

      # optional jitter to avoid perfect interpolation at training points
      jitter = pv$jitter
      if (!is.null(jitter) && jitter > 0) {
        newdata = newdata + stats::rnorm(length(newdata), mean = 0, sd = jitter)
      }

      # this is required to allow utf8 names
      # alternatively, we could set checkNames = FALSE
      colnames(newdata) = make.names(colnames(newdata), unique = TRUE)

      # `type` and `se.compute` are passed explicitly below, so they must be
      # stripped from `.args` as well: previously only "jitter" was removed,
      # and a user-set `type`/`se.compute` was supplied twice, making
      # predict.km() fail with "matched by multiple actual arguments".
      p = invoke(DiceKriging::predict.km,
        self$model,
        newdata = newdata,
        type = if (is.null(pv$type)) "SK" else pv$type,
        se.compute = self$predict_type == "se",
        .args = remove_named(pv, c("jitter", "type", "se.compute")),
        .opts = list(warnPartialMatchArgs = FALSE)
      )

      # p$sd is NULL when se.compute is FALSE
      list(response = p$mean, se = p$sd)
    }
  )
)

#' @include aaa.R
learners[["regr.km"]] = LearnerRegrKM
= c( # nolint start 3 | breiman_2001 = bibentry("article", 4 | title = "Random Forests", 5 | author = "Breiman, Leo", 6 | year = "2001", 7 | journal = "Machine Learning", 8 | volume = "45", 9 | number = "1", 10 | pages = "5--32", 11 | doi = "10.1023/A:1010933404324", 12 | issn = "1573-0565" 13 | ), 14 | chen_2016 = bibentry("inproceedings", 15 | title = "Xgboost: A scalable tree boosting system", 16 | author = "Chen, Tianqi and Guestrin, Carlos", 17 | year = "2016", 18 | booktitle = "Proceedings of the 22nd ACM SIGKDD Conference on Knowledge Discovery and Data Mining", 19 | pages = "785--794", 20 | doi = "10.1145/2939672.2939785", 21 | organization = "ACM" 22 | ), 23 | cortes_1995 = bibentry("article", 24 | title = "Support-vector networks", 25 | author = "Cortes, Corinna and Vapnik, Vladimir", 26 | year = "1995", 27 | month = "sep", 28 | day = "1", 29 | journal = "Machine Learning", 30 | volume = "20", 31 | number = "3", 32 | pages = "273--297", 33 | doi = "10.1007/BF00994018" 34 | ), 35 | cover_1967 = bibentry("article", 36 | title = "Nearest neighbor pattern classification", 37 | author = "Cover, Thomas and Hart, Peter", 38 | year = "1967", 39 | journal = "IEEE transactions on information theory", 40 | publisher = "IEEE", 41 | volume = "13", 42 | number = "1", 43 | pages = "21--27", 44 | doi = "10.1109/TIT.1967.1053964" 45 | ), 46 | friedman_2010 = bibentry("article", 47 | title = "Regularization Paths for Generalized Linear Models via Coordinate Descent", 48 | author = "Jerome Friedman and Trevor Hastie and Robert Tibshirani", 49 | year = "2010", 50 | journal = "Journal of Statistical Software", 51 | volume = "33", 52 | number = "1", 53 | pages = "1--22", 54 | doi = "10.18637/jss.v033.i01" 55 | ), 56 | hechenbichler_2004 = bibentry("techreport", 57 | title = "Weighted k-nearest-neighbor techniques and ordinal classification", 58 | author = "Hechenbichler, Klaus and Schliep, Klaus", 59 | year = "2004", 60 | number = "Discussion Paper 399, SFB 386", 61 | doi = 
"10.5282/ubm/epub.1769", 62 | institution = "Ludwig-Maximilians University Munich" 63 | ), 64 | ripley_1996 = bibentry("book", 65 | doi = "10.1017/cbo9780511812651", 66 | year = "1996", 67 | month = "jan", 68 | publisher = "Cambridge University Press", 69 | author = "Brian D. Ripley", 70 | title = "Pattern Recognition and Neural Networks" 71 | ), 72 | roustant_2012 = bibentry("article", 73 | title = "{DiceKriging}, {DiceOptim}: Two {R} Packages for the Analysis of Computer Experiments by Kriging-Based Metamodeling and Optimization", 74 | author = "Olivier Roustant and David Ginsbourger and Yves Deville", 75 | year = "2012", 76 | journal = "Journal of Statistical Software", 77 | volume = "51", 78 | number = "1", 79 | pages = "1--55", 80 | doi = "10.18637/jss.v051.i01" 81 | ), 82 | samworth_2012 = bibentry("article", 83 | title = "Optimal weighted nearest neighbour classifiers", 84 | author = "Samworth, Richard J", 85 | year = "2012", 86 | journal = "The Annals of Statistics", 87 | volume = "40", 88 | number = "5", 89 | pages = "2733--2763", 90 | doi = "10.1214/12-AOS1049" 91 | ), 92 | venables_2002 = bibentry("book", 93 | title = "Modern Applied Statistics with S", 94 | author = "W. N. Venables and B. D. Ripley", 95 | year = "2002", 96 | publisher = "Springer", 97 | address = "New York", 98 | url = "http://www.stats.ox.ac.uk/pub/MASS4/", 99 | note = "ISBN 0-387-95457-0", 100 | edition = "Fourth" 101 | ), 102 | wright_2017 = bibentry("article", 103 | title = "{ranger}: A Fast Implementation of Random Forests for High Dimensional Data in {C++} and {R}", 104 | author = "Wright, Marvin N. and Ziegler, Andreas", 105 | year = "2017", 106 | journal = "Journal of Statistical Software", 107 | volume = "77", 108 | number = "1", 109 | pages = "1--17", 110 | doi = "10.18637/jss.v077.i01" 111 | ), 112 | hutter_2015 = bibentry("inproceedings", 113 | title = "Algorithm runtime prediction: methods and evaluation", 114 | author = "Hutter, Frank and Xu, Lin and Hoos, Holger H. 
and Leyton-Brown, Kevin", 115 | year = "2015", 116 | publisher = "AAAI Press", 117 | booktitle = "Proceedings of the 24th International Conference on Artificial Intelligence", 118 | pages = "4197--4201", 119 | series = "IJCAI'15" 120 | #doi = "10.5555/2832747.2832840" 121 | ) 122 | 123 | ) # nolint end 124 | -------------------------------------------------------------------------------- /R/LearnerRegrGlmnet.R: -------------------------------------------------------------------------------- 1 | #' @title GLM with Elastic Net Regularization Regression Learner 2 | #' 3 | #' @name mlr_learners_regr.glmnet 4 | #' 5 | #' @description 6 | #' Generalized linear models with elastic net regularization. 7 | #' Calls [glmnet::glmnet()] from package \CRANpkg{glmnet}. 8 | #' 9 | #' The default for hyperparameter `family` is set to `"gaussian"`. 10 | #' 11 | #' @inherit mlr_learners_classif.glmnet details 12 | #' @inheritSection mlr_learners_classif.cv_glmnet Offset 13 | #' 14 | #' @templateVar id regr.glmnet 15 | #' @template learner 16 | #' 17 | #' @references 18 | #' `r format_bib("friedman_2010")` 19 | #' 20 | #' @export 21 | #' @template seealso_learner 22 | #' @template example 23 | LearnerRegrGlmnet = R6Class("LearnerRegrGlmnet", 24 | inherit = LearnerRegr, 25 | 26 | public = list( 27 | 28 | #' @description 29 | #' Creates a new instance of this [R6][R6::R6Class] class. 
30 | initialize = function() { 31 | ps = ps( 32 | alignment = p_fct(c("lambda", "fraction"), default = "lambda", tags = "train"), 33 | alpha = p_dbl(0, 1, default = 1, tags = "train"), 34 | big = p_dbl(default = 9.9e35, tags = "train"), 35 | devmax = p_dbl(0, 1, default = 0.999, tags = "train"), 36 | dfmax = p_int(0L, tags = "train"), 37 | eps = p_dbl(0, 1, default = 1.0e-6, tags = "train"), 38 | epsnr = p_dbl(0, 1, default = 1.0e-8, tags = "train"), 39 | exact = p_lgl(default = FALSE, tags = "predict"), 40 | exclude = p_int(1L, tags = "train"), 41 | exmx = p_dbl(default = 250.0, tags = "train"), 42 | family = p_fct(c("gaussian", "poisson"), default = "gaussian", tags = "train"), 43 | fdev = p_dbl(0, 1, default = 1.0e-5, tags = "train"), 44 | gamma = p_dbl(default = 1, tags = "train", depends = quote(relax == TRUE)), 45 | grouped = p_lgl(default = TRUE, tags = "train"), 46 | intercept = p_lgl(default = TRUE, tags = "train"), 47 | keep = p_lgl(default = FALSE, tags = "train"), 48 | lambda = p_uty(tags = "train"), 49 | lambda.min.ratio = p_dbl(0, 1, tags = "train"), 50 | lower.limits = p_uty(tags = "train"), 51 | maxit = p_int(1L, default = 100000L, tags = "train"), 52 | mnlam = p_int(1L, default = 5L, tags = "train"), 53 | mxit = p_int(1L, default = 100L, tags = "train"), 54 | mxitnr = p_int(1L, default = 25L, tags = "train"), 55 | use_pred_offset = p_lgl(default = TRUE, tags = "predict"), 56 | nlambda = p_int(1L, default = 100L, tags = "train"), 57 | parallel = p_lgl(default = FALSE, tags = "train"), 58 | penalty.factor = p_uty(tags = "train"), 59 | pmax = p_int(0L, tags = "train"), 60 | pmin = p_dbl(0, 1, default = 1.0e-9, tags = "train"), 61 | prec = p_dbl(default = 1e-10, tags = "train"), 62 | relax = p_lgl(default = FALSE, tags = "train"), 63 | s = p_dbl(0, default = 0.01, tags = "predict"), 64 | standardize = p_lgl(default = TRUE, tags = "train"), 65 | standardize.response = p_lgl(default = FALSE, tags = "train"), 66 | thresh = p_dbl(0, default = 1e-07, tags = 
"train"), 67 | trace.it = p_int(0, 1, default = 0, tags = "train"), 68 | type.gaussian = p_fct(c("covariance", "naive"), tags = "train", depends = quote(family == "gaussian")), 69 | type.logistic = p_fct(c("Newton", "modified.Newton"), tags = "train"), 70 | type.multinomial = p_fct(c("ungrouped", "grouped"), tags = "train"), 71 | upper.limits = p_uty(tags = "train") 72 | ) 73 | 74 | ps$set_values(family = "gaussian", use_pred_offset = TRUE) 75 | 76 | super$initialize( 77 | id = "regr.glmnet", 78 | param_set = ps, 79 | feature_types = c("logical", "integer", "numeric"), 80 | properties = c("weights", "offset"), 81 | packages = c("mlr3learners", "glmnet"), 82 | label = "GLM with Elastic Net Regularization", 83 | man = "mlr3learners::mlr_learners_regr.glmnet" 84 | ) 85 | }, 86 | 87 | #' @description 88 | #' Returns the set of selected features as reported by [glmnet::predict.glmnet()] 89 | #' with `type` set to `"nonzero"`. 90 | #' 91 | #' @param lambda (`numeric(1)`)\cr 92 | #' Custom `lambda`, defaults to the active lambda depending on parameter set. 93 | #' 94 | #' @return (`character()`) of feature names. 
95 | selected_features = function(lambda = NULL) { 96 | glmnet_selected_features(self, lambda) 97 | } 98 | ), 99 | 100 | private = list( 101 | .train = function(task) { 102 | data = as_numeric_matrix(task$data(cols = task$feature_names)) 103 | target = as_numeric_matrix(task$data(cols = task$target_names)) 104 | pv = self$param_set$get_values(tags = "train") 105 | pv$weights = get_weights(task, private) 106 | 107 | pv = glmnet_set_offset(task, "train", pv) 108 | 109 | glmnet_invoke(data, target, pv) 110 | }, 111 | 112 | .predict = function(task) { 113 | newdata = as_numeric_matrix(ordered_features(task, self)) 114 | pv = self$param_set$get_values(tags = "predict") 115 | pv = rename(pv, "predict.gamma", "gamma") 116 | pv$s = glmnet_get_lambda(self, pv) 117 | 118 | pv = glmnet_set_offset(task, "predict", pv) 119 | 120 | response = invoke(predict, self$model, 121 | newx = newdata, 122 | type = "response", .args = pv) 123 | list(response = drop(response)) 124 | } 125 | ) 126 | ) 127 | 128 | #' @include aaa.R 129 | learners[["regr.glmnet"]] = LearnerRegrGlmnet 130 | -------------------------------------------------------------------------------- /R/LearnerRegrCVGlmnet.R: -------------------------------------------------------------------------------- 1 | #' @title GLM with Elastic Net Regularization Regression Learner 2 | #' 3 | #' @name mlr_learners_regr.cv_glmnet 4 | #' 5 | #' @description 6 | #' Generalized linear models with elastic net regularization. 7 | #' Calls [glmnet::cv.glmnet()] from package \CRANpkg{glmnet}. 8 | #' 9 | #' The default for hyperparameter `family` is set to `"gaussian"`. 
10 | #' 11 | #' @inheritSection mlr_learners_classif.cv_glmnet Offset 12 | #' 13 | #' @templateVar id regr.cv_glmnet 14 | #' @template learner 15 | #' 16 | #' @references 17 | #' `r format_bib("friedman_2010")` 18 | #' 19 | #' @export 20 | #' @template seealso_learner 21 | #' @template example 22 | LearnerRegrCVGlmnet = R6Class("LearnerRegrCVGlmnet", 23 | inherit = LearnerRegr, 24 | 25 | public = list( 26 | 27 | #' @description 28 | #' Creates a new instance of this [R6][R6::R6Class] class. 29 | initialize = function() { 30 | ps = ps( 31 | alignment = p_fct(c("lambda", "fraction"), default = "lambda", tags = "train"), 32 | alpha = p_dbl(0, 1, default = 1, tags = "train"), 33 | big = p_dbl(default = 9.9e35, tags = "train"), 34 | devmax = p_dbl(0, 1, default = 0.999, tags = "train"), 35 | dfmax = p_int(0L, tags = "train"), 36 | eps = p_dbl(0, 1, default = 1.0e-6, tags = "train"), 37 | epsnr = p_dbl(0, 1, default = 1.0e-8, tags = "train"), 38 | exclude = p_int(1L, tags = "train"), 39 | exmx = p_dbl(default = 250.0, tags = "train"), 40 | family = p_fct(c("gaussian", "poisson"), default = "gaussian", tags = "train"), 41 | fdev = p_dbl(0, 1, default = 1.0e-5, tags = "train"), 42 | foldid = p_uty(default = NULL, tags = "train"), 43 | gamma = p_uty(tags = "train", depends = quote(relax == TRUE)), 44 | grouped = p_lgl(default = TRUE, tags = "train"), 45 | intercept = p_lgl(default = TRUE, tags = "train"), 46 | keep = p_lgl(default = FALSE, tags = "train"), 47 | lambda = p_uty(tags = "train"), 48 | lambda.min.ratio = p_dbl(0, 1, tags = "train"), 49 | lower.limits = p_uty(tags = "train"), 50 | maxit = p_int(1L, default = 100000L, tags = "train"), 51 | mnlam = p_int(1L, default = 5L, tags = "train"), 52 | mxit = p_int(1L, default = 100L, tags = "train"), 53 | mxitnr = p_int(1L, default = 25L, tags = "train"), 54 | nfolds = p_int(3L, default = 10L, tags = "train"), 55 | nlambda = p_int(1L, default = 100L, tags = "train"), 56 | use_pred_offset = p_lgl(default = TRUE, tags = 
"predict"), 57 | parallel = p_lgl(default = FALSE, tags = "train"), 58 | penalty.factor = p_uty(tags = "train"), 59 | pmax = p_int(0L, tags = "train"), 60 | pmin = p_dbl(0, 1, default = 1.0e-9, tags = "train"), 61 | prec = p_dbl(default = 1e-10, tags = "train"), 62 | predict.gamma = p_dbl(default = "gamma.1se", special_vals = list("gamma.1se", "gamma.min"), tags = "predict"), 63 | relax = p_lgl(default = FALSE, tags = "train"), 64 | s = p_dbl(0, special_vals = list("lambda.1se", "lambda.min"), default = "lambda.1se", tags = "predict"), 65 | standardize = p_lgl(default = TRUE, tags = "train"), 66 | standardize.response = p_lgl(default = FALSE, tags = "train"), 67 | thresh = p_dbl(0, default = 1e-07, tags = "train"), 68 | trace.it = p_int(0, 1, default = 0, tags = "train"), 69 | type.gaussian = p_fct(c("covariance", "naive"), tags = "train", depends = quote(family == "gaussian")), 70 | type.logistic = p_fct(c("Newton", "modified.Newton"), tags = "train"), 71 | type.measure = p_fct(c("deviance", "class", "auc", "mse", "mae"), default = "deviance", tags = "train"), 72 | type.multinomial = p_fct(c("ungrouped", "grouped"), tags = "train"), 73 | upper.limits = p_uty(tags = "train") 74 | ) 75 | 76 | ps$set_values(family = "gaussian", use_pred_offset = TRUE) 77 | 78 | super$initialize( 79 | id = "regr.cv_glmnet", 80 | param_set = ps, 81 | feature_types = c("logical", "integer", "numeric"), 82 | properties = c("weights", "selected_features", "offset"), 83 | packages = c("mlr3learners", "glmnet"), 84 | label = "GLM with Elastic Net Regularization", 85 | man = "mlr3learners::mlr_learners_regr.cv_glmnet" 86 | ) 87 | }, 88 | 89 | #' @description 90 | #' Returns the set of selected features as reported by [glmnet::predict.glmnet()] 91 | #' with `type` set to `"nonzero"`. 92 | #' 93 | #' @param lambda (`numeric(1)`)\cr 94 | #' Custom `lambda`, defaults to the active lambda depending on parameter set. 95 | #' 96 | #' @return (`character()`) of feature names. 
97 | selected_features = function(lambda = NULL) { 98 | glmnet_selected_features(self, lambda) 99 | } 100 | ), 101 | 102 | private = list( 103 | .train = function(task) { 104 | data = as_numeric_matrix(task$data(cols = task$feature_names)) 105 | target = as_numeric_matrix(task$data(cols = task$target_names)) 106 | pv = self$param_set$get_values(tags = "train") 107 | pv$weights = get_weights(task, private) 108 | 109 | pv = glmnet_set_offset(task, "train", pv) 110 | 111 | glmnet_invoke(data, target, pv, cv = TRUE) 112 | }, 113 | 114 | .predict = function(task) { 115 | newdata = as_numeric_matrix(ordered_features(task, self)) 116 | pv = self$param_set$get_values(tags = "predict") 117 | pv = rename(pv, "predict.gamma", "gamma") 118 | 119 | pv = glmnet_set_offset(task, "predict", pv) 120 | 121 | response = invoke(predict, self$model, newx = newdata, 122 | type = "response", .args = pv) 123 | list(response = drop(response)) 124 | } 125 | ) 126 | ) 127 | 128 | #' @include aaa.R 129 | learners[["regr.cv_glmnet"]] = LearnerRegrCVGlmnet 130 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # mlr3learners 2 | 3 | Package website: [release](https://mlr3learners.mlr-org.com/) | [dev](https://mlr3learners.mlr-org.com/dev/) 4 | 5 | 6 | 7 | [![r-cmd-check](https://github.com/mlr-org/mlr3learners/actions/workflows/r-cmd-check.yml/badge.svg)](https://github.com/mlr-org/mlr3learners/actions/workflows/r-cmd-check.yml) 8 | [![Parameter Check](https://github.com/mlr-org/mlr3learners/workflows/Parameter%20Check/badge.svg?branch=main)](https://github.com/mlr-org/mlr3learners/actions) 9 | [![CRAN Status](https://www.r-pkg.org/badges/version-ago/mlr3learners)](https://cran.r-project.org/package=mlr3learners) 10 | [![Mattermost](https://img.shields.io/badge/chat-mattermost-orange.svg)](https://lmmisld-lmu-stats-slds.srv.mwn.de/mlr_invite/) 11 | 12 | 13 | 14 | This 
package provides essential learners for [mlr3](https://mlr3.mlr-org.com), maintained by the mlr-org team. 15 | Additional learners can be found in the [mlr3extralearners](https://github.com/mlr-org/mlr3extralearners) package on GitHub. 16 | Request additional learners over there. 17 | 18 | :point_right: [Table of all learners](https://mlr-org.com/learners.html) 19 | 20 | ## Installation 21 | 22 | ```r 23 | # CRAN version: 24 | install.packages("mlr3learners") 25 | 26 | # Development version: 27 | pak::pak("mlr-org/mlr3learners") 28 | ``` 29 | 30 | If you also want to install all packages of the connected learners, set `dependencies = TRUE`: 31 | 32 | ```r 33 | # CRAN version: 34 | install.packages("mlr3learners", dependencies = TRUE) 35 | 36 | # Development version: 37 | pak::pak("mlr-org/mlr3learners", dependencies = TRUE) 38 | ``` 39 | 40 | ## Classification Learners 41 | 42 | | ID | Learner | Package | 43 | | :------------------------------------------------------------------------------------------------------ | :---------------------------- | :---------------------------------------------------- | 44 | | [classif.cv_glmnet](https://mlr3learners.mlr-org.com/reference/mlr_learners_classif.cv_glmnet.html) | Penalized Logistic Regression | [glmnet](https://cran.r-project.org/package=glmnet) | 45 | | [classif.glmnet](https://mlr3learners.mlr-org.com/reference/mlr_learners_classif.glmnet.html) | Penalized Logistic Regression | [glmnet](https://cran.r-project.org/package=glmnet) | 46 | | [classif.kknn](https://mlr3learners.mlr-org.com/reference/mlr_learners_classif.kknn.html) | k-Nearest Neighbors | [kknn](https://cran.r-project.org/package=kknn) | 47 | | [classif.lda](https://mlr3learners.mlr-org.com/reference/mlr_learners_classif.lda.html) | LDA | [MASS](https://cran.r-project.org/package=MASS) | 48 | | [classif.log_reg](https://mlr3learners.mlr-org.com/reference/mlr_learners_classif.log_reg.html) | Logistic Regression | stats | 49 |
[classif.multinom](https://mlr3learners.mlr-org.com/reference/mlr_learners_classif.multinom.html) | Multinomial log-linear model | [nnet](https://cran.r-project.org/package=nnet) | 50 | | [classif.naive_bayes](https://mlr3learners.mlr-org.com/reference/mlr_learners_classif.naive_bayes.html) | Naive Bayes | [e1071](https://cran.r-project.org/package=e1071) | 51 | | [classif.nnet](https://mlr3learners.mlr-org.com/reference/mlr_learners_classif.nnet.html) | Single Layer Neural Network | [nnet](https://cran.r-project.org/package=nnet) | 52 | | [classif.qda](https://mlr3learners.mlr-org.com/reference/mlr_learners_classif.qda.html) | QDA | [MASS](https://cran.r-project.org/package=MASS) | 53 | | [classif.ranger](https://mlr3learners.mlr-org.com/reference/mlr_learners_classif.ranger.html) | Random Forest | [ranger](https://cran.r-project.org/package=ranger) | 54 | | [classif.svm](https://mlr3learners.mlr-org.com/reference/mlr_learners_classif.svm.html) | SVM | [e1071](https://cran.r-project.org/package=e1071) | 55 | | [classif.xgboost](https://mlr3learners.mlr-org.com/reference/mlr_learners_classif.xgboost.html) | Gradient Boosting | [xgboost](https://cran.r-project.org/package=xgboost) | 56 | 57 | ## Regression Learners 58 | 59 | | ID | Learner | Package | 60 | | :-------------------------------------------------------------------------------------------- | :-------------------------- | :------------------------------------------------------------ | 61 | | [regr.cv_glmnet](https://mlr3learners.mlr-org.com/reference/mlr_learners_regr.cv_glmnet.html) | Penalized Linear Regression | [glmnet](https://cran.r-project.org/package=glmnet) | 62 | | [regr.glmnet](https://mlr3learners.mlr-org.com/reference/mlr_learners_regr.glmnet.html) | Penalized Linear Regression | [glmnet](https://cran.r-project.org/package=glmnet) | 63 | | [regr.kknn](https://mlr3learners.mlr-org.com/reference/mlr_learners_regr.kknn.html) | k-Nearest Neighbors | [kknn](https://cran.r-project.org/package=kknn) 
| 64 | | [regr.km](https://mlr3learners.mlr-org.com/reference/mlr_learners_regr.km.html) | Kriging | [DiceKriging](https://cran.r-project.org/package=DiceKriging) | 65 | | [regr.lm](https://mlr3learners.mlr-org.com/reference/mlr_learners_regr.lm.html) | Linear Regression | stats | 66 | | [regr.nnet](https://mlr3learners.mlr-org.com/reference/mlr_learners_regr.nnet.html) | Single Layer Neural Network | nnet | 67 | | [regr.ranger](https://mlr3learners.mlr-org.com/reference/mlr_learners_regr.ranger.html) | Random Forest | [ranger](https://cran.r-project.org/package=ranger) | 68 | | [regr.svm](https://mlr3learners.mlr-org.com/reference/mlr_learners_regr.svm.html) | SVM | [e1071](https://cran.r-project.org/package=e1071) | 69 | | [regr.xgboost](https://mlr3learners.mlr-org.com/reference/mlr_learners_regr.xgboost.html) | Gradient Boosting | [xgboost](https://cran.r-project.org/package=xgboost) | 70 | -------------------------------------------------------------------------------- /R/LearnerClassifGlmnet.R: -------------------------------------------------------------------------------- 1 | #' @title GLM with Elastic Net Regularization Classification Learner 2 | #' 3 | #' @name mlr_learners_classif.glmnet 4 | #' 5 | #' @description 6 | #' Generalized linear models with elastic net regularization. 7 | #' Calls [glmnet::glmnet()] from package \CRANpkg{glmnet}. 8 | #' 9 | #' @details 10 | #' Caution: This learner is different to learners calling [glmnet::cv.glmnet()] 11 | #' in that it does not use the internal optimization of parameter `lambda`. 12 | #' Instead, `lambda` needs to be tuned by the user (e.g., via \CRANpkg{mlr3tuning}). 13 | #' When `lambda` is tuned, the `glmnet` will be trained for each tuning iteration. 
14 | #' While fitting the whole path of `lambda`s would be more efficient, as is done 15 | #' by default in [glmnet::glmnet()], tuning/selecting the parameter at prediction time 16 | #' (using parameter `s`) is currently not supported in \CRANpkg{mlr3} 17 | #' (at least not in an efficient manner). 18 | #' Tuning the `s` parameter is, therefore, currently discouraged. 19 | #' 20 | #' When the data are i.i.d. and efficiency is key, we recommend using the respective 21 | #' auto-tuning counterparts in [mlr_learners_classif.cv_glmnet()] or 22 | #' [mlr_learners_regr.cv_glmnet()]. 23 | #' However, in some situations this is not applicable, usually when data are 24 | #' imbalanced or not i.i.d. (longitudinal, time-series) and tuning requires 25 | #' custom resampling strategies (blocked design, stratification). 26 | #' 27 | #' @inheritSection mlr_learners_classif.log_reg Internal Encoding 28 | #' @inheritSection mlr_learners_classif.cv_glmnet Offset 29 | #' 30 | #' @templateVar id classif.glmnet 31 | #' @template learner 32 | #' 33 | #' @references 34 | #' `r format_bib("friedman_2010")` 35 | #' 36 | #' @export 37 | #' @template seealso_learner 38 | #' @template example 39 | LearnerClassifGlmnet = R6Class("LearnerClassifGlmnet", 40 | inherit = LearnerClassif, 41 | 42 | public = list( 43 | 44 | #' @description 45 | #' Creates a new instance of this [R6][R6::R6Class] class.
46 | initialize = function() { 47 | ps = ps( 48 | alpha = p_dbl(0, 1, default = 1, tags = "train"), 49 | big = p_dbl(default = 9.9e35, tags = "train"), 50 | devmax = p_dbl(0, 1, default = 0.999, tags = "train"), 51 | dfmax = p_int(0L, tags = "train"), 52 | eps = p_dbl(0, 1, default = 1.0e-6, tags = "train"), 53 | epsnr = p_dbl(0, 1, default = 1.0e-8, tags = "train"), 54 | exact = p_lgl(default = FALSE, tags = "predict"), 55 | exclude = p_int(1L, tags = "train"), 56 | exmx = p_dbl(default = 250.0, tags = "train"), 57 | fdev = p_dbl(0, 1, default = 1.0e-5, tags = "train"), 58 | gamma = p_dbl(default = 1, tags = "predict", depends = quote(relax == TRUE)), 59 | intercept = p_lgl(default = TRUE, tags = "train"), 60 | lambda = p_uty(tags = "train"), 61 | lambda.min.ratio = p_dbl(0, 1, tags = "train"), 62 | lower.limits = p_uty(tags = "train"), 63 | maxit = p_int(1L, default = 100000L, tags = "train"), 64 | mnlam = p_int(1L, default = 5, tags = "train"), 65 | mxit = p_int(1L, default = 100L, tags = "train"), 66 | mxitnr = p_int(1L, default = 25L, tags = "train"), 67 | nlambda = p_int(1L, default = 100L, tags = "train"), 68 | use_pred_offset = p_lgl(default = TRUE, tags = "predict"), 69 | penalty.factor = p_uty(tags = "train"), 70 | pmax = p_int(0L, tags = "train"), 71 | pmin = p_dbl(0, 1, default = 1.0e-9, tags = "train"), 72 | prec = p_dbl(default = 1e-10, tags = "train"), 73 | relax = p_lgl(default = FALSE, tags = "train"), 74 | s = p_dbl(0, default = 0.01, tags = "predict"), 75 | standardize = p_lgl(default = TRUE, tags = "train"), 76 | standardize.response = p_lgl(default = FALSE, tags = "train"), 77 | thresh = p_dbl(0, default = 1e-07, tags = "train"), 78 | trace.it = p_int(0, 1, default = 0, tags = "train"), 79 | type.gaussian = p_fct(c("covariance", "naive"), tags = "train"), 80 | type.logistic = p_fct(c("Newton", "modified.Newton"), tags = "train"), 81 | type.multinomial = p_fct(c("ungrouped", "grouped"), tags = "train"), 82 | upper.limits = p_uty(tags = "train") 
83 | ) 84 | 85 | ps$set_values(use_pred_offset = TRUE) 86 | 87 | super$initialize( 88 | id = "classif.glmnet", 89 | param_set = ps, 90 | predict_types = c("response", "prob"), 91 | feature_types = c("logical", "integer", "numeric"), 92 | properties = c("weights", "twoclass", "multiclass", "offset"), 93 | packages = c("mlr3learners", "glmnet"), 94 | label = "GLM with Elastic Net Regularization", 95 | man = "mlr3learners::mlr_learners_classif.glmnet" 96 | ) 97 | }, 98 | 99 | #' @description 100 | #' Returns the set of selected features as reported by [glmnet::predict.glmnet()] 101 | #' with `type` set to `"nonzero"`. 102 | #' 103 | #' @param lambda (`numeric(1)`)\cr 104 | #' Custom `lambda`, defaults to the active lambda depending on parameter set. 105 | #' 106 | #' @return (`character()`) of feature names. 107 | selected_features = function(lambda = NULL) { 108 | glmnet_selected_features(self, lambda) 109 | } 110 | ), 111 | 112 | private = list( 113 | .train = function(task) { 114 | data = as_numeric_matrix(task$data(cols = task$feature_names)) 115 | target = swap_levels(task$truth()) 116 | pv = self$param_set$get_values(tags = "train") 117 | pv$family = ifelse(length(task$class_names) == 2L, "binomial", "multinomial") 118 | pv$weights = get_weights(task, private) 119 | pv = glmnet_set_offset(task, "train", pv) 120 | 121 | glmnet_invoke(data, target, pv) 122 | }, 123 | 124 | .predict = function(task) { 125 | newdata = as_numeric_matrix(ordered_features(task, self)) 126 | pv = self$param_set$get_values(tags = "predict") 127 | pv = rename(pv, "predict.gamma", "gamma") 128 | pv$s = glmnet_get_lambda(self, pv) 129 | 130 | pv = glmnet_set_offset(task, "predict", pv) 131 | 132 | if (self$predict_type == "response") { 133 | response = invoke(predict, self$model, 134 | newx = newdata, type = "class", 135 | .args = pv) 136 | list(response = drop(response)) 137 | } else { 138 | prob = invoke(predict, self$model, 139 | newx = newdata, type = "response", 140 | .args = pv) 141 | 
142 | if (length(task$class_names) == 2L) { 143 | # the docs are really not clear here; before we tried to reorder the class 144 | # labels alphabetically; this does not seem to be required, we instead rely on 145 | # the (undocumented) class labels as stored in the model 146 | prob = cbind(1 - prob, prob) 147 | colnames(prob) = self$model$classnames 148 | } else { 149 | prob = prob[, , 1L] 150 | } 151 | 152 | list(prob = prob) 153 | } 154 | } 155 | ) 156 | ) 157 | 158 | #' @include aaa.R 159 | learners[["classif.glmnet"]] = LearnerClassifGlmnet 160 | -------------------------------------------------------------------------------- /R/LearnerClassifCVGlmnet.R: -------------------------------------------------------------------------------- 1 | #' @title GLM with Elastic Net Regularization Classification Learner 2 | #' 3 | #' @name mlr_learners_classif.cv_glmnet 4 | #' 5 | #' @description 6 | #' Generalized linear models with elastic net regularization. 7 | #' Calls [glmnet::cv.glmnet()] from package \CRANpkg{glmnet}. 8 | #' 9 | #' The default for hyperparameter `family` is set to `"binomial"` or `"multinomial"`, 10 | #' depending on the number of classes. 11 | #' 12 | #' @inheritSection mlr_learners_classif.log_reg Internal Encoding 13 | #' 14 | #' @section Offset: 15 | #' If a `Task` contains a column with the `offset` role, it is automatically incorporated during training via the `offset` argument in [glmnet::glmnet()]. 16 | #' During prediction, the offset column from the test set is used only if `use_pred_offset = TRUE` (default), passed via the `newoffset` argument in [glmnet::predict.glmnet()]. 17 | #' Otherwise, if the user sets `use_pred_offset = FALSE`, a zero offset is applied, effectively disabling the offset adjustment during prediction. 
18 | #' 19 | #' @templateVar id classif.cv_glmnet 20 | #' @template learner 21 | #' 22 | #' @references 23 | #' `r format_bib("friedman_2010")` 24 | #' 25 | #' @export 26 | #' @template seealso_learner 27 | #' @template example 28 | LearnerClassifCVGlmnet = R6Class("LearnerClassifCVGlmnet", 29 | inherit = LearnerClassif, 30 | 31 | public = list( 32 | 33 | #' @description 34 | #' Creates a new instance of this [R6][R6::R6Class] class. 35 | initialize = function() { 36 | ps = ps( 37 | alignment = p_fct(c("lambda", "fraction"), default = "lambda", tags = "train"), 38 | alpha = p_dbl(0, 1, default = 1, tags = "train"), 39 | big = p_dbl(tags = "train", default = 9.9e35), 40 | devmax = p_dbl(0, 1, default = 0.999, tags = "train"), 41 | dfmax = p_int(0L, tags = "train"), 42 | epsnr = p_dbl(0, 1, default = 1.0e-8, tags = "train"), 43 | eps = p_dbl(0, 1, default = 1.0e-6, tags = "train"), 44 | exclude = p_int(1L, tags = "train"), 45 | exmx = p_dbl(default = 250.0, tags = "train"), 46 | fdev = p_dbl(0, 1, default = 1.0e-5, tags = "train"), 47 | foldid = p_uty(default = NULL, tags = "train"), 48 | gamma = p_uty(tags = "train", depends = quote(relax == TRUE)), 49 | grouped = p_lgl(default = TRUE, tags = "train"), 50 | intercept = p_lgl(default = TRUE, tags = "train"), 51 | keep = p_lgl(default = FALSE, tags = "train"), 52 | lambda.min.ratio = p_dbl(0, 1, tags = "train"), 53 | lambda = p_uty(tags = "train"), 54 | lower.limits = p_uty(tags = "train"), 55 | maxit = p_int(1L, default = 100000L, tags = "train"), 56 | mnlam = p_int(1L, default = 5, tags = "train"), 57 | mxitnr = p_int(1L, default = 25L, tags = "train"), 58 | mxit = p_int(1L, default = 100L, tags = "train"), 59 | nfolds = p_int(3L, default = 10L, tags = "train"), 60 | nlambda = p_int(1L, default = 100L, tags = "train"), 61 | use_pred_offset = p_lgl(default = TRUE, tags = "predict"), 62 | parallel = p_lgl(default = FALSE, tags = "train"), 63 | penalty.factor = p_uty(tags = "train"), 64 | pmax = p_int(0L, tags = 
"train"), 65 | pmin = p_dbl(0, 1, default = 1.0e-9, tags = "train"), 66 | prec = p_dbl(default = 1e-10, tags = "train"), 67 | predict.gamma = p_dbl(default = "gamma.1se", special_vals = list("gamma.1se", "gamma.min"), tags = "predict"), 68 | relax = p_lgl(default = FALSE, tags = "train"), 69 | s = p_dbl(0, special_vals = list("lambda.1se", "lambda.min"), default = "lambda.1se", tags = "predict"), 70 | standardize = p_lgl(default = TRUE, tags = "train"), 71 | standardize.response = p_lgl(default = FALSE, tags = "train"), 72 | thresh = p_dbl(0, default = 1e-07, tags = "train"), 73 | trace.it = p_int(0, 1, default = 0, tags = "train"), 74 | type.gaussian = p_fct(c("covariance", "naive"), tags = "train"), 75 | type.logistic = p_fct(c("Newton", "modified.Newton"), tags = "train"), 76 | type.measure = p_fct(c("deviance", "class", "auc", "mse", "mae"), default = "deviance", tags = "train"), 77 | type.multinomial = p_fct(c("ungrouped", "grouped"), tags = "train"), 78 | upper.limits = p_uty(tags = "train") 79 | ) 80 | 81 | ps$set_values(use_pred_offset = TRUE) 82 | 83 | super$initialize( 84 | id = "classif.cv_glmnet", 85 | param_set = ps, 86 | predict_types = c("response", "prob"), 87 | feature_types = c("logical", "integer", "numeric"), 88 | properties = c("weights", "twoclass", "multiclass", "selected_features", "offset"), 89 | packages = c("mlr3learners", "glmnet"), 90 | label = "GLM with Elastic Net Regularization", 91 | man = "mlr3learners::mlr_learners_classif.cv_glmnet" 92 | ) 93 | }, 94 | 95 | #' @description 96 | #' Returns the set of selected features as reported by [glmnet::predict.glmnet()] 97 | #' with `type` set to `"nonzero"`. 98 | #' 99 | #' @param lambda (`numeric(1)`)\cr 100 | #' Custom `lambda`, defaults to the active lambda depending on parameter set. 101 | #' 102 | #' @return (`character()`) of feature names. 
103 | selected_features = function(lambda = NULL) { 104 | glmnet_selected_features(self, lambda) 105 | } 106 | ), 107 | 108 | private = list( 109 | .train = function(task) { 110 | data = as_numeric_matrix(task$data(cols = task$feature_names)) 111 | target = swap_levels(task$truth()) 112 | pv = self$param_set$get_values(tags = "train") 113 | pv$family = ifelse(length(task$class_names) == 2L, "binomial", "multinomial") 114 | pv$weights = get_weights(task, private) 115 | 116 | pv = glmnet_set_offset(task, "train", pv) 117 | 118 | glmnet_invoke(data, target, pv, cv = TRUE) 119 | }, 120 | 121 | .predict = function(task) { 122 | newdata = as_numeric_matrix(ordered_features(task, self)) 123 | pv = self$param_set$get_values(tags = "predict") 124 | pv = rename(pv, "predict.gamma", "gamma") 125 | 126 | pv = glmnet_set_offset(task, "predict", pv) 127 | 128 | if (self$predict_type == "response") { 129 | response = invoke(predict, self$model, 130 | newx = newdata, type = "class", 131 | .args = pv) 132 | 133 | list(response = drop(response)) 134 | } else { 135 | prob = invoke(predict, self$model, 136 | newx = newdata, type = "response", 137 | .args = pv) 138 | 139 | if (length(task$class_names) == 2L) { 140 | # the docs are really not clear here; before we tried to reorder the class 141 | # labels alphabetically; this does not seem to be required, we instead rely on 142 | # the (undocumented) class labels as stored in the model 143 | prob = cbind(1 - prob, prob) 144 | colnames(prob) = self$model$glmnet.fit$classnames 145 | } else { 146 | prob = prob[, , 1L] 147 | } 148 | 149 | list(prob = prob) 150 | } 151 | } 152 | ) 153 | ) 154 | 155 | #' @include aaa.R 156 | learners[["classif.cv_glmnet"]] = LearnerClassifCVGlmnet 157 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 
2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | 9 | This version of the GNU Lesser General Public License incorporates 10 | the terms and conditions of version 3 of the GNU General Public 11 | License, supplemented by the additional permissions listed below. 12 | 13 | 0. Additional Definitions. 14 | 15 | As used herein, "this License" refers to version 3 of the GNU Lesser 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 17 | General Public License. 18 | 19 | "The Library" refers to a covered work governed by this License, 20 | other than an Application or a Combined Work as defined below. 21 | 22 | An "Application" is any work that makes use of an interface provided 23 | by the Library, but which is not otherwise based on the Library. 24 | Defining a subclass of a class defined by the Library is deemed a mode 25 | of using an interface provided by the Library. 26 | 27 | A "Combined Work" is a work produced by combining or linking an 28 | Application with the Library. The particular version of the Library 29 | with which the Combined Work was made is also called the "Linked 30 | Version". 31 | 32 | The "Minimal Corresponding Source" for a Combined Work means the 33 | Corresponding Source for the Combined Work, excluding any source code 34 | for portions of the Combined Work that, considered in isolation, are 35 | based on the Application, and not on the Linked Version. 36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | 1. Exception to Section 3 of the GNU GPL. 
43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | 2. Conveying Modified Versions. 48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | a) under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | b) under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | 3. Object Code Incorporating Material from Library Header Files. 64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | a) Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 75 | 76 | b) Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | 4. Combined Works. 
80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | a) Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 90 | 91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | c) For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 98 | 99 | d) Do one of the following: 100 | 101 | 0) Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 108 | 109 | 1) Use a suitable shared library mechanism for linking with the 110 | Library. A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 
115 | 116 | e) Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | 5. Combined Libraries. 129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | a) Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | b) Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | 6. Revised Versions of the GNU Lesser General Public License. 145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 150 | 151 | Each version is given a distinguishing version number. 
If the 152 | Library as you received it specifies that a certain numbered version 153 | of the GNU Lesser General Public License "or any later version" 154 | applies to it, you have the option of following the terms and 155 | conditions either of that published version or of any later version 156 | published by the Free Software Foundation. If the Library as you 157 | received it does not specify a version number of the GNU Lesser 158 | General Public License, you may choose any version of the GNU Lesser 159 | General Public License ever published by the Free Software Foundation. 160 | 161 | If the Library as you received it specifies that a proxy can decide 162 | whether future versions of the GNU Lesser General Public License shall 163 | apply, that proxy's public statement of acceptance of any version is 164 | permanent authorization for you to choose that version for the 165 | Library. 166 | -------------------------------------------------------------------------------- /man/mlr_learners_classif.naive_bayes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/LearnerClassifNaiveBayes.R 3 | \name{mlr_learners_classif.naive_bayes} 4 | \alias{mlr_learners_classif.naive_bayes} 5 | \alias{LearnerClassifNaiveBayes} 6 | \title{Naive Bayes Classification Learner} 7 | \description{ 8 | Naive Bayes classification. 9 | Calls \code{\link[e1071:naiveBayes]{e1071::naiveBayes()}} from package \CRANpkg{e1071}. 10 | } 11 | \section{Dictionary}{ 12 | 13 | This \link[mlr3:Learner]{mlr3::Learner} can be instantiated via the \link[mlr3misc:Dictionary]{dictionary} \link[mlr3:mlr_learners]{mlr3::mlr_learners} or with the associated sugar function \code{\link[mlr3:mlr_sugar]{mlr3::lrn()}}: 14 | 15 | \if{html}{\out{
}}\preformatted{mlr_learners$get("classif.naive_bayes") 16 | lrn("classif.naive_bayes") 17 | }\if{html}{\out{
}} 18 | } 19 | 20 | \section{Meta Information}{ 21 | 22 | \itemize{ 23 | \item Task type: \dQuote{classif} 24 | \item Predict Types: \dQuote{response}, \dQuote{prob} 25 | \item Feature Types: \dQuote{logical}, \dQuote{integer}, \dQuote{numeric}, \dQuote{factor} 26 | \item Required Packages: \CRANpkg{mlr3}, \CRANpkg{mlr3learners}, \CRANpkg{e1071} 27 | } 28 | } 29 | 30 | \section{Parameters}{ 31 | \tabular{llll}{ 32 | Id \tab Type \tab Default \tab Range \cr 33 | eps \tab numeric \tab 0 \tab \eqn{(-\infty, \infty)}{(-Inf, Inf)} \cr 34 | laplace \tab numeric \tab 0 \tab \eqn{[0, \infty)}{[0, Inf)} \cr 35 | threshold \tab numeric \tab 0.001 \tab \eqn{(-\infty, \infty)}{(-Inf, Inf)} \cr 36 | } 37 | } 38 | 39 | \examples{ 40 | \dontshow{if (mlr3misc::require_namespaces(lrn("classif.naive_bayes")$packages, quietly = TRUE)) withAutoprint(\{ # examplesIf} 41 | # Define the Learner and set parameter values 42 | learner = lrn("classif.naive_bayes") 43 | print(learner) 44 | 45 | # Define a Task 46 | task = tsk("sonar") 47 | 48 | # Create train and test set 49 | ids = partition(task) 50 | 51 | # Train the learner on the training ids 52 | learner$train(task, row_ids = ids$train) 53 | 54 | # Print the model 55 | print(learner$model) 56 | 57 | # Importance method 58 | if ("importance" \%in\% learner$properties) print(learner$importance) 59 | 60 | # Make predictions for the test rows 61 | predictions = learner$predict(task, row_ids = ids$test) 62 | 63 | # Score the predictions 64 | predictions$score() 65 | \dontshow{\}) # examplesIf} 66 | } 67 | \seealso{ 68 | \itemize{ 69 | \item Chapter in the \href{https://mlr3book.mlr-org.com/}{mlr3book}: 70 | \url{https://mlr3book.mlr-org.com/chapters/chapter2/data_and_basic_modeling.html#sec-learners} 71 | \item Package \href{https://github.com/mlr-org/mlr3extralearners}{mlr3extralearners} for more learners. 
72 | \item \link[mlr3misc:Dictionary]{Dictionary} of \link[mlr3:Learner]{Learners}: \link[mlr3:mlr_learners]{mlr3::mlr_learners} 73 | \item \code{as.data.table(mlr_learners)} for a table of available \link[mlr3:Learner]{Learners} in the running session (depending on the loaded packages). 74 | \item \CRANpkg{mlr3pipelines} to combine learners with pre- and postprocessing steps. 75 | \item Extension packages for additional task types: 76 | \itemize{ 77 | \item \CRANpkg{mlr3proba} for probabilistic supervised regression and survival analysis. 78 | \item \CRANpkg{mlr3cluster} for unsupervised clustering. 79 | } 80 | \item \CRANpkg{mlr3tuning} for tuning of hyperparameters, \CRANpkg{mlr3tuningspaces} 81 | for established default tuning spaces. 82 | } 83 | 84 | Other Learner: 85 | \code{\link{mlr_learners_classif.cv_glmnet}}, 86 | \code{\link{mlr_learners_classif.glmnet}}, 87 | \code{\link{mlr_learners_classif.kknn}}, 88 | \code{\link{mlr_learners_classif.lda}}, 89 | \code{\link{mlr_learners_classif.log_reg}}, 90 | \code{\link{mlr_learners_classif.multinom}}, 91 | \code{\link{mlr_learners_classif.nnet}}, 92 | \code{\link{mlr_learners_classif.qda}}, 93 | \code{\link{mlr_learners_classif.ranger}}, 94 | \code{\link{mlr_learners_classif.svm}}, 95 | \code{\link{mlr_learners_classif.xgboost}}, 96 | \code{\link{mlr_learners_regr.cv_glmnet}}, 97 | \code{\link{mlr_learners_regr.glmnet}}, 98 | \code{\link{mlr_learners_regr.kknn}}, 99 | \code{\link{mlr_learners_regr.km}}, 100 | \code{\link{mlr_learners_regr.lm}}, 101 | \code{\link{mlr_learners_regr.nnet}}, 102 | \code{\link{mlr_learners_regr.ranger}}, 103 | \code{\link{mlr_learners_regr.svm}}, 104 | \code{\link{mlr_learners_regr.xgboost}} 105 | } 106 | \concept{Learner} 107 | \section{Super classes}{ 108 | \code{\link[mlr3:Learner]{mlr3::Learner}} -> \code{\link[mlr3:LearnerClassif]{mlr3::LearnerClassif}} -> \code{LearnerClassifNaiveBayes} 109 | } 110 | \section{Methods}{ 111 | \subsection{Public methods}{ 112 | \itemize{ 113 | 
\item \href{#method-LearnerClassifNaiveBayes-new}{\code{LearnerClassifNaiveBayes$new()}} 114 | \item \href{#method-LearnerClassifNaiveBayes-clone}{\code{LearnerClassifNaiveBayes$clone()}} 115 | } 116 | } 117 | \if{html}{\out{ 118 |
Inherited methods 119 | 133 |
134 | }} 135 | \if{html}{\out{
}} 136 | \if{html}{\out{}} 137 | \if{latex}{\out{\hypertarget{method-LearnerClassifNaiveBayes-new}{}}} 138 | \subsection{Method \code{new()}}{ 139 | Creates a new instance of this \link[R6:R6Class]{R6} class. 140 | \subsection{Usage}{ 141 | \if{html}{\out{
}}\preformatted{LearnerClassifNaiveBayes$new()}\if{html}{\out{
}} 142 | } 143 | 144 | } 145 | \if{html}{\out{
}} 146 | \if{html}{\out{}} 147 | \if{latex}{\out{\hypertarget{method-LearnerClassifNaiveBayes-clone}{}}} 148 | \subsection{Method \code{clone()}}{ 149 | The objects of this class are cloneable with this method. 150 | \subsection{Usage}{ 151 | \if{html}{\out{
}}\preformatted{LearnerClassifNaiveBayes$clone(deep = FALSE)}\if{html}{\out{
}} 152 | } 153 | 154 | \subsection{Arguments}{ 155 | \if{html}{\out{
}} 156 | \describe{ 157 | \item{\code{deep}}{Whether to make a deep clone.} 158 | } 159 | \if{html}{\out{
}} 160 | } 161 | } 162 | } 163 | -------------------------------------------------------------------------------- /R/LearnerClassifRanger.R: -------------------------------------------------------------------------------- 1 | #' @title Ranger Classification Learner 2 | #' 3 | #' @name mlr_learners_classif.ranger 4 | #' 5 | #' @description 6 | #' Random classification forest. 7 | #' Calls [ranger::ranger()] from package \CRANpkg{ranger}. 8 | #' 9 | #' @section Custom mlr3 parameters: 10 | #' - `mtry`: 11 | #' - This hyperparameter can alternatively be set via our hyperparameter `mtry.ratio` 12 | #' as `mtry = max(ceiling(mtry.ratio * n_features), 1)`. 13 | #' Note that `mtry` and `mtry.ratio` are mutually exclusive. 14 | #' 15 | #' @section Initial parameter values: 16 | #' - `num.threads`: 17 | #' - Actual default: `2`, using two threads, while also respecting environment variable 18 | #' `R_RANGER_NUM_THREADS`, `options(ranger.num.threads = N)`, or `options(Ncpus = N)`, with 19 | #' precedence in that order. 20 | #' - Adjusted value: 1. 21 | #' - Reason for change: Conflicting with parallelization via \CRANpkg{future}. 22 | #' 23 | #' @templateVar id classif.ranger 24 | #' @template learner 25 | #' 26 | #' @references 27 | #' `r format_bib("wright_2017", "breiman_2001")` 28 | #' 29 | #' @export 30 | #' @template seealso_learner 31 | #' @template example 32 | LearnerClassifRanger = R6Class("LearnerClassifRanger", 33 | inherit = LearnerClassif, 34 | 35 | public = list( 36 | 37 | #' @description 38 | #' Creates a new instance of this [R6][R6::R6Class] class. 
39 | initialize = function() { 40 | ps = ps( 41 | always.split.variables = p_uty(tags = "train"), 42 | class.weights = p_uty(default = NULL, tags = "train"), 43 | holdout = p_lgl(default = FALSE, tags = "train"), 44 | importance = p_fct(c("none", "impurity", "impurity_corrected", "permutation"), tags = "train"), 45 | keep.inbag = p_lgl(default = FALSE, tags = "train"), 46 | max.depth = p_int(default = NULL, lower = 1L, special_vals = list(NULL), tags = "train"), 47 | min.bucket = p_uty(default = 1L, tags = "train", 48 | custom_check = function(x) { 49 | if (checkmate::test_integerish(x)) return(TRUE) 50 | "Must be integer of length 1 or greater" 51 | }), 52 | min.node.size = p_uty(default = NULL, special_vals = list(NULL), tags = "train", 53 | custom_check = function(x) { 54 | if (checkmate::test_integerish(x, null.ok = TRUE)) return(TRUE) 55 | "Must be integer of length 1 or greater" 56 | }), 57 | mtry = p_int(lower = 1L, special_vals = list(NULL), tags = "train"), 58 | mtry.ratio = p_dbl(lower = 0, upper = 1, tags = "train"), 59 | na.action = p_fct(c("na.learn", "na.omit", "na.fail"), default = "na.learn", tags = "train"), 60 | num.random.splits = p_int(1L, default = 1L, tags = "train", depends = quote(splitrule == "extratrees")), 61 | node.stats = p_lgl(default = FALSE, tags = "train"), 62 | num.threads = p_int(1L, default = 1L, tags = c("train", "predict", "threads")), 63 | num.trees = p_int(1L, default = 500L, tags = c("train", "predict", "hotstart")), 64 | oob.error = p_lgl(default = TRUE, tags = "train"), 65 | regularization.factor = p_uty(default = 1, tags = "train"), 66 | regularization.usedepth = p_lgl(default = FALSE, tags = "train"), 67 | replace = p_lgl(default = TRUE, tags = "train"), 68 | respect.unordered.factors = p_fct(c("ignore", "order", "partition"), tags = "train"), 69 | sample.fraction = p_dbl(0L, 1L, tags = "train"), 70 | save.memory = p_lgl(default = FALSE, tags = "train"), 71 | scale.permutation.importance = p_lgl(default = FALSE, tags = 
"train", depends = quote(importance == "permutation")), 72 | seed = p_int(default = NULL, special_vals = list(NULL), tags = c("train", "predict")), 73 | split.select.weights = p_uty(default = NULL, tags = "train"), 74 | splitrule = p_fct(c("gini", "extratrees", "hellinger"), default = "gini", tags = "train"), 75 | verbose = p_lgl(default = TRUE, tags = c("train", "predict")), 76 | write.forest = p_lgl(default = TRUE, tags = "train") 77 | ) 78 | 79 | ps$set_values(num.threads = 1L) 80 | 81 | super$initialize( 82 | id = "classif.ranger", 83 | param_set = ps, 84 | predict_types = c("response", "prob"), 85 | feature_types = c("logical", "integer", "numeric", "character", "factor", "ordered"), 86 | properties = c("weights", "twoclass", "multiclass", "importance", "oob_error", "hotstart_backward", "missings", "selected_features"), 87 | packages = c("mlr3learners", "ranger"), 88 | label = "Random Forest", 89 | man = "mlr3learners::mlr_learners_classif.ranger" 90 | ) 91 | }, 92 | 93 | #' @description 94 | #' The importance scores are extracted from the model slot `variable.importance`. 95 | #' Parameter `importance.mode` must be set to `"impurity"`, `"impurity_corrected"`, or 96 | #' `"permutation"` 97 | #' 98 | #' @return Named `numeric()`. 99 | importance = function() { 100 | if (is.null(self$model)) { 101 | stopf("No model stored") 102 | } 103 | if (self$model$importance.mode == "none") { 104 | stopf("No importance stored") 105 | } 106 | 107 | sort(self$model$variable.importance, decreasing = TRUE) 108 | }, 109 | 110 | #' @description 111 | #' The out-of-bag error, extracted from model slot `prediction.error`. 112 | #' 113 | #' @return `numeric(1)`. 
114 | oob_error = function() { 115 | if (!is.null(self$state$oob_error)) { 116 | return(self$state$oob_error) 117 | } 118 | 119 | if (!is.null(self$model)) { 120 | return(self$model$prediction.error) 121 | } 122 | 123 | stopf("No model stored") 124 | }, 125 | 126 | #' @description 127 | #' The set of features used for node splitting in the forest. 128 | #' 129 | #' @return `character()`. 130 | selected_features = function() { 131 | ranger_selected_features(self$model, self$state$feature_names) 132 | } 133 | ), 134 | 135 | private = list( 136 | .train = function(task) { 137 | pv = self$param_set$get_values(tags = "train") 138 | pv = convert_ratio(pv, "mtry", "mtry.ratio", length(task$feature_names)) 139 | pv$case.weights = get_weights(task, private) 140 | 141 | invoke(ranger::ranger, 142 | dependent.variable.name = task$target_names, 143 | data = task$data(), 144 | probability = self$predict_type == "prob", 145 | .args = pv 146 | ) 147 | }, 148 | 149 | .predict = function(task) { 150 | pv = self$param_set$get_values(tags = "predict") 151 | newdata = ordered_features(task, self) 152 | 153 | prediction = invoke(predict, 154 | self$model, 155 | data = newdata, 156 | predict.type = "response", .args = pv 157 | ) 158 | 159 | if (self$predict_type == "response") { 160 | list(response = prediction$predictions) 161 | } else { 162 | list(prob = prediction$predictions) 163 | } 164 | }, 165 | 166 | .hotstart = function(task) { 167 | model = self$model 168 | model$num.trees = self$param_set$values$num.trees 169 | model 170 | }, 171 | 172 | .extract_oob_error = function() { 173 | self$model$prediction.error 174 | } 175 | ) 176 | ) 177 | 178 | #' @export 179 | default_values.LearnerClassifRanger = function(x, search_space, task, ...) 
{ # nolint 180 | special_defaults = list( 181 | mtry = floor(sqrt(length(task$feature_names))), 182 | mtry.ratio = floor(sqrt(length(task$feature_names))) / length(task$feature_names), 183 | min.node.size = if (x$predict_type == "response") 5 else 10, 184 | sample.fraction = 1 185 | ) 186 | defaults = insert_named(default_values(x$param_set), special_defaults) 187 | defaults[search_space$ids()] 188 | } 189 | 190 | #' @include aaa.R 191 | learners[["classif.ranger"]] = LearnerClassifRanger 192 | --------------------------------------------------------------------------------