├── jarl.toml
├── man-roxygen
├── param_rows.R
├── param_id.R
├── param_label.R
├── param_param_set.R
├── param_measures.R
├── param_task_properties.R
├── param_primary_key.R
├── param_range.R
├── param_predict_types.R
├── param_aggregator.R
├── param_feature_types.R
├── param_task_type.R
├── param_man.R
├── param_predict_type.R
├── simple_example.R
├── param_packages.R
├── param_backend.R
├── param_minimize.R
├── task.R
├── param_predict_sets.R
├── learner.R
├── param_measure_properties.R
├── param_average.R
├── example.R
├── seealso_task.R
├── seealso_learner.R
├── param_learner_properties.R
├── measure_fpc.R
└── measure_sil.R
├── .ignore
├── pkgdown
├── favicon
│ ├── favicon.ico
│ ├── favicon-16x16.png
│ ├── favicon-32x32.png
│ ├── apple-touch-icon.png
│ ├── apple-touch-icon-120x120.png
│ ├── apple-touch-icon-152x152.png
│ ├── apple-touch-icon-180x180.png
│ ├── apple-touch-icon-60x60.png
│ └── apple-touch-icon-76x76.png
└── _pkgdown.yml
├── tests
├── testthat
│ ├── helper.R
│ ├── test_MeasureClust.R
│ ├── helper_mlr3.R
│ ├── test_TaskClust.R
│ ├── test_mlr_learners_clust_featureless.R
│ ├── test_mlr_learners_clust_pam.R
│ ├── test_mlr_learners_clust_ff.R
│ ├── test_mlr_learners_clust_cobweb.R
│ ├── test_mlr_learners_clust_xmeans.R
│ ├── test_mlr_learners_clust_dbscan.R
│ ├── test_mlr_learners_clust_dbscan_fpc.R
│ ├── test_mlr_learners_clust_optics.R
│ ├── test_mlr_learners_clust_em.R
│ ├── test_mlr_learners_clust_bico.R
│ ├── test_mlr_learners_clust_simplekmeans.R
│ ├── test_mlr_learners_clust_mclust.R
│ ├── test_mlr_learners_clust_meanshift.R
│ ├── test_mlr_learners_clust_diana.R
│ ├── test_mlr_learners_clust_hdbscan.R
│ ├── test_mlr_learners_clust_agnes.R
│ ├── helper_expectations.R
│ ├── test_mlr_learners_clust_hclust.R
│ ├── test_mlr_learners_clust_fanny.R
│ ├── test_mlr_learners_clust_ap.R
│ ├── test_mlr_learners_clust_birch.R
│ ├── test_mlr_learners_clust_kmeans.R
│ ├── test_mlr_learners_clust_cmeans.R
│ ├── test_mlr_learners_clust_kkmeans.R
│ ├── test_mlr_learners_clust_mbatchkmeans.R
│ ├── test_PredictionClust.R
│ └── test_LearnerClust.R
└── testthat.R
├── man
├── figures
│ └── README-unnamed-chunk-6-1.png
├── mlr3cluster-package.Rd
├── mlr_measures_clust.dunn.Rd
├── mlr_measures_clust.ch.Rd
├── mlr_measures_clust.wss.Rd
├── as_prediction_clust.Rd
├── mlr_measures_clust.silhouette.Rd
├── as_task_clust.Rd
├── mlr_tasks_ruspini.Rd
├── mlr_tasks_usarrests.Rd
├── PredictionClust.Rd
├── MeasureClust.Rd
├── TaskClust.Rd
├── mlr_learners_clust.featureless.Rd
└── LearnerClust.Rd
├── .github
├── dependabot.yml
└── workflows
│ ├── pkgdown.yml
│ └── r-cmd-check.yml
├── cran-comments.md
├── .editorconfig
├── R
├── measures.R
├── TaskClust_ruspini.R
├── helper.R
├── TaskClust_usarrest.R
├── TaskClust.R
├── MeasureClust.R
├── as_prediction_clust.R
├── LearnerClustCobweb.R
├── LearnerClustBICO.R
├── LearnerClustFarthestFirst.R
├── LearnerClustBIRCH.R
├── LearnerClustHDBSCAN.R
├── as_task_clust.R
├── LearnerClust.R
├── LearnerClustDiana.R
├── LearnerClustMclust.R
├── LearnerClustMeanShift.R
├── LearnerClustDBSCAN.R
├── LearnerClustFanny.R
├── LearnerClustOPTICS.R
├── LearnerClustKMeans.R
├── LearnerClustEM.R
├── LearnerClustFeatureless.R
├── MeasureClustInternal.R
├── LearnerClustXMeans.R
├── LearnerClustDBSCANfpc.R
├── PredictionClust.R
├── LearnerClustPAM.R
├── LearnerClustSimpleKMeans.R
├── LearnerClustAgnes.R
├── PredictionDataClust.R
├── LearnerClustCMeans.R
├── LearnerClustHclust.R
├── LearnerClustAffinityPropagation.R
├── zzz.R
├── LearnerClustMiniBatchKMeans.R
└── LearnerClustKKMeans.R
├── mlr3cluster.Rproj
├── .Rbuildignore
├── .lintr
├── .gitignore
├── .pre-commit-config.yaml
├── NAMESPACE
├── NEWS.md
├── DESCRIPTION
└── README.Rmd
/jarl.toml:
--------------------------------------------------------------------------------
1 | [lint]
2 | assignment = "="
3 |
--------------------------------------------------------------------------------
/man-roxygen/param_rows.R:
--------------------------------------------------------------------------------
1 | #' @param rows `integer()`\cr
2 | #' Row indices.
3 |
--------------------------------------------------------------------------------
/.ignore:
--------------------------------------------------------------------------------
1 | man/
2 | docs/
3 | inst/doc/
4 | attic/
5 | vignettes/*.html
6 | pkgdown/
7 |
--------------------------------------------------------------------------------
/man-roxygen/param_id.R:
--------------------------------------------------------------------------------
1 | #' @param id (`character(1)`)\cr
2 | #' Identifier for the new instance.
3 |
--------------------------------------------------------------------------------
/man-roxygen/param_label.R:
--------------------------------------------------------------------------------
1 | #' @param label (`character(1)`)\cr
2 | #' Label for the new instance.
3 |
--------------------------------------------------------------------------------
/man-roxygen/param_param_set.R:
--------------------------------------------------------------------------------
1 | #' @param param_set ([paradox::ParamSet])\cr
2 | #' Set of hyperparameters.
3 |
--------------------------------------------------------------------------------
/pkgdown/favicon/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlr-org/mlr3cluster/HEAD/pkgdown/favicon/favicon.ico
--------------------------------------------------------------------------------
/tests/testthat/helper.R:
--------------------------------------------------------------------------------
1 | library(checkmate)
2 | library(mlr3)
3 | library(mlr3cluster)
4 | library(testthat)
5 |
--------------------------------------------------------------------------------
/man-roxygen/param_measures.R:
--------------------------------------------------------------------------------
1 | #' @param measures ([Measure] | list of [Measure])\cr
2 | #' Measure(s) to calculate.
3 |
--------------------------------------------------------------------------------
/pkgdown/favicon/favicon-16x16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlr-org/mlr3cluster/HEAD/pkgdown/favicon/favicon-16x16.png
--------------------------------------------------------------------------------
/pkgdown/favicon/favicon-32x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlr-org/mlr3cluster/HEAD/pkgdown/favicon/favicon-32x32.png
--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlr-org/mlr3cluster/HEAD/pkgdown/favicon/apple-touch-icon.png
--------------------------------------------------------------------------------
/man-roxygen/param_task_properties.R:
--------------------------------------------------------------------------------
1 | #' @param task_properties (`character()`)\cr
2 | #' Required task properties, see [mlr3::Task].
3 |
--------------------------------------------------------------------------------
/man/figures/README-unnamed-chunk-6-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlr-org/mlr3cluster/HEAD/man/figures/README-unnamed-chunk-6-1.png
--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-120x120.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlr-org/mlr3cluster/HEAD/pkgdown/favicon/apple-touch-icon-120x120.png
--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-152x152.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlr-org/mlr3cluster/HEAD/pkgdown/favicon/apple-touch-icon-152x152.png
--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-180x180.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlr-org/mlr3cluster/HEAD/pkgdown/favicon/apple-touch-icon-180x180.png
--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-60x60.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlr-org/mlr3cluster/HEAD/pkgdown/favicon/apple-touch-icon-60x60.png
--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-76x76.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlr-org/mlr3cluster/HEAD/pkgdown/favicon/apple-touch-icon-76x76.png
--------------------------------------------------------------------------------
/man-roxygen/param_primary_key.R:
--------------------------------------------------------------------------------
1 | #' @param primary_key (`character(1)` | `integer()`)\cr
2 | #' Name of the primary key column, or integer vector of row ids.
3 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 | - package-ecosystem: "github-actions"
4 | directory: "/"
5 | schedule:
6 | interval: "weekly"
7 |
--------------------------------------------------------------------------------
/man-roxygen/param_range.R:
--------------------------------------------------------------------------------
1 | #' @param range (`numeric(2)`)\cr
2 | #' Feasible range for this measure as `c(lower_bound, upper_bound)`.
3 | #' Both bounds may be infinite.
4 |
--------------------------------------------------------------------------------
/man-roxygen/param_predict_types.R:
--------------------------------------------------------------------------------
1 | #' @param predict_types (`character()`)\cr
2 | #' Supported predict types. Must be a subset of [`mlr_reflections$learner_predict_types`][mlr3::mlr_reflections].
3 |
--------------------------------------------------------------------------------
/man-roxygen/param_aggregator.R:
--------------------------------------------------------------------------------
1 | #' @param aggregator (`function(x)`)\cr
2 | #' Function to aggregate individual performance scores `x` where `x` is a numeric vector.
3 | #' If `NULL`, defaults to [mean()].
4 |
--------------------------------------------------------------------------------
/man-roxygen/param_feature_types.R:
--------------------------------------------------------------------------------
1 | #' @param feature_types (`character()`)\cr
2 | #' Feature types the learner operates on. Must be a subset of [`mlr_reflections$task_feature_types`][mlr3::mlr_reflections].
3 |
--------------------------------------------------------------------------------
/man-roxygen/param_task_type.R:
--------------------------------------------------------------------------------
1 | #' @param task_type (`character(1)`)\cr
2 | #' Type of task, e.g. `"regr"` or `"classif"`.
3 | #' Must be an element of [mlr_reflections$task_types$type][mlr3::mlr_reflections].
4 |
--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | if (requireNamespace("testthat", quietly = TRUE)) {
2 | library("checkmate")
3 | library("testthat")
4 | library("mlr3")
5 | library("mlr3cluster")
6 | test_check("mlr3cluster")
7 | }
8 |
--------------------------------------------------------------------------------
/man-roxygen/param_man.R:
--------------------------------------------------------------------------------
1 | #' @param man (`character(1)`)\cr
2 | #' String in the format `[pkg]::[topic]` pointing to a manual page for this object.
3 | #' The referenced help page can be opened via method `$help()`.
4 |
--------------------------------------------------------------------------------
/man-roxygen/param_predict_type.R:
--------------------------------------------------------------------------------
1 | #' @param predict_type (`character(1)`)\cr
2 | #' Required predict type of the [mlr3::Learner].
3 | #' Possible values are stored in [mlr_reflections$learner_predict_types][mlr3::mlr_reflections].
4 |
--------------------------------------------------------------------------------
/man-roxygen/simple_example.R:
--------------------------------------------------------------------------------
1 | #' <%= sprintf("@examplesIf mlr3misc::require_namespaces(lrn(\"%s\")$packages, quietly = TRUE)", id) %>
2 | #' # Define the Learner and set parameter values
3 | #' <%= sprintf("learner = lrn(\"%s\")", id) %>
4 | #' print(learner)
5 |
--------------------------------------------------------------------------------
/man-roxygen/param_packages.R:
--------------------------------------------------------------------------------
1 | #' @param packages (`character()`)\cr
2 | #' Set of required packages.
3 | #' A warning is signaled by the constructor if at least one of the packages is not installed.
4 | #' The packages are loaded (not attached) later on-demand via [requireNamespace()].
5 |
--------------------------------------------------------------------------------
/man-roxygen/param_backend.R:
--------------------------------------------------------------------------------
1 | #' @param backend ([mlr3::DataBackend])\cr
2 | #' Either a [mlr3::DataBackend], or any object which is convertible to a [mlr3::DataBackend] with `as_data_backend()`.
3 | #' E.g., a `data.frame()` will be converted to a [mlr3::DataBackendDataTable].
4 |
--------------------------------------------------------------------------------
/man-roxygen/param_minimize.R:
--------------------------------------------------------------------------------
1 | #' @param minimize (`logical(1)`)\cr
2 | #' Set to `TRUE` if good predictions correspond to small values,
3 | #' and to `FALSE` if good predictions correspond to large values.
4 | #' If set to `NA` (default), tuning this measure is not possible.
5 |
--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
1 | ## R CMD check results
2 |
3 | 0 errors | 0 warnings | 0 notes
4 |
5 | ## revdepcheck results
6 |
7 | We checked 3 reverse dependencies, comparing R CMD check results across CRAN and dev versions of this package.
8 |
9 | * We saw 0 new problems
10 | * We failed to check 0 packages
11 |
--------------------------------------------------------------------------------
/man-roxygen/task.R:
--------------------------------------------------------------------------------
1 | #' @section Dictionary:
2 | #' This [mlr3::Task] can be instantiated via the [dictionary][mlr3misc::Dictionary] [mlr3::mlr_tasks] or with the associated sugar function [mlr3::tsk()]:
3 | #' ```
4 | #' mlr_tasks$get("<%= id %>")
5 | #' tsk("<%= id %>")
6 | #' ```
7 | #'
8 | #' @section Meta Information:
9 | #' `r mlr3misc::rd_info(mlr3::tsk("<%= id %>"))`
10 | #' @md
11 |
--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
1 | # See http://editorconfig.org
2 | root = true
3 |
4 | [*]
5 | charset = utf-8
6 | end_of_line = lf
7 | insert_final_newline = true
8 | indent_style = space
9 | trim_trailing_whitespace = true
10 |
11 | [*.{r,R,md,Rmd}]
12 | indent_size = 2
13 |
14 | [*.{c,h}]
15 | indent_size = 4
16 |
17 | [*.{cpp,hpp}]
18 | indent_size = 4
19 |
20 | [{NEWS.md,DESCRIPTION,LICENSE}]
21 | max_line_length = 80
22 |
--------------------------------------------------------------------------------
/R/measures.R:
--------------------------------------------------------------------------------
1 | # we store the information about the measures in an environment.
2 | # this way, we can (a) construct them easily and (b) generate documentation.
3 | # Collect the metadata describing a single measure in a named list.
4 | # Every argument is stored unchanged under its own name; `predict_type`
5 | # defaults to "partition".
6 | make_measure_info = function(crit, label, lower, upper, minimize, predict_type = "partition") {
7 |   list(
8 |     crit = crit,
9 |     label = label,
10 |     lower = lower,
11 |     upper = upper,
12 |     minimize = minimize,
13 |     predict_type = predict_type
14 |   )
15 | }
16 |
17 | # Environment for the measure information; its parent is the empty environment.
18 | measures = new.env(parent = emptyenv())
19 |
--------------------------------------------------------------------------------
/mlr3cluster.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: Default
4 | SaveWorkspace: Default
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 |
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 |
--------------------------------------------------------------------------------
/man-roxygen/param_predict_sets.R:
--------------------------------------------------------------------------------
1 | #' @param predict_sets (`character()`)\cr
2 | #' Prediction sets to operate on, used in `aggregate()` to extract the matching `predict_sets` from the [ResampleResult].
3 | #' Multiple predict sets are calculated by the respective [mlr3::Learner] during [resample()]/[benchmark()].
4 | #' Must be a non-empty subset of `{"train", "test"}`.
5 | #' If multiple sets are provided, these are first combined to a single prediction object.
6 | #' Default is `"test"`.
7 |
--------------------------------------------------------------------------------
/man-roxygen/learner.R:
--------------------------------------------------------------------------------
1 | #' @section Dictionary:
2 | #' This [mlr3::Learner] can be instantiated via the [dictionary][mlr3misc::Dictionary] [mlr3::mlr_learners] or with the associated sugar function [mlr3::lrn()]:
3 | #' ```
4 | #' mlr_learners$get("<%= id %>")
5 | #' lrn("<%= id %>")
6 | #' ```
7 | #'
8 | #' @section Meta Information:
9 | #' `r mlr3misc::rd_info(mlr3::lrn("<%= id %>"))`
10 | #' @md
11 | #'
12 | #' @section Parameters:
13 | #' `r mlr3misc::rd_info(mlr3::lrn("<%= id %>")$param_set)`
14 | #' @md
15 |
--------------------------------------------------------------------------------
/tests/testthat/test_MeasureClust.R:
--------------------------------------------------------------------------------
1 | test_that("Cluster measures", {
2 |   # train k-means with two centers once and reuse the prediction for every measure
3 |   task = tsk("usarrests")
4 |   learner = lrn("clust.kmeans", centers = 2L)
5 |   prediction = learner$train(task)$predict(task)
6 |
7 |   for (key in mlr_measures$keys("clust")) {
8 |     measure = mlr_measures$get(key)
9 |     # skip dictionary entries whose key matches but which are not cluster measures
10 |     if (measure$task_type != "clust") {
11 |       next
12 |     }
13 |     # the score must be a non-missing number inside the range declared by the measure
14 |     score = measure$score(prediction = prediction, task = task, learner = learner)
15 |     expect_number(score, na.ok = FALSE, lower = measure$range[1], upper = measure$range[2])
16 |   }
17 | })
18 |
--------------------------------------------------------------------------------
/man-roxygen/param_measure_properties.R:
--------------------------------------------------------------------------------
1 | #' @param properties (`character()`)\cr
2 | #' Properties of the measure.
3 | #' Must be a subset of [mlr_reflections$measure_properties][mlr3::mlr_reflections].
4 | #' Supported by `mlr3`:
5 | #' * `"requires_task"` (requires the complete [mlr3::Task]),
6 | #' * `"requires_learner"` (requires the trained [mlr3::Learner]),
7 | #' * `"requires_train_set"` (requires the training indices from the [mlr3::Resampling]), and
8 | #' * `"na_score"` (the measure is expected to occasionally return `NA` or `NaN`).
9 |
--------------------------------------------------------------------------------
/man-roxygen/param_average.R:
--------------------------------------------------------------------------------
1 | #' @param average (`character(1)`)\cr
2 | #' How to average multiple [mlr3::Prediction]s from a [ResampleResult].
3 | #'
4 | #' The default, `"macro"`, calculates the individual performance scores for each [mlr3::Prediction] and then uses the
5 | #' function defined in `$aggregator` to average them to a single number.
6 | #'
7 | #' If set to `"micro"`, the individual [mlr3::Prediction] objects are first combined into a single new [mlr3::Prediction] object which is then used to assess the performance.
8 | #' The function in `$aggregator` is not used in this case.
9 |
--------------------------------------------------------------------------------
/man-roxygen/example.R:
--------------------------------------------------------------------------------
1 | #' <%= sprintf("@examplesIf mlr3misc::require_namespaces(lrn(\"%s\")$packages, quietly = TRUE)", id) %>
2 | #' # Define the Learner and set parameter values
3 | #' <%= sprintf("learner = lrn(\"%s\")", id) %>
4 | #' print(learner)
5 | #'
6 | #' # Define a Task
7 | #' task = tsk("usarrests")
8 | #'
9 | #' # Train the learner on the task
10 | #' learner$train(task)
11 | #'
12 | #' # Print the model
13 | #' print(learner$model)
14 | #'
15 | #' # Make predictions for the task
16 | #' prediction = learner$predict(task)
17 | #'
18 | #' # Score the predictions
19 | #' prediction$score(task = task)
20 |
--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^CITATION\.cff$
3 | ^CONTRIBUTING\.md$
4 | ^CRAN-RELEASE$
5 | ^CRAN-SUBMISSION$
6 | ^Dockerfile$
7 | ^LICENSE$
8 | ^LICENSE\.md$
9 | ^Meta$
10 | ^README\.Rmd$
11 | ^README\.html$
12 | ^\.Renviron$
13 | ^\.Rproj\.user$
14 | ^\.ccache$
15 | ^\.dockerignore$
16 | ^\.editorconfig$
17 | ^\.gitattributes$
18 | ^\.github$
19 | ^\.gitignore$
20 | ^\.httr-oauth$
21 | ^\.ignore$
22 | ^\.lintr$
23 | ^\.pre-commit-config\.yaml$
24 | ^\.vscode$
25 | ^_pkgdown\.yml$
26 | ^attic$
27 | ^codecov\.yml$
28 | ^codemeta\.json$
29 | ^compose\.ya?ml$
30 | ^cran-comments\.md$
31 | ^data-raw$
32 | ^docs?$
33 | ^inst/extdata/.+\.R$
34 | ^jarl\.toml$
35 | ^man-roxygen$
36 | ^paper$
37 | ^pkgdown$
38 | ^revdep$
39 |
--------------------------------------------------------------------------------
/R/TaskClust_ruspini.R:
--------------------------------------------------------------------------------
1 | #' @title Ruspini Cluster Task
2 | #'
3 | #' @name mlr_tasks_ruspini
4 | #' @format [R6::R6Class] inheriting from [TaskClust].
5 | #'
6 | #' @description
7 | #' A cluster task for the [cluster::ruspini] data set.
8 | #'
9 | #' @templateVar id ruspini
10 | #' @template task
11 | #'
12 | #' @references
13 | #' `r format_bib("ruspini_1970")`
14 | #'
15 | #' @template seealso_task
16 | NULL
17 |
18 | # Build the "ruspini" clustering task from the data set loaded via
19 | # load_dataset("ruspini", "cluster").
20 | load_task_ruspini = function(id = "ruspini") {
21 |   ruspini = load_dataset("ruspini", "cluster")
22 |   backend = as_data_backend(ruspini)
23 |   task = TaskClust$new(id, backend, label = "Ruspini")
24 |
25 |   # the manual page reference is set on the task and reused as the backend hash
26 |   man = "mlr3cluster::mlr_tasks_ruspini"
27 |   task$man = man
28 |   backend$hash = man
29 |   task
30 | }
31 |
32 | #' @include zzz.R
33 | register_task("ruspini", load_task_ruspini)
34 |
--------------------------------------------------------------------------------
/R/helper.R:
--------------------------------------------------------------------------------
1 | # Emit a warning stating that learner `id` does not predict on new data.
2 | warn_prediction_useless = function(id) {
3 |   msg = "Learner '%s' doesn't predict on new data and predictions may not make sense on new data."
4 |   warningf(msg, id)
5 | }
6 |
7 | # Values for R's three partial-matching warning options, all set to FALSE.
8 | # NOTE(review): presumably applied as options by the learners - callers are not visible here.
9 | allow_partial_matching = list(
10 |   warnPartialMatchArgs = FALSE,
11 |   warnPartialMatchAttr = FALSE,
12 |   warnPartialMatchDollar = FALSE
13 | )
14 |
15 | # Raise an error if `centers` passes `test_class()` but its number of columns
16 | # differs from `task$ncol`; `name` is the parameter name shown in the message.
17 | assert_centers_param = function(centers, task, test_class, name) {
18 |   has_wrong_width = test_class(centers) && ncol(centers) != task$ncol
19 |   if (has_wrong_width) {
20 |     stopf("`%s` must have same number of columns as data.", name)
21 |   }
22 | }
23 |
24 | # Check function for `centers`: returns TRUE if `x` is a data.frame or passes
25 | # `test_int(x, lower = 1L)`, otherwise a string describing the requirement.
26 | check_centers = function(x) {
27 |   if (test_data_frame(x) || test_int(x, lower = 1L)) {
28 |     return(TRUE)
29 |   }
30 |   "`centers` must be integer or data.frame with initial cluster centers"
31 | }
32 |
--------------------------------------------------------------------------------
/tests/testthat/helper_mlr3.R:
--------------------------------------------------------------------------------
1 | # Source the test helpers shipped with the installed mlr3 package.
2 | # The pattern is anchored with `$` so that only files ending in .R/.r are matched.
3 | lapply(
4 |   list.files(system.file("testthat", package = "mlr3"), pattern = "^helper.*\\.[rR]$", full.names = TRUE),
5 |   source
6 | )
7 |
8 | # Task generator for LearnerClust objects: returns a list with one seeded task
9 | # built from `N` observations drawn with mlbench::mlbench.2dnormals().
10 | generate_tasks.LearnerClust = function(learner, N = 20L) { # nolint
11 |   set.seed(1L)
12 |   normals = mlbench::mlbench.2dnormals(N, cl = 2L, r = 2, sd = 0.1)
13 |   backend = mlr3::as_data_backend(as.data.frame(normals$x))
14 |   list(TaskClust$new("sanity", backend))
15 | }
16 | registerS3method("generate_tasks", "LearnerClust", generate_tasks.LearnerClust, envir = parent.frame())
17 |
18 | # Sanity check for cluster predictions: the silhouette score must be greater than -1.
19 | sanity_check.PredictionClust = function(prediction, task, ...) { # nolint
20 |   silhouette = prediction$score(measures = msr("clust.silhouette"), task = task)
21 |   silhouette > -1L
22 | }
23 | registerS3method("sanity_check", "PredictionClust", sanity_check.PredictionClust, envir = parent.frame())
24 |
--------------------------------------------------------------------------------
/.lintr:
--------------------------------------------------------------------------------
1 | linters: linters_with_defaults(
2 | # lintr defaults: https://lintr.r-lib.org/reference/default_linters.html
3 | # the following setup changes/removes certain linters
4 | assignment_linter = NULL, # do not force using <- for assignments
5 | object_name_linter(c("snake_case", "CamelCase")), # only allow snake case and camel case object names
6 | commented_code_linter = NULL, # allow code in comments
7 | line_length_linter(120L),
8 | object_length_linter(40L),
9 | undesirable_function_linter(fun = c(
10 | # base messaging
11 | cat = "use catf()",
12 | stop = "use stopf()",
13 | warning = "use warningf()",
14 | message = "use messagef()",
15 | # perf
16 | ifelse = "use fifelse()",
17 | rank = "use frank()"
18 | ))
19 | )
20 |
--------------------------------------------------------------------------------
/tests/testthat/test_TaskClust.R:
--------------------------------------------------------------------------------
1 | # Assertions shared by the tests of the bundled cluster tasks.
2 | expect_basic_clust_task = function(id) {
3 |   task = tsk(id)
4 |   expect_task(task)
5 |   expect_task_clust(task)
6 |   # the tests expect no target columns on these tasks
7 |   expect_identical(task$target_names, character())
8 | }
9 |
10 | test_that("Basic ops on usarrests task", {
11 |   expect_basic_clust_task("usarrests")
12 | })
13 |
14 | test_that("Basic ops on ruspini task", {
15 |   expect_basic_clust_task("ruspini")
16 | })
17 |
18 | test_that("0 feature task", {
19 |   backend = as_data_backend(data.table(ids = 1:30))
20 |   task = TaskClust$new(id = "zero_feat_task", backend)
21 |   expect_output(print(task))
22 |   expect_backend(task$backend)
23 |   expect_task(task)
24 |   expect_task_clust(task)
25 |   expect_data_table(task$data(), ncols = 1L)
26 |
27 |   # the featureless learner must train and predict on this task
28 |   learner = lrn("clust.featureless", num_clusters = 3L)
29 |   prediction = learner$train(task)$predict(task)
30 |   expect_prediction(prediction)
31 | })
32 |
--------------------------------------------------------------------------------
/man-roxygen/seealso_task.R:
--------------------------------------------------------------------------------
1 | #' @seealso
2 | #'
3 | #' * Chapter in the [mlr3book](https://mlr3book.mlr-org.com/):
4 | #' \url{https://mlr3book.mlr-org.com/chapters/chapter2/data_and_basic_modeling.html}
5 | #' * Package \CRANpkg{mlr3data} for more toy tasks.
6 | #' * Package \CRANpkg{mlr3oml} for downloading tasks from \url{https://www.openml.org}.
7 | #' * Package \CRANpkg{mlr3viz} for some generic visualizations.
8 | #' * [Dictionary][mlr3misc::Dictionary] of [Tasks][mlr3::Task]: [mlr3::mlr_tasks]
9 | #' * `as.data.table(mlr_tasks)` for a table of available [Tasks][mlr3::Task] in the running session (depending on the loaded packages).
10 | #' * \CRANpkg{mlr3fselect} and \CRANpkg{mlr3filters} for feature selection and feature filtering.
11 | #' * Extension packages for additional task types:
12 | #' * Unsupervised clustering: \CRANpkg{mlr3cluster}
13 | #' * Probabilistic supervised regression and survival analysis: \url{https://mlr3proba.mlr-org.com/}.
14 | #'
15 | #' @family Task
16 |
--------------------------------------------------------------------------------
/R/TaskClust_usarrest.R:
--------------------------------------------------------------------------------
1 | #' @title US Arrests Cluster Task
2 | #'
3 | #' @name mlr_tasks_usarrests
4 | #' @format [R6::R6Class] inheriting from [TaskClust].
5 | #'
6 | #' @description
7 | #' A cluster task for the [datasets::USArrests] data set.
8 | #' Rownames are stored as variable `"state"` with column role `"name"`.
9 | #'
10 | #' @templateVar id usarrests
11 | #' @template task
12 | #'
13 | #' @references
14 | #' `r format_bib("berry1979inter")`
15 | #'
16 | #' @template seealso_task
17 | NULL
18 |
19 | # Build the "usarrests" clustering task. The row names of `USArrests` are kept in
20 | # column "state", which gets the column role "name" and is removed from the features.
21 | load_task_usarrests = function(id = "usarrests") {
22 |   arrests = load_dataset("USArrests", "datasets", keep_rownames = TRUE)
23 |   backend = as_data_backend(arrests, keep_rownames = "state")
24 |   task = TaskClust$new(id, backend, label = "US Arrests")
25 |
26 |   # the manual page reference is set on the task and reused as the backend hash
27 |   man = "mlr3cluster::mlr_tasks_usarrests"
28 |   task$man = man
29 |   backend$hash = man
30 |
31 |   task$col_roles$name = "state"
32 |   task$col_roles$feature = setdiff(task$col_roles$feature, "state")
33 |   task
34 | }
35 |
36 | #' @include zzz.R
37 | register_task("usarrests", load_task_usarrests)
38 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # History files
2 | .Rhistory
3 | .Rapp.history
4 |
5 | # Session Data files
6 | .RData
7 | .RDataTmp
8 |
9 | # User-specific files
10 | .Ruserdata
11 |
12 | # Example code in package build process
13 | *-Ex.R
14 |
15 | # Output files from R CMD build
16 | /*.tar.gz
17 |
18 | # Output files from R CMD check
19 | /*.Rcheck/
20 |
21 | # RStudio files
22 | .Rproj.user/
23 |
24 | # produced vignettes
25 | vignettes/*.html
26 | vignettes/*.pdf
27 | doc/
28 | Meta/
29 |
30 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
31 | .httr-oauth
32 |
33 | # knitr and R markdown default cache directories
34 | *_cache/
35 | /cache/
36 |
37 | # Temporary files created by R markdown
38 | *.utf8.md
39 | *.knit.md
40 |
41 | # R Environment Variables
42 | .Renviron
43 |
44 | # pkgdown site
45 | docs/
46 |
47 | # translation temp files
48 | po/*~
49 |
50 | # RStudio Connect folder
51 | rsconnect/
52 |
53 | # CRAN
54 | CRAN-RELEASE
55 | CRAN-SUBMISSION
56 |
57 | # MacOS
58 | .DS_Store
59 |
--------------------------------------------------------------------------------
/man-roxygen/seealso_learner.R:
--------------------------------------------------------------------------------
1 | #' @seealso
2 | #'
3 | #' * Chapter in the [mlr3book](https://mlr3book.mlr-org.com/):
4 | #' \url{https://mlr3book.mlr-org.com/chapters/chapter2/data_and_basic_modeling.html#sec-learners}
5 | #' * Package [mlr3extralearners](https://github.com/mlr-org/mlr3extralearners) for more learners.
6 | #' * [Dictionary][mlr3misc::Dictionary] of [Learners][mlr3::Learner]: [mlr3::mlr_learners]
7 | #' * `as.data.table(mlr_learners)` for a table of available [Learners][mlr3::Learner] in the running session (depending on the loaded packages).
8 | #' * \CRANpkg{mlr3pipelines} to combine learners with pre- and postprocessing steps.
9 | #' * Extension packages for additional task types:
10 | #' * \CRANpkg{mlr3proba} for probabilistic supervised regression and survival analysis.
11 | #' * \CRANpkg{mlr3cluster} for unsupervised clustering.
12 | #' * \CRANpkg{mlr3tuning} for tuning of hyperparameters, \CRANpkg{mlr3tuningspaces}
13 | #' for established default tuning spaces.
14 | #'
15 | #' @family Learner
16 |
--------------------------------------------------------------------------------
/pkgdown/_pkgdown.yml:
--------------------------------------------------------------------------------
1 | url: https://mlr3cluster.mlr-org.com
2 |
3 | template:
4 | bootstrap: 5
5 | light-switch: true
6 | math-rendering: mathjax
7 | package: mlr3pkgdowntemplate
8 |
9 | development:
10 | mode: auto
11 | version_label: default
12 | version_tooltip: "Version"
13 |
14 | toc:
15 | depth: 3
16 |
17 | navbar:
18 | structure:
19 | left: [reference, news, book]
20 | right: [search, github, mattermost, stackoverflow, rss, lightswitch]
21 | components:
22 | home: ~
23 | reference:
24 | icon: fa fa-file-alt
25 | text: Reference
26 | href: reference/index.html
27 | mattermost:
28 | icon: fa fa-comments
29 | href: https://lmmisld-lmu-stats-slds.srv.mwn.de/mlr_invite/
30 | book:
31 | text: mlr3book
32 | icon: fa fa-link
33 | href: https://mlr3book.mlr-org.com
34 | stackoverflow:
35 | icon: fab fa-stack-overflow
36 | href: https://stackoverflow.com/questions/tagged/mlr3
37 | rss:
38 | icon: fa-rss
39 | href: https://mlr-org.com/
40 |
--------------------------------------------------------------------------------
/man-roxygen/param_learner_properties.R:
--------------------------------------------------------------------------------
1 | #' @param properties (`character()`)\cr
2 | #' Set of properties of the [mlr3::Learner].
3 | #' Must be a subset of [`mlr_reflections$learner_properties`][mlr3::mlr_reflections].
4 | #' The following properties are currently standardized and understood by learners in \CRANpkg{mlr3}:
5 | #' * `"missings"`: The learner can handle missing values in the data.
6 | #' * `"weights"`: The learner supports observation weights.
7 | #' * `"importance"`: The learner supports extraction of importance scores, i.e. comes with an `$importance()` extractor function (see section on optional extractors in [mlr3::Learner]).
8 | #' * `"selected_features"`: The learner supports extraction of the set of selected features, i.e. comes with a `$selected_features()` extractor function (see section on optional extractors in [mlr3::Learner]).
9 | #' * `"oob_error"`: The learner supports extraction of the estimated out-of-bag error, i.e. comes with an `$oob_error()` extractor function (see section on optional extractors in [mlr3::Learner]).
10 |
--------------------------------------------------------------------------------
/tests/testthat/test_mlr_learners_clust_featureless.R:
--------------------------------------------------------------------------------
1 | test_that("autotest", {
2 |   clusterer = lrn("clust.featureless")
3 |   expect_learner(clusterer)
4 |   outcome = run_autotest(clusterer)
5 |   expect_true(outcome, info = outcome$error)
6 | })
7 |
8 | test_that("Learner properties are respected", {
9 |   arrests = tsk("usarrests")
10 |   clusterer = lrn("clust.featureless")
11 |   expect_learner(clusterer, arrests)
12 |
13 |   # hyperparameter configurations to cycle through
14 |   configs = list(
15 |     list(num_clusters = 1L),
16 |     list(num_clusters = 2L),
17 |     list(num_clusters = 3L)
18 |   )
19 |
20 |   for (cfg in configs) {
21 |     # replace the full set of hyperparameter values with this configuration
22 |     clusterer$param_set$values = cfg
23 |
24 |     pred = clusterer$train(arrests)$predict(arrests)
25 |     expect_prediction_clust(pred)
26 |
27 |     if ("complete" %chin% clusterer$properties) {
28 |       expect_prediction_complete(pred, clusterer$predict_type)
29 |     }
30 |     if ("exclusive" %chin% clusterer$properties) {
31 |       expect_prediction_exclusive(pred, clusterer$predict_type)
32 |     }
33 |   }
34 | })
35 |
--------------------------------------------------------------------------------
/tests/testthat/test_mlr_learners_clust_pam.R:
--------------------------------------------------------------------------------
1 | skip_if_not_installed("clue")
2 |
3 | test_that("autotest", {
4 |   clusterer = lrn("clust.pam")
5 |   expect_learner(clusterer)
6 |   outcome = run_autotest(clusterer)
7 |   expect_true(outcome, info = outcome$error)
8 | })
9 |
10 | test_that("Learner properties are respected", {
11 |   arrests = tsk("usarrests")
12 |   clusterer = lrn("clust.pam")
13 |   expect_learner(clusterer, arrests)
14 |
15 |   # hyperparameter configurations to cycle through
16 |   configs = list(
17 |     list(k = 2L),
18 |     list(k = 5L),
19 |     list(k = 2L, metric = "manhattan")
20 |   )
21 |
22 |   for (cfg in configs) {
23 |     # replace the full set of hyperparameter values with this configuration
24 |     clusterer$param_set$values = cfg
25 |
26 |     pred = clusterer$train(arrests)$predict(arrests)
27 |     expect_prediction_clust(pred)
28 |
29 |     if ("complete" %chin% clusterer$properties) {
30 |       expect_prediction_complete(pred, clusterer$predict_type)
31 |     }
32 |     if ("exclusive" %chin% clusterer$properties) {
33 |       expect_prediction_exclusive(pred, clusterer$predict_type)
34 |     }
35 |   }
36 | })
37 |
--------------------------------------------------------------------------------
/R/TaskClust.R:
--------------------------------------------------------------------------------
1 | #' @title Cluster Task
2 | #'
3 | #' @description
4 | #' This task specializes [mlr3::Task] for cluster problems.
5 | #' As an unsupervised task, this task has no target column.
6 | #' The `task_type` is set to `"clust"`.
7 | #'
8 | #' Predefined tasks are stored in the [dictionary][mlr3misc::Dictionary] [mlr3::mlr_tasks].
9 | #'
10 | #' @template param_id
11 | #' @template param_backend
12 | #' @template param_label
13 | #' @family Task
14 | #' @export
15 | #' @examples
16 | #' library(mlr3)
17 | #' library(mlr3cluster)
18 | #' task = TaskClust$new("usarrests", backend = USArrests)
19 | #' task$task_type
20 | #'
21 | #' # possible properties:
22 | #' mlr_reflections$task_properties$clust
23 | TaskClust = R6Class("TaskClust",
24 | inherit = TaskUnsupervised,
25 | public = list(
26 | #' @description
27 | #' Creates a new instance of this [R6][R6::R6Class] class; the `task_type` passed to the parent constructor is always `"clust"`.
28 | initialize = function(id, backend, label = NA_character_) {
29 | super$initialize(id = id, task_type = "clust", backend = backend, label = label)
30 | }
31 | )
32 | )
33 |
--------------------------------------------------------------------------------
/tests/testthat/test_mlr_learners_clust_ff.R:
--------------------------------------------------------------------------------
1 | skip_if_not_installed("RWeka")
2 | skip_on_cran()
3 |
4 | test_that("autotest", {
5 |   clusterer = lrn("clust.ff")
6 |   expect_learner(clusterer)
7 |   outcome = run_autotest(clusterer)
8 |   expect_true(outcome, info = outcome$error)
9 | })
10 |
11 | test_that("Learner properties are respected", {
12 |   arrests = tsk("usarrests")
13 |   clusterer = lrn("clust.ff")
14 |   expect_learner(clusterer, arrests)
15 |
16 |   # hyperparameter configurations to cycle through
17 |   configs = list(
18 |     list(N = 1L),
19 |     list(N = 3L, S = 3L),
20 |     list(S = 100L)
21 |   )
22 |
23 |   for (cfg in configs) {
24 |     # replace the full set of hyperparameter values with this configuration
25 |     clusterer$param_set$values = cfg
26 |
27 |     pred = clusterer$train(arrests)$predict(arrests)
28 |     expect_prediction_clust(pred)
29 |
30 |     if ("complete" %chin% clusterer$properties) {
31 |       expect_prediction_complete(pred, clusterer$predict_type)
32 |     }
33 |     if ("exclusive" %chin% clusterer$properties) {
34 |       expect_prediction_exclusive(pred, clusterer$predict_type)
35 |     }
36 |   }
37 | })
38 |
--------------------------------------------------------------------------------
/tests/testthat/test_mlr_learners_clust_cobweb.R:
--------------------------------------------------------------------------------
1 | skip_if_not_installed("RWeka")
2 | skip_on_cran()
3 |
4 | test_that("autotest", {
5 |   clusterer = lrn("clust.cobweb")
6 |   expect_learner(clusterer)
7 |   outcome = run_autotest(clusterer)
8 |   expect_true(outcome, info = outcome$error)
9 | })
10 |
11 | test_that("Learner properties are respected", {
12 |   arrests = tsk("usarrests")
13 |   clusterer = lrn("clust.cobweb")
14 |   expect_learner(clusterer, arrests)
15 |
16 |   # hyperparameter configurations to cycle through
17 |   configs = list(
18 |     list(A = 0.5),
19 |     list(C = 1L),
20 |     list(S = 100L)
21 |   )
22 |
23 |   for (cfg in configs) {
24 |     # replace the full set of hyperparameter values with this configuration
25 |     clusterer$param_set$values = cfg
26 |
27 |     pred = clusterer$train(arrests)$predict(arrests)
28 |     expect_prediction_clust(pred)
29 |
30 |     if ("complete" %chin% clusterer$properties) {
31 |       expect_prediction_complete(pred, clusterer$predict_type)
32 |     }
33 |     if ("exclusive" %chin% clusterer$properties) {
34 |       expect_prediction_exclusive(pred, clusterer$predict_type)
35 |     }
36 |   }
37 | })
38 |
--------------------------------------------------------------------------------
/man-roxygen/measure_fpc.R:
--------------------------------------------------------------------------------
1 | #' @usage NULL
2 | #' @name mlr_measures_clust.<%= id %>
3 | #' @format [R6::R6Class()] inheriting from [MeasureClust].
4 | #'
5 | #' @description
6 | #' The score function calls [fpc::cluster.stats()] from package \CRANpkg{fpc}.
7 | #' "<%= measures[[id]]$crit %>" is used to subset the output of the function call.
8 | #'
9 | #' @section Construction:
10 | #' This measure can be retrieved from the dictionary [mlr3::mlr_measures]:
11 | #' ```
12 | #' mlr_measures$get("clust.<%= id %>")
13 | #' msr("clust.<%= id %>")
14 | #' ```
15 | #'
16 | #' @section Meta Information:
17 | #' <% item = measures[[id]] %>
18 | #' * Range: <%= rd_format_range(item$lower, item$upper) %>
19 | #' * Minimize: `<%= item$minimize %>`
20 | #' * Required predict type: `<%= item$predict_type %>`
21 | #'
22 | #'
23 | #' @family cluster measures
24 | #'
25 | #' @seealso
26 | #' [Dictionary][mlr3misc::Dictionary] of [Measures][mlr3::Measure]: [mlr3::mlr_measures]
27 | #'
28 | #' `as.data.table(mlr_measures)` for a complete table of all (also dynamically created) [mlr3::Measure] implementations.
29 |
--------------------------------------------------------------------------------
/tests/testthat/test_mlr_learners_clust_xmeans.R:
--------------------------------------------------------------------------------
1 | skip_on_cran()
2 | skip_if_not_installed("RWeka")
3 |
4 | test_that("autotest", {
5 |   clusterer = lrn("clust.xmeans")
6 |   expect_learner(clusterer)
7 |   outcome = run_autotest(clusterer)
8 |   expect_true(outcome, info = outcome$error)
9 | })
10 |
11 | test_that("Learner properties are respected", {
12 |   arrests = tsk("usarrests")
13 |   clusterer = lrn("clust.xmeans")
14 |   expect_learner(clusterer, arrests)
15 |
16 |   # hyperparameter configurations to cycle through
17 |   configs = list(
18 |     list(use_kdtree = TRUE),
19 |     list(L = 2L),
20 |     list(I = 5L)
21 |   )
22 |
23 |   for (cfg in configs) {
24 |     # replace the full set of hyperparameter values with this configuration
25 |     clusterer$param_set$values = cfg
26 |
27 |     pred = clusterer$train(arrests)$predict(arrests)
28 |     expect_prediction_clust(pred)
29 |
30 |     if ("complete" %chin% clusterer$properties) {
31 |       expect_prediction_complete(pred, clusterer$predict_type)
32 |     }
33 |     if ("exclusive" %chin% clusterer$properties) {
34 |       expect_prediction_exclusive(pred, clusterer$predict_type)
35 |     }
36 |   }
37 | })
38 |
--------------------------------------------------------------------------------
/man-roxygen/measure_sil.R:
--------------------------------------------------------------------------------
1 | #' @usage NULL
2 | #' @name mlr_measures_clust.<%= id %>
3 | #' @format [R6::R6Class()] inheriting from [MeasureClust].
4 | #'
5 | #' @description
6 | #' The score function calls [cluster::silhouette()] from package \CRANpkg{cluster}.
7 | #' "<%= measures[[id]]$crit %>" is used to subset the output of the function call.
8 | #'
9 | #' @section Construction:
10 | #' This measure can be retrieved from the dictionary [mlr3::mlr_measures]:
11 | #' ```
12 | #' mlr_measures$get("clust.<%= id %>")
13 | #' msr("clust.<%= id %>")
14 | #' ```
15 | #'
16 | #' @section Meta Information:
17 | #' <% item = measures[[id]] %>
18 | #' * Range: <%= rd_format_range(item$lower, item$upper) %>
19 | #' * Minimize: `<%= item$minimize %>`
20 | #' * Required predict type: `<%= item$predict_type %>`
21 | #'
22 | #'
23 | #' @family cluster measures
24 | #'
25 | #' @seealso
26 | #' [Dictionary][mlr3misc::Dictionary] of [Measures][mlr3::Measure]: [mlr3::mlr_measures]
27 | #'
28 | #' `as.data.table(mlr_measures)` for a complete table of all (also dynamically created) [mlr3::Measure] implementations.
29 |
--------------------------------------------------------------------------------
/tests/testthat/test_mlr_learners_clust_dbscan.R:
--------------------------------------------------------------------------------
1 | skip_if_not_installed("dbscan")
2 |
3 | test_that("autotest", {
4 |   clusterer = lrn("clust.dbscan", eps = 25)
5 |   expect_learner(clusterer)
6 |   outcome = run_autotest(clusterer)
7 |   expect_true(outcome, info = outcome$error)
8 | })
9 |
10 | test_that("Learner properties are respected", {
11 |   arrests = tsk("usarrests")
12 |   clusterer = lrn("clust.dbscan", eps = 25)
13 |   expect_learner(clusterer, arrests)
14 |
15 |   # hyperparameter configurations to cycle through
16 |   configs = list(
17 |     list(eps = 25),
18 |     list(eps = 25, minPts = 10L),
19 |     list(eps = 25, search = "linear")
20 |   )
21 |
22 |   for (cfg in configs) {
23 |     # replace the full set of hyperparameter values with this configuration
24 |     clusterer$param_set$values = cfg
25 |
26 |     pred = clusterer$train(arrests)$predict(arrests)
27 |     expect_prediction_clust(pred)
28 |
29 |     if ("complete" %chin% clusterer$properties) {
30 |       expect_prediction_complete(pred, clusterer$predict_type)
31 |     }
32 |     if ("exclusive" %chin% clusterer$properties) {
33 |       expect_prediction_exclusive(pred, clusterer$predict_type)
34 |     }
35 |   }
36 | })
37 |
--------------------------------------------------------------------------------
/tests/testthat/test_mlr_learners_clust_dbscan_fpc.R:
--------------------------------------------------------------------------------
1 | skip_if_not_installed("fpc")
2 |
3 | test_that("autotest", {
4 |   clusterer = lrn("clust.dbscan_fpc", eps = 25)
5 |   expect_learner(clusterer)
6 |   outcome = run_autotest(clusterer)
7 |   expect_true(outcome, info = outcome$error)
8 | })
9 |
10 | test_that("Learner properties are respected", {
11 |   arrests = tsk("usarrests")
12 |   clusterer = lrn("clust.dbscan_fpc", eps = 25)
13 |   expect_learner(clusterer, arrests)
14 |
15 |   # hyperparameter configurations to cycle through
16 |   configs = list(
17 |     list(eps = 25),
18 |     list(eps = 25, MinPts = 10),
19 |     list(eps = 25, method = "hybrid")
20 |   )
21 |
22 |   for (cfg in configs) {
23 |     # replace the full set of hyperparameter values with this configuration
24 |     clusterer$param_set$values = cfg
25 |
26 |     pred = clusterer$train(arrests)$predict(arrests)
27 |     expect_prediction_clust(pred)
28 |
29 |     if ("complete" %chin% clusterer$properties) {
30 |       expect_prediction_complete(pred, clusterer$predict_type)
31 |     }
32 |     if ("exclusive" %chin% clusterer$properties) {
33 |       expect_prediction_exclusive(pred, clusterer$predict_type)
34 |     }
35 |   }
36 | })
37 |
--------------------------------------------------------------------------------
/tests/testthat/test_mlr_learners_clust_optics.R:
--------------------------------------------------------------------------------
1 | skip_if_not_installed("dbscan")
2 |
3 | test_that("autotest", {
4 |   clusterer = lrn("clust.optics", eps = 25, eps_cl = 20)
5 |   expect_learner(clusterer)
6 |   outcome = run_autotest(clusterer)
7 |   expect_true(outcome, info = outcome$error)
8 | })
9 |
10 | test_that("Learner properties are respected", {
11 |   arrests = tsk("usarrests")
12 |   clusterer = lrn("clust.optics")
13 |   expect_learner(clusterer, arrests)
14 |
15 |   # hyperparameter configurations to cycle through
16 |   configs = list(
17 |     list(eps_cl = 25),
18 |     list(eps = 25, eps_cl = 20),
19 |     list(eps_cl = 25, search = "linear")
20 |   )
21 |
22 |   for (cfg in configs) {
23 |     # replace the full set of hyperparameter values with this configuration
24 |     clusterer$param_set$values = cfg
25 |
26 |     pred = clusterer$train(arrests)$predict(arrests)
27 |     expect_prediction_clust(pred)
28 |
29 |     if ("complete" %chin% clusterer$properties) {
30 |       expect_prediction_complete(pred, clusterer$predict_type)
31 |     }
32 |     if ("exclusive" %chin% clusterer$properties) {
33 |       expect_prediction_exclusive(pred, clusterer$predict_type)
34 |     }
35 |   }
36 | })
37 |
--------------------------------------------------------------------------------
/tests/testthat/test_mlr_learners_clust_em.R:
--------------------------------------------------------------------------------
1 | skip_if_not_installed("RWeka")
2 | skip_on_cran()
3 |
4 | test_that("autotest", {
5 |   clusterer = lrn("clust.em")
6 |   expect_learner(clusterer)
7 |   outcome = run_autotest(clusterer)
8 |   expect_true(outcome, info = outcome$error)
9 | })
10 |
11 | test_that("Learner properties are respected", {
12 |   arrests = tsk("usarrests")
13 |   clusterer = lrn("clust.em")
14 |   expect_learner(clusterer, arrests)
15 |
16 |   # hyperparameter configurations to cycle through
17 |   configs = list(
18 |     list(I = 200L, num_slots = 5L),
19 |     list(output_debug_info = TRUE, K = 5L),
20 |     list(M = 1e-3, ll_iter = 1L, ll_cv = 1L)
21 |   )
22 |
23 |   for (cfg in configs) {
24 |     # replace the full set of hyperparameter values with this configuration
25 |     clusterer$param_set$values = cfg
26 |
27 |     pred = clusterer$train(arrests)$predict(arrests)
28 |     expect_prediction_clust(pred)
29 |
30 |     if ("complete" %chin% clusterer$properties) {
31 |       expect_prediction_complete(pred, clusterer$predict_type)
32 |     }
33 |     if ("exclusive" %chin% clusterer$properties) {
34 |       expect_prediction_exclusive(pred, clusterer$predict_type)
35 |     }
36 |   }
37 | })
38 |
--------------------------------------------------------------------------------
/tests/testthat/test_mlr_learners_clust_bico.R:
--------------------------------------------------------------------------------
1 | skip_if_not_installed("stream")
2 |
3 | test_that("autotest", {
4 |   clusterer = lrn("clust.bico")
5 |   expect_learner(clusterer)
6 |   outcome = run_autotest(clusterer)
7 |   expect_true(outcome, info = outcome$error)
8 | })
9 |
10 | test_that("Learner properties are respected", {
11 |   arrests = tsk("usarrests")
12 |   clusterer = lrn("clust.bico")
13 |   expect_learner(clusterer, arrests)
14 |
15 |   # hyperparameter configurations to cycle through
16 |   configs = list(
17 |     list(k = 5),
18 |     list(k = 5, space = 5L),
19 |     list(k = 5, space = 5L, p = 5L),
20 |     list(k = 5, space = 5L, p = 5L, iterations = 5L)
21 |   )
22 |
23 |   for (cfg in configs) {
24 |     # replace the full set of hyperparameter values with this configuration
25 |     clusterer$param_set$values = cfg
26 |
27 |     pred = clusterer$train(arrests)$predict(arrests)
28 |     expect_prediction_clust(pred)
29 |
30 |     if ("complete" %chin% clusterer$properties) {
31 |       expect_prediction_complete(pred, clusterer$predict_type)
32 |     }
33 |     if ("exclusive" %chin% clusterer$properties) {
34 |       expect_prediction_exclusive(pred, clusterer$predict_type)
35 |     }
36 |   }
37 | })
38 |
--------------------------------------------------------------------------------
/man/mlr3cluster-package.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/zzz.R
3 | \docType{package}
4 | \name{mlr3cluster-package}
5 | \alias{mlr3cluster}
6 | \alias{mlr3cluster-package}
7 | \title{mlr3cluster: Cluster Extension for 'mlr3'}
8 | \description{
9 | Extends the 'mlr3' package with cluster analysis.
10 | }
11 | \seealso{
12 | Useful links:
13 | \itemize{
14 | \item \url{https://mlr3cluster.mlr-org.com}
15 | \item \url{https://github.com/mlr-org/mlr3cluster}
16 | \item Report bugs at \url{https://github.com/mlr-org/mlr3cluster/issues}
17 | }
18 |
19 | }
20 | \author{
21 | \strong{Maintainer}: Maximilian Mücke \email{muecke.maximilian@gmail.com} (\href{https://orcid.org/0009-0000-9432-9795}{ORCID})
22 |
23 | Authors:
24 | \itemize{
25 | \item Damir Pulatov \email{damirpolat@protonmail.com}
26 | \item Michel Lang \email{michellang@gmail.com} (\href{https://orcid.org/0000-0001-9754-0393}{ORCID})
27 | }
28 |
29 | Other contributors:
30 | \itemize{
31 | \item Marc Becker \email{marcbecker@posteo.de} (\href{https://orcid.org/0000-0002-8115-0400}{ORCID}) [contributor]
32 | }
33 |
34 | }
35 |
--------------------------------------------------------------------------------
/tests/testthat/test_mlr_learners_clust_simplekmeans.R:
--------------------------------------------------------------------------------
1 | skip_if_not_installed("RWeka")
2 | skip_on_cran()
3 |
4 | test_that("autotest", {
5 |   clusterer = lrn("clust.SimpleKMeans")
6 |   expect_learner(clusterer)
7 |   outcome = run_autotest(clusterer)
8 |   expect_true(outcome, info = outcome$error)
9 | })
10 |
11 | test_that("Learner properties are respected", {
12 |   arrests = tsk("usarrests")
13 |   clusterer = lrn("clust.SimpleKMeans")
14 |   expect_learner(clusterer, arrests)
15 |
16 |   # hyperparameter configurations to cycle through
17 |   configs = list(
18 |     list(N = 3, init = 2L, periodic_pruning = 1L),
19 |     list(V = TRUE, M = TRUE, O = TRUE),
20 |     list(num_slots = 2L, init = 2L, min_density = 1L)
21 |   )
22 |
23 |   for (cfg in configs) {
24 |     # replace the full set of hyperparameter values with this configuration
25 |     clusterer$param_set$values = cfg
26 |
27 |     pred = clusterer$train(arrests)$predict(arrests)
28 |     expect_prediction_clust(pred)
29 |
30 |     if ("complete" %chin% clusterer$properties) {
31 |       expect_prediction_complete(pred, clusterer$predict_type)
32 |     }
33 |     if ("exclusive" %chin% clusterer$properties) {
34 |       expect_prediction_exclusive(pred, clusterer$predict_type)
35 |     }
36 |   }
37 | })
38 |
--------------------------------------------------------------------------------
/tests/testthat/test_mlr_learners_clust_mclust.R:
--------------------------------------------------------------------------------
1 | skip_if_not_installed("mclust")
2 |
3 | test_that("autotest", {
4 |   clusterer = lrn("clust.mclust")
5 |   expect_learner(clusterer)
6 |   outcome = run_autotest(clusterer)
7 |   expect_true(outcome, info = outcome$error)
8 | })
9 |
10 | test_that("Learner properties are respected", {
11 |   arrests = tsk("usarrests")
12 |   clusterer = lrn("clust.mclust")
13 |   expect_learner(clusterer, arrests)
14 |
15 |   # hyperparameter configurations to cycle through
16 |   configs = list(
17 |     list(G = 1:4, modelNames = "EII"),
18 |     list(initialization = list(noise = 1)),
19 |     list(G = 3)
20 |   )
21 |
22 |   for (cfg in configs) {
23 |     # replace the full set of hyperparameter values with this configuration
24 |     clusterer$param_set$values = cfg
25 |
26 |     pred = suppressWarnings(clusterer$train(arrests)$predict(arrests))
27 |     expect_prediction_clust(pred)
28 |
29 |     if ("complete" %chin% clusterer$properties) {
30 |       expect_prediction_complete(pred, clusterer$predict_type)
31 |     }
32 |     if ("exclusive" %chin% clusterer$properties) {
33 |       expect_prediction_exclusive(pred, clusterer$predict_type)
34 |     }
35 |     if ("fuzzy" %chin% clusterer$properties) {
36 |       expect_prediction_fuzzy(pred)
37 |     }
38 |   }
39 | })
40 |
--------------------------------------------------------------------------------
/tests/testthat/test_mlr_learners_clust_meanshift.R:
--------------------------------------------------------------------------------
1 | skip_if_not_installed("LPCM")
2 |
3 | test_that("autotest", {
4 |   clusterer = lrn("clust.meanshift")
5 |   expect_learner(clusterer)
6 |   first_task = generate_tasks(clusterer)[[1]]
7 |   clusterer$train(first_task)
8 |   expect_class(clusterer$model, "ms")
9 |   expect_warning(clusterer$predict(first_task), "doesn't predict on new data")
10 | })
11 |
12 | test_that("Learner properties are respected", {
13 |   arrests = tsk("usarrests")
14 |   clusterer = lrn("clust.meanshift")
15 |   expect_learner(clusterer, arrests)
16 |
17 |   # hyperparameter configurations to cycle through
18 |   configs = list(
19 |     list(h = 2L),
20 |     list(subset = 1:3, scaled = 2L),
21 |     list(thr = 0.1, iter = 100L)
22 |   )
23 |
24 |   for (cfg in configs) {
25 |     # replace the full set of hyperparameter values with this configuration
26 |     clusterer$param_set$values = cfg
27 |
28 |     pred = suppressWarnings(clusterer$train(arrests)$predict(arrests))
29 |     expect_prediction_clust(pred)
30 |
31 |     if ("complete" %chin% clusterer$properties) {
32 |       expect_prediction_complete(pred, clusterer$predict_type)
33 |     }
34 |     if ("exclusive" %chin% clusterer$properties) {
35 |       expect_prediction_exclusive(pred, clusterer$predict_type)
36 |     }
37 |   }
38 | })
39 |
--------------------------------------------------------------------------------
/tests/testthat/test_mlr_learners_clust_diana.R:
--------------------------------------------------------------------------------
1 | skip_if_not_installed("clue")
2 |
3 | test_that("autotest", {
4 |   clusterer = lrn("clust.diana")
5 |   expect_learner(clusterer)
6 |   first_task = generate_tasks(clusterer)[[1]]
7 |   clusterer$train(first_task)
8 |   expect_class(clusterer$model, "diana")
9 |   expect_warning(clusterer$predict(first_task), "doesn't predict on new data")
10 | })
11 |
12 | test_that("Learner properties are respected", {
13 |   arrests = tsk("usarrests")
14 |   clusterer = lrn("clust.diana")
15 |   expect_learner(clusterer, arrests)
16 |
17 |   # hyperparameter configurations to cycle through
18 |   configs = list(
19 |     list(k = 2L),
20 |     list(k = 5L),
21 |     list(k = 2L, metric = "manhattan"),
22 |     list(k = 2L, stand = TRUE)
23 |   )
24 |
25 |   for (cfg in configs) {
26 |     # replace the full set of hyperparameter values with this configuration
27 |     clusterer$param_set$values = cfg
28 |
29 |     pred = suppressWarnings(clusterer$train(arrests)$predict(arrests))
30 |     expect_prediction_clust(pred)
31 |
32 |     if ("complete" %chin% clusterer$properties) {
33 |       expect_prediction_complete(pred, clusterer$predict_type)
34 |     }
35 |     if ("exclusive" %chin% clusterer$properties) {
36 |       expect_prediction_exclusive(pred, clusterer$predict_type)
37 |     }
38 |   }
39 | })
40 |
--------------------------------------------------------------------------------
/tests/testthat/test_mlr_learners_clust_hdbscan.R:
--------------------------------------------------------------------------------
1 | skip_if_not_installed("dbscan")
2 |
3 | test_that("autotest", {
4 |   clusterer = lrn("clust.hdbscan", minPts = 5L)
5 |   expect_learner(clusterer)
6 |   outcome = run_autotest(clusterer)
7 |   expect_true(outcome, info = outcome$error)
8 | })
9 |
10 | test_that("Learner properties are respected", {
11 |   arrests = tsk("usarrests")
12 |   clusterer = lrn("clust.hdbscan", minPts = 5L)
13 |   expect_learner(clusterer, arrests)
14 |
15 |   # hyperparameter configurations to cycle through
16 |   configs = list(
17 |     list(minPts = 5L),
18 |     list(minPts = 5L, gen_hdbscan_tree = TRUE),
19 |     list(minPts = 5L, gen_simplified_tree = TRUE),
20 |     list(minPts = 5L, gen_hdbscan_tree = TRUE, gen_simplified_tree = TRUE)
21 |   )
22 |
23 |   for (cfg in configs) {
24 |     # replace the full set of hyperparameter values with this configuration
25 |     clusterer$param_set$values = cfg
26 |
27 |     pred = clusterer$train(arrests)$predict(arrests)
28 |     expect_prediction_clust(pred)
29 |
30 |     if ("complete" %chin% clusterer$properties) {
31 |       expect_prediction_complete(pred, clusterer$predict_type)
32 |     }
33 |     if ("exclusive" %chin% clusterer$properties) {
34 |       expect_prediction_exclusive(pred, clusterer$predict_type)
35 |     }
36 |   }
37 | })
38 |
--------------------------------------------------------------------------------
/tests/testthat/test_mlr_learners_clust_agnes.R:
--------------------------------------------------------------------------------
1 | skip_if_not_installed("clue")
2 |
3 | test_that("autotest", {
4 |   clusterer = lrn("clust.agnes")
5 |   expect_learner(clusterer)
6 |   first_task = generate_tasks(clusterer)[[1]]
7 |   clusterer$train(first_task)
8 |   expect_class(clusterer$model, "agnes")
9 |   expect_warning(clusterer$predict(first_task), "doesn't predict on new data")
10 | })
11 |
12 | test_that("Learner properties are respected", {
13 |   arrests = tsk("usarrests")
14 |   clusterer = lrn("clust.agnes")
15 |   expect_learner(clusterer, arrests)
16 |
17 |   # hyperparameter configurations to cycle through
18 |   configs = list(
19 |     list(k = 2L),
20 |     list(k = 5L),
21 |     list(k = 2L, metric = "manhattan", method = "single"),
22 |     list(k = 2L, stand = TRUE)
23 |   )
24 |
25 |   for (cfg in configs) {
26 |     # replace the full set of hyperparameter values with this configuration
27 |     clusterer$param_set$values = cfg
28 |
29 |     pred = suppressWarnings(clusterer$train(arrests)$predict(arrests))
30 |     expect_prediction_clust(pred)
31 |
32 |     if ("complete" %chin% clusterer$properties) {
33 |       expect_prediction_complete(pred, clusterer$predict_type)
34 |     }
35 |     if ("exclusive" %chin% clusterer$properties) {
36 |       expect_prediction_exclusive(pred, clusterer$predict_type)
37 |     }
38 |   }
39 | })
40 |
--------------------------------------------------------------------------------
/tests/testthat/helper_expectations.R:
--------------------------------------------------------------------------------
1 | expect_prediction_clust = function(p) {
2 |   # generic prediction checks first, then the fields specific to PredictionClust
3 |   expect_prediction(p)
4 |   expect_r6(p, "PredictionClust", public = c("row_ids", "truth", "predict_types", "prob", "partition"))
5 |   n_obs = length(p$row_ids)
6 |   expect_numeric(p$truth, any.missing = TRUE, len = n_obs, null.ok = TRUE)
7 |   expect_numeric(p$partition, any.missing = FALSE, len = n_obs, null.ok = TRUE)
8 |   if ("prob" %chin% p$predict_types) expect_matrix(p$prob, "numeric", any.missing = FALSE, nrows = n_obs)
9 | }
10 |
11 | expect_task_clust = function(task) { expect_r6(task, "TaskClust") } # asserts `task` is an R6 object of class "TaskClust"
12 |
13 | expect_prediction_complete = function(p, predict_type) { # `predict_type` names the field of `p` to inspect
14 | expect_false(anyMissing(p[[predict_type]])) # fails if that field contains any missing value
15 | }
16 |
17 | expect_prediction_exclusive = function(p, predict_type) { # `predict_type` names the field of `p` to inspect
18 | expect_atomic(p[[predict_type]]) # the field must be an atomic vector ...
19 | expect_integer(p[[predict_type]]) # ... holding integer values
20 | }
21 |
22 | expect_prediction_fuzzy = function(p, predict_type) { # checks p$prob and its agreement with p$partition; `predict_type` is not used
23 |   expect_numeric(p$prob, lower = 0L, upper = 1L) # every membership value lies in [0, 1]
24 |   expect_numeric(round(rowSums(p$prob), 2), lower = 1L, upper = 1L) # each row sums to 1 after rounding to 2 decimals
25 |   # partition must be the label of the highest-membership column (first column wins ties)
26 |   partition = max.col(p$prob, ties.method = "first")
27 |   partition = as.numeric(colnames(p$prob)[partition])
28 |   expect_true(all(partition == p$partition)) # all() yields one TRUE/FALSE; unique() gave a length-2 vector on mixed results
29 | }
30 |
--------------------------------------------------------------------------------
/tests/testthat/test_mlr_learners_clust_hclust.R:
--------------------------------------------------------------------------------
1 | test_that("autotest", {
2 |   clusterer = lrn("clust.hclust")
3 |   expect_learner(clusterer)
4 |   first_task = generate_tasks(clusterer)[[1]]
5 |   clusterer$train(first_task)
6 |   expect_class(clusterer$model, "hclust")
7 |   expect_warning(clusterer$predict(first_task), "doesn't predict on new data")
8 | })
9 |
10 | test_that("Learner properties are respected", {
11 |   arrests = tsk("usarrests")
12 |   clusterer = lrn("clust.hclust")
13 |   expect_learner(clusterer, arrests)
14 |
15 |   # hyperparameter configurations to cycle through
16 |   configs = list(
17 |     list(k = 3L),
18 |     list(k = 5L),
19 |     list(k = 3L, method = "centroid")
20 |   )
21 |
22 |   for (cfg in configs) {
23 |     # replace the full set of hyperparameter values with this configuration
24 |     clusterer$param_set$values = cfg
25 |
26 |     pred = suppressWarnings(clusterer$train(arrests)$predict(arrests))
27 |     expect_prediction_clust(pred)
28 |
29 |     if ("complete" %chin% clusterer$properties) {
30 |       expect_prediction_complete(pred, clusterer$predict_type)
31 |     }
32 |     if ("exclusive" %chin% clusterer$properties) {
33 |       expect_prediction_exclusive(pred, clusterer$predict_type)
34 |     }
35 |     if ("fuzzy" %chin% clusterer$properties) {
36 |       expect_prediction_fuzzy(pred)
37 |     }
38 |   }
39 | })
40 |
--------------------------------------------------------------------------------
/tests/testthat/test_mlr_learners_clust_fanny.R:
--------------------------------------------------------------------------------
1 | skip_if_not_installed("clue")
2 |
3 | test_that("autotest", {
4 |   clusterer = lrn("clust.fanny")
5 |   expect_learner(clusterer)
6 |   first_task = generate_tasks(clusterer)[[1]]
7 |   clusterer$train(first_task)
8 |   expect_class(clusterer$model, "fanny")
9 |   expect_warning(clusterer$predict(first_task), "doesn't predict on new data")
10 | })
11 |
12 | test_that("Learner properties are respected", {
13 |   arrests = tsk("usarrests")
14 |   clusterer = lrn("clust.fanny")
15 |   expect_learner(clusterer, arrests)
16 |
17 |   # hyperparameter configurations to cycle through
18 |   configs = list(
19 |     list(k = 2L),
20 |     list(k = 5L),
21 |     list(k = 2L, metric = "SqEuclidean")
22 |   )
23 |
24 |   for (cfg in configs) {
25 |     # replace the full set of hyperparameter values with this configuration
26 |     clusterer$param_set$values = cfg
27 |
28 |     pred = suppressWarnings(clusterer$train(arrests)$predict(arrests))
29 |     expect_prediction_clust(pred)
30 |
31 |     if ("complete" %chin% clusterer$properties) {
32 |       expect_prediction_complete(pred, clusterer$predict_type)
33 |     }
34 |     if ("exclusive" %chin% clusterer$properties) {
35 |       expect_prediction_exclusive(pred, clusterer$predict_type)
36 |     }
37 |     if ("fuzzy" %chin% clusterer$properties) {
38 |       expect_prediction_fuzzy(pred)
39 |     }
40 |   }
41 | })
42 |
--------------------------------------------------------------------------------
/tests/testthat/test_mlr_learners_clust_ap.R:
--------------------------------------------------------------------------------
1 | skip_if_not_installed("apcluster")
2 |
3 | test_that("autotest", {
4 |   clusterer = lrn("clust.ap", s = apcluster::negDistMat(r = 2L))
5 |   expect_learner(clusterer)
6 |   outcome = run_autotest(clusterer)
7 |   expect_true(outcome, info = outcome$error)
8 | })
9 |
10 | test_that("Learner properties are respected", {
11 |   arrests = tsk("usarrests")
12 |   clusterer = lrn("clust.ap")
13 |   expect_learner(clusterer, arrests)
14 |
15 |   # hyperparameter configurations to cycle through
16 |   configs = list(
17 |     list(s = apcluster::negDistMat(r = 2L)),
18 |     list(s = apcluster::linSimMat, details = TRUE, q = 0.5),
19 |     list(s = apcluster::expSimMat, lam = 0.5, nonoise = TRUE, includeSim = TRUE),
20 |     list(s = apcluster::corSimMat, convits = 50L, maxits = 500L)
21 |   )
22 |
23 |   for (cfg in configs) {
24 |     # replace the full set of hyperparameter values with this configuration
25 |     clusterer$param_set$values = cfg
26 |
27 |     pred = suppressWarnings(clusterer$train(arrests)$predict(arrests))
28 |     expect_prediction_clust(pred)
29 |
30 |     if ("complete" %chin% clusterer$properties) {
31 |       expect_prediction_complete(pred, clusterer$predict_type)
32 |     }
33 |     if ("exclusive" %chin% clusterer$properties) {
34 |       expect_prediction_exclusive(pred, clusterer$predict_type)
35 |     }
36 |   }
37 | })
38 |
--------------------------------------------------------------------------------
/tests/testthat/test_mlr_learners_clust_birch.R:
--------------------------------------------------------------------------------
1 | skip_if_not_installed("stream")
2 |
3 | test_that("autotest", {
4 | learner = lrn("clust.birch", threshold = 0.1, branching = 8L, maxLeaf = 20L)
5 | expect_learner(learner)
6 | result = run_autotest(learner)
7 | expect_true(result, info = result$error)
8 | })
9 |
10 | test_that("Learner properties are respected", {
11 | task = tsk("usarrests")
12 | learner = lrn("clust.birch", threshold = 0.1, branching = 8L, maxLeaf = 20L)
13 | expect_learner(learner, task)
14 |
15 | # train/predict under several hyperparameter sets; count-like values use integer literals throughout
16 | parset_list = list(
17 | list(threshold = 0.1, branching = 8L, maxLeaf = 20L),
18 | list(threshold = 0.2, branching = 4L, maxLeaf = 10L, maxMem = 2L),
19 | list(threshold = 0.3, branching = 12L, maxLeaf = 5L, outlierThreshold = 0.3)
20 | )
21 |
22 | for (i in seq_along(parset_list)) {
23 | parset = parset_list[[i]]
24 | learner$param_set$values = parset  # replaces the full set of values for this iteration
25 |
26 | p = learner$train(task)$predict(task)
27 | expect_prediction_clust(p)
28 |
29 | if ("complete" %chin% learner$properties) {  # only check what the learner advertises
30 | expect_prediction_complete(p, learner$predict_type)
31 | }
32 | if ("exclusive" %chin% learner$properties) {
33 | expect_prediction_exclusive(p, learner$predict_type)
34 | }
35 | }
36 | })
37 |
--------------------------------------------------------------------------------
/.github/workflows/pkgdown.yml:
--------------------------------------------------------------------------------
1 | # pkgdown workflow of the mlr3 ecosystem v0.1.0
2 | # https://github.com/mlr-org/actions
3 | on:
4 | push:
5 | branches:
6 | - main
7 | pull_request:
8 | branches:
9 | - main
10 | release:
11 | types:
12 | - published
13 | workflow_dispatch:
14 |
15 | name: pkgdown
16 |
17 | jobs:
18 | pkgdown:
19 | runs-on: ubuntu-latest
20 |
21 | concurrency:
22 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
23 | env:
24 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
25 | steps:
26 | - uses: actions/checkout@v5
27 |
28 | - uses: r-lib/actions/setup-pandoc@v2
29 |
30 | - uses: r-lib/actions/setup-r@v2
31 |
32 | - uses: r-lib/actions/setup-r-dependencies@v2
33 | with:
34 | extra-packages: any::pkgdown, local::.
35 | needs: website
36 |
37 | - name: Install template
38 | run: pak::pkg_install("mlr-org/mlr3pkgdowntemplate")
39 | shell: Rscript {0}
40 |
41 | - name: Build site
42 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
43 | shell: Rscript {0}
44 |
45 | - name: Deploy
46 | if: github.event_name != 'pull_request'
47 | uses: JamesIves/github-pages-deploy-action@v4.7.4
48 | with:
49 | clean: false
50 | branch: gh-pages
51 | folder: docs
52 |
--------------------------------------------------------------------------------
/tests/testthat/test_mlr_learners_clust_kmeans.R:
--------------------------------------------------------------------------------
1 | skip_if_not_installed("clue")
2 |
3 | test_that("autotest", {
4 | learner = lrn("clust.kmeans")
5 | expect_learner(learner)
6 | result = run_autotest(learner)
7 | expect_true(result, info = result$error)
8 | })
9 |
10 | test_that("Learner properties are respected", {
11 | task = tsk("usarrests")
12 | learner = lrn("clust.kmeans")
13 | expect_learner(learner, task)
14 |
15 | # test on multiple paramsets
16 | centers = data.frame(matrix(ncol = length(colnames(task$data())), nrow = 4L))
17 | colnames(centers) = colnames(task$data())
18 | centers$Assault = c(100, 200, 150, 300)
19 | centers$Murder = c(11, 3, 10, 5)
20 | centers$Rape = c(20, 18, 10, 26)
21 | centers$UrbanPop = c(60, 54, 53, 69)
22 |
23 | parset_list = list(
24 | list(centers = 2L),
25 | list(centers = centers),
26 | list(centers = 2L, algorithm = "MacQueen")
27 | )
28 |
29 | for (i in seq_along(parset_list)) {
30 | parset = parset_list[[i]]
31 | learner$param_set$values = parset
32 |
33 | p = learner$train(task)$predict(task)
34 | expect_prediction_clust(p)
35 |
36 | if ("complete" %chin% learner$properties) {
37 | expect_prediction_complete(p, learner$predict_type)
38 | }
39 | if ("exclusive" %chin% learner$properties) {
40 | expect_prediction_exclusive(p, learner$predict_type)
41 | }
42 |
43 | learner$reset()
44 | }
45 | })
46 |
--------------------------------------------------------------------------------
/man/mlr_measures_clust.dunn.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/MeasureClustInternal.R
3 | \name{mlr_measures_clust.dunn}
4 | \alias{mlr_measures_clust.dunn}
5 | \title{Dunn Index}
6 | \format{
7 | \code{\link[R6:R6Class]{R6::R6Class()}} inheriting from \link{MeasureClust}.
8 | }
9 | \description{
10 | The score function calls \code{\link[fpc:cluster.stats]{fpc::cluster.stats()}} from package \CRANpkg{fpc}.
11 | Only the "dunn" component of the function call's output is used.
12 | }
13 | \section{Construction}{
14 |
15 | This measure can be retrieved from the dictionary \link[mlr3:mlr_measures]{mlr3::mlr_measures}:
16 |
17 | \if{html}{\out{<div class="sourceCode">}}\preformatted{mlr_measures$get("clust.dunn")
18 | msr("clust.dunn")
19 | }\if{html}{\out{</div>}}
20 | }
21 |
22 | \section{Meta Information}{
23 |
24 | \itemize{
25 | \item Range: \eqn{[0, \infty)}{[0, Inf)}
26 | \item Minimize: \code{FALSE}
27 | \item Required predict type: \code{partition}
28 | }
29 | }
30 |
31 | \seealso{
32 | \link[mlr3misc:Dictionary]{Dictionary} of \link[mlr3:Measure]{Measures}: \link[mlr3:mlr_measures]{mlr3::mlr_measures}
33 |
34 | \code{as.data.table(mlr_measures)} for a complete table of all (also dynamically created) \link[mlr3:Measure]{mlr3::Measure} implementations.
35 |
36 | Other cluster measures:
37 | \code{\link{mlr_measures_clust.ch}},
38 | \code{\link{mlr_measures_clust.silhouette}},
39 | \code{\link{mlr_measures_clust.wss}}
40 | }
41 | \concept{cluster measures}
42 |
--------------------------------------------------------------------------------
/tests/testthat/test_mlr_learners_clust_cmeans.R:
--------------------------------------------------------------------------------
1 | skip_if_not_installed("e1071")
2 |
3 | test_that("autotest", {
4 | learner = lrn("clust.cmeans")
5 | expect_learner(learner)
6 | result = run_autotest(learner)
7 | expect_true(result, info = result$error)
8 | })
9 |
10 | test_that("Learner properties are respected", {
11 | task = tsk("usarrests")
12 | learner = lrn("clust.cmeans")
13 | expect_learner(learner, task)
14 |
15 | # test on multiple paramsets
16 | centers = data.frame(matrix(ncol = length(colnames(task$data())), nrow = 4L))
17 | colnames(centers) = colnames(task$data())
18 | centers$Assault = c(100, 200, 150, 300)
19 | centers$Murder = c(11, 3, 10, 5)
20 | centers$Rape = c(20, 18, 10, 26)
21 | centers$UrbanPop = c(60, 54, 53, 69)
22 |
23 | parset_list = list(
24 | list(centers = 2L),
25 | list(centers = centers),
26 | list(centers = 2L, dist = "manhattan", m = 3)
27 | )
28 |
29 | for (i in seq_along(parset_list)) {
30 | parset = parset_list[[i]]
31 | learner$param_set$values = parset
32 |
33 | p = learner$train(task)$predict(task)
34 | expect_prediction_clust(p)
35 |
36 | if ("complete" %chin% learner$properties) {
37 | expect_prediction_complete(p, learner$predict_type)
38 | }
39 | if ("exclusive" %chin% learner$properties) {
40 | expect_prediction_exclusive(p, learner$predict_type)
41 | }
42 | if ("fuzzy" %chin% learner$properties) {
43 | expect_prediction_fuzzy(p)
44 | }
45 | }
46 | })
47 |
--------------------------------------------------------------------------------
/tests/testthat/test_mlr_learners_clust_kkmeans.R:
--------------------------------------------------------------------------------
1 | skip_if_not_installed("kernlab")
2 |
3 | test_that("autotest", {
4 | learner = lrn("clust.kkmeans")
5 | expect_learner(learner)
6 | result = run_autotest(learner)
7 | expect_true(result, info = result$error)
8 | })
9 |
10 | test_that("Learner properties are respected", {
11 | task = tsk("usarrests")
12 | learner = lrn("clust.kkmeans")
13 | expect_learner(learner, task)
14 |
15 | # test on multiple paramsets
16 | centers = data.frame(matrix(ncol = length(colnames(task$data())), nrow = 4L))
17 | colnames(centers) = colnames(task$data())
18 | centers$Assault = c(100, 200, 150, 300)
19 | centers$Murder = c(11, 3, 10, 5)
20 | centers$Rape = c(20, 18, 10, 26)
21 | centers$UrbanPop = c(60, 54, 53, 69)
22 |
23 | parset_list = list(
24 | list(centers = 2L, kernel = "polydot", degree = 2L),
25 | list(centers = centers, kernel = "laplacedot", sigma = 2L),
26 | list(centers = 3L, kernel = "anovadot")
27 | )
28 |
29 | for (i in seq_along(parset_list)) {
30 | parset = parset_list[[i]]
31 | learner$param_set$values = parset
32 |
33 | p = learner$train(task)$predict(task)
34 | expect_prediction_clust(p)
35 |
36 | if ("complete" %chin% learner$properties) {
37 | expect_prediction_complete(p, learner$predict_type)
38 | }
39 | if ("exclusive" %chin% learner$properties) {
40 | expect_prediction_exclusive(p, learner$predict_type)
41 | }
42 |
43 | learner$reset()
44 | }
45 | })
46 |
--------------------------------------------------------------------------------
/man/mlr_measures_clust.ch.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/MeasureClustInternal.R
3 | \name{mlr_measures_clust.ch}
4 | \alias{mlr_measures_clust.ch}
5 | \title{Calinski Harabasz Pseudo F-Statistic}
6 | \format{
7 | \code{\link[R6:R6Class]{R6::R6Class()}} inheriting from \link{MeasureClust}.
8 | }
9 | \description{
10 | The score function calls \code{\link[fpc:cluster.stats]{fpc::cluster.stats()}} from package \CRANpkg{fpc}.
11 | Only the "ch" component of the function call's output is used.
12 | }
13 | \section{Construction}{
14 |
15 | This measure can be retrieved from the dictionary \link[mlr3:mlr_measures]{mlr3::mlr_measures}:
16 |
17 | \if{html}{\out{<div class="sourceCode">}}\preformatted{mlr_measures$get("clust.ch")
18 | msr("clust.ch")
19 | }\if{html}{\out{</div>}}
20 | }
21 |
22 | \section{Meta Information}{
23 |
24 | \itemize{
25 | \item Range: \eqn{[0, \infty)}{[0, Inf)}
26 | \item Minimize: \code{FALSE}
27 | \item Required predict type: \code{partition}
28 | }
29 | }
30 |
31 | \seealso{
32 | \link[mlr3misc:Dictionary]{Dictionary} of \link[mlr3:Measure]{Measures}: \link[mlr3:mlr_measures]{mlr3::mlr_measures}
33 |
34 | \code{as.data.table(mlr_measures)} for a complete table of all (also dynamically created) \link[mlr3:Measure]{mlr3::Measure} implementations.
35 |
36 | Other cluster measures:
37 | \code{\link{mlr_measures_clust.dunn}},
38 | \code{\link{mlr_measures_clust.silhouette}},
39 | \code{\link{mlr_measures_clust.wss}}
40 | }
41 | \concept{cluster measures}
42 |
--------------------------------------------------------------------------------
/man/mlr_measures_clust.wss.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/MeasureClustInternal.R
3 | \name{mlr_measures_clust.wss}
4 | \alias{mlr_measures_clust.wss}
5 | \title{Within Sum of Squares}
6 | \format{
7 | \code{\link[R6:R6Class]{R6::R6Class()}} inheriting from \link{MeasureClust}.
8 | }
9 | \description{
10 | The score function calls \code{\link[fpc:cluster.stats]{fpc::cluster.stats()}} from package \CRANpkg{fpc}.
11 | Only the "within.cluster.ss" component of the function call's output is used.
12 | }
13 | \section{Construction}{
14 |
15 | This measure can be retrieved from the dictionary \link[mlr3:mlr_measures]{mlr3::mlr_measures}:
16 |
17 | \if{html}{\out{<div class="sourceCode">}}\preformatted{mlr_measures$get("clust.wss")
18 | msr("clust.wss")
19 | }\if{html}{\out{</div>}}
20 | }
21 |
22 | \section{Meta Information}{
23 |
24 | \itemize{
25 | \item Range: \eqn{[0, \infty)}{[0, Inf)}
26 | \item Minimize: \code{TRUE}
27 | \item Required predict type: \code{partition}
28 | }
29 | }
30 |
31 | \seealso{
32 | \link[mlr3misc:Dictionary]{Dictionary} of \link[mlr3:Measure]{Measures}: \link[mlr3:mlr_measures]{mlr3::mlr_measures}
33 |
34 | \code{as.data.table(mlr_measures)} for a complete table of all (also dynamically created) \link[mlr3:Measure]{mlr3::Measure} implementations.
35 |
36 | Other cluster measures:
37 | \code{\link{mlr_measures_clust.ch}},
38 | \code{\link{mlr_measures_clust.dunn}},
39 | \code{\link{mlr_measures_clust.silhouette}}
40 | }
41 | \concept{cluster measures}
42 |
--------------------------------------------------------------------------------
/man/as_prediction_clust.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/as_prediction_clust.R
3 | \name{as_prediction_clust}
4 | \alias{as_prediction_clust}
5 | \alias{as_prediction_clust.PredictionClust}
6 | \alias{as_prediction_clust.data.frame}
7 | \title{Convert to a Cluster Prediction}
8 | \usage{
9 | as_prediction_clust(x, ...)
10 |
11 | \method{as_prediction_clust}{PredictionClust}(x, ...)
12 |
13 | \method{as_prediction_clust}{data.frame}(x, ...)
14 | }
15 | \arguments{
16 | \item{x}{(any)\cr
17 | Object to convert.}
18 |
19 | \item{...}{(any)\cr
20 | Additional arguments.}
21 | }
22 | \value{
23 | \link{PredictionClust}.
24 | }
25 | \description{
26 | Convert object to a \link{PredictionClust}.
27 | }
28 | \examples{
29 | if (requireNamespace("e1071")) {
30 | # create a prediction object
31 | task = tsk("usarrests")
32 | learner = lrn("clust.kmeans")
33 | learner = lrn("clust.cmeans", predict_type = "prob")
34 | learner$train(task)
35 | p = learner$predict(task)
36 |
37 | # convert to a data.table
38 | tab = as.data.table(p)
39 |
40 | # convert back to a Prediction
41 | as_prediction_clust(tab)
42 |
43 | # split data.table into 3 data.tables based on UrbanPop
44 | f = cut(task$data(rows = tab$row_ids)$UrbanPop, 3)
45 | tabs = split(tab, f)
46 |
47 | # convert back to list of predictions
48 | preds = lapply(tabs, as_prediction_clust)
49 |
50 | # calculate performance in each group
51 | sapply(preds, function(p) p$score(task = task))
52 | }
53 | }
54 |
--------------------------------------------------------------------------------
/man/mlr_measures_clust.silhouette.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/MeasureClustInternal.R
3 | \name{mlr_measures_clust.silhouette}
4 | \alias{mlr_measures_clust.silhouette}
5 | \title{Rousseeuw's Silhouette Quality Index}
6 | \format{
7 | \code{\link[R6:R6Class]{R6::R6Class()}} inheriting from \link{MeasureClust}.
8 | }
9 | \description{
10 | The score function calls \code{\link[cluster:silhouette]{cluster::silhouette()}} from package \CRANpkg{cluster}.
11 | Only the "sil_width" component of the function call's output is used.
12 | }
13 | \section{Construction}{
14 |
15 | This measure can be retrieved from the dictionary \link[mlr3:mlr_measures]{mlr3::mlr_measures}:
16 |
17 | \if{html}{\out{<div class="sourceCode">}}\preformatted{mlr_measures$get("clust.silhouette")
18 | msr("clust.silhouette")
19 | }\if{html}{\out{</div>}}
20 | }
21 |
22 | \section{Meta Information}{
23 |
24 | \itemize{
25 | \item Range: \eqn{[0, \infty)}{[0, Inf)}
26 | \item Minimize: \code{FALSE}
27 | \item Required predict type: \code{partition}
28 | }
29 | }
30 |
31 | \seealso{
32 | \link[mlr3misc:Dictionary]{Dictionary} of \link[mlr3:Measure]{Measures}: \link[mlr3:mlr_measures]{mlr3::mlr_measures}
33 |
34 | \code{as.data.table(mlr_measures)} for a complete table of all (also dynamically created) \link[mlr3:Measure]{mlr3::Measure} implementations.
35 |
36 | Other cluster measures:
37 | \code{\link{mlr_measures_clust.ch}},
38 | \code{\link{mlr_measures_clust.dunn}},
39 | \code{\link{mlr_measures_clust.wss}}
40 | }
41 | \concept{cluster measures}
42 |
--------------------------------------------------------------------------------
/man/as_task_clust.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/as_task_clust.R
3 | \name{as_task_clust}
4 | \alias{as_task_clust}
5 | \alias{as_task_clust.TaskClust}
6 | \alias{as_task_clust.data.frame}
7 | \alias{as_task_clust.DataBackend}
8 | \alias{as_task_clust.formula}
9 | \title{Convert to a Cluster Task}
10 | \usage{
11 | as_task_clust(x, ...)
12 |
13 | \method{as_task_clust}{TaskClust}(x, clone = FALSE, ...)
14 |
15 | \method{as_task_clust}{data.frame}(x, id = deparse1(substitute(x)), ...)
16 |
17 | \method{as_task_clust}{DataBackend}(x, id = deparse1(substitute(x)), ...)
18 |
19 | \method{as_task_clust}{formula}(x, data, id = deparse1(substitute(data)), ...)
20 | }
21 | \arguments{
22 | \item{x}{(any)\cr
23 | Object to convert.}
24 |
25 | \item{...}{(any)\cr
26 | Additional arguments.}
27 |
28 | \item{clone}{(\code{logical(1)})\cr
29 | If \code{TRUE}, ensures that the returned object is not the same as the input \code{x}.}
30 |
31 | \item{id}{(\code{character(1)})\cr
32 | Id for the new task.
33 | Defaults to the (deparsed and substituted) name of the data argument.}
34 |
35 | \item{data}{(\code{data.frame()})\cr
36 | Data frame containing all columns specified in formula \code{x}.}
37 | }
38 | \value{
39 | \link{TaskClust}.
40 | }
41 | \description{
42 | Convert object to a \link{TaskClust}.
43 | This is an S3 generic, specialized for at least the following objects:
44 | \enumerate{
45 | \item \link{TaskClust}: ensure the identity.
46 | \item \code{\link[=data.frame]{data.frame()}} and \link[mlr3:DataBackend]{mlr3::DataBackend}: provides an alternative to calling the constructor of \link{TaskClust}.
47 | }
48 | }
49 | \examples{
50 | as_task_clust(datasets::USArrests)
51 | }
52 |
--------------------------------------------------------------------------------
/.github/workflows/r-cmd-check.yml:
--------------------------------------------------------------------------------
1 | # Workflow sets WEKA_HOME and installs XMeans
2 | # r cmd check workflow of the mlr3 ecosystem v0.3.1
3 | # https://github.com/mlr-org/actions
4 | on:
5 | workflow_dispatch:
6 | inputs:
7 | debug_enabled:
8 | type: boolean
9 | description: 'Run the build with tmate debugging enabled'
10 | required: false
11 | default: false
12 | push:
13 | branches:
14 | - main
15 | pull_request:
16 | branches:
17 | - main
18 |
19 | name: r-cmd-check
20 |
21 | env:
22 | WEKA_HOME: /home/runner/work/mlr3cluster
23 |
24 | jobs:
25 | r-cmd-check:
26 | runs-on: ${{ matrix.config.os }}
27 |
28 | name: ${{ matrix.config.os }} (${{ matrix.config.r }})
29 |
30 | env:
31 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
32 |
33 | strategy:
34 | fail-fast: false
35 | matrix:
36 | config:
37 | - {os: ubuntu-latest, r: 'devel'}
38 | - {os: ubuntu-latest, r: 'release'}
39 |
40 | steps:
41 | - uses: actions/checkout@v5
42 |
43 | - uses: r-lib/actions/setup-pandoc@v2
44 |
45 | - uses: r-lib/actions/setup-r@v2
46 | with:
47 | r-version: ${{ matrix.config.r }}
48 |
49 | - uses: r-lib/actions/setup-r-dependencies@v2
50 | with:
51 | extra-packages: any::rcmdcheck
52 | needs: check
53 |
54 | - uses: mxschmitt/action-tmate@v3
55 | if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }}
56 | with:
57 | limit-access-to-actor: true
58 |
59 | - name: Install XMeans
60 | run: |
61 | RWeka::WPM("refresh-cache")
62 | RWeka::WPM("install-package", "XMeans")
63 | shell: Rscript {0}
64 |
65 | - uses: r-lib/actions/check-r-package@v2
66 |
--------------------------------------------------------------------------------
/tests/testthat/test_mlr_learners_clust_mbatchkmeans.R:
--------------------------------------------------------------------------------
1 | skip_if_not_installed("ClusterR")
2 |
3 | test_that("autotest", {
4 | learner = lrn("clust.MBatchKMeans")
5 | expect_learner(learner)
6 | task = generate_tasks(learner)
7 | suppressWarnings(learner$train(task[[1]]))
8 | expect_class(learner$model, "MBatchKMeans")
9 | })
10 |
11 | test_that("Learner properties are respected", {
12 | task = tsk("usarrests")
13 | learner = lrn("clust.MBatchKMeans")
14 | expect_learner(learner, task)
15 |
16 | # test on multiple paramsets
17 | centers = data.frame(matrix(ncol = length(colnames(task$data())), nrow = 4L))
18 | colnames(centers) = colnames(task$data())
19 | centers$Assault = c(100, 200, 150, 300)
20 | centers$Murder = c(11, 3, 10, 5)
21 | centers$Rape = c(20, 18, 10, 26)
22 | centers$UrbanPop = c(60, 54, 53, 69)
23 | colnames(centers) = NULL
24 | centers = as.matrix(centers)
25 |
26 | parset_list = list(
27 | list(clusters = 2L),
28 | list(clusters = 4L, CENTROIDS = centers, initializer = "random"),
29 | list(clusters = 2L, early_stop_iter = 20L, batch_size = 15L, tol = 1e-03)
30 | )
31 |
32 | for (type in c("partition", "prob")) {
33 | learner$predict_type = type
34 | for (i in seq_along(parset_list)) {
35 | parset = parset_list[[i]]
36 | learner$param_set$values = parset
37 |
38 | p = learner$train(task)$predict(task)
39 | expect_prediction_clust(p)
40 |
41 | if ("complete" %chin% learner$properties) {
42 | expect_prediction_complete(p, learner$predict_type)
43 | }
44 | if ("exclusive" %chin% learner$properties) {
45 | expect_prediction_exclusive(p, "partition")
46 | }
47 | if (learner$predict_type == "prob") {
48 | expect_prediction_fuzzy(p)
49 | }
50 |
51 | learner$reset()
52 | }
53 | }
54 | })
55 |
--------------------------------------------------------------------------------
/R/MeasureClust.R:
--------------------------------------------------------------------------------
1 | #' @title Cluster Measure
2 | #'
3 | #' @description
4 | #' This measure specializes [mlr3::Measure] for cluster analysis:
5 | #'
6 | #' * `task_type` is set to `"clust"`.
7 | #' * Possible values for `predict_type` are `"partition"` and `"prob"`.
8 | #'
9 | #' Predefined measures can be found in the [mlr3misc::Dictionary] [mlr3::mlr_measures].
10 | #'
11 | #' @template param_id
12 | #' @template param_range
13 | #' @template param_minimize
14 | #' @template param_average
15 | #' @template param_aggregator
16 | #' @template param_predict_type
17 | #' @template param_measure_properties
18 | #' @template param_predict_sets
19 | #' @template param_task_properties
20 | #' @template param_packages
21 | #' @template param_label
22 | #' @template param_man
23 | #'
24 | #' @seealso
25 | #' Example cluster measures: [`clust.dunn`][mlr_measures_clust.dunn]
26 | #' @export
27 | MeasureClust = R6Class("MeasureClust",
28 | inherit = Measure,
29 | cloneable = FALSE,
30 | public = list(
31 | #' @description
32 | #' Creates a new instance of this [R6][R6::R6Class] class.
33 | initialize = function(
34 | id,
35 | range,
36 | minimize = NA,
37 | aggregator = NULL,
38 | properties = character(),
39 | predict_type = "partition",
40 | task_properties = character(),
41 | packages = character(),
42 | label = NA_character_,
43 | man = NA_character_
44 | ) {
45 | super$initialize(
46 | id = id,
47 | task_type = "clust",
48 | range = range,
49 | minimize = minimize,
50 | aggregator = aggregator,
51 | properties = properties,
52 | predict_type = predict_type,
53 | task_properties = task_properties,
54 | packages = c("mlr3cluster", packages),
55 | label = label,
56 | man = man
57 | )
58 | }
59 | )
60 | )
61 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | # All available hooks: https://pre-commit.com/hooks.html
2 | # R specific hooks: https://github.com/lorenzwalthert/precommit
3 | repos:
4 | - repo: https://github.com/lorenzwalthert/precommit
5 | rev: v0.4.3.9008
6 | hooks:
7 | - id: style-files
8 | args: [--style_pkg=styler.mlr, --style_fun=mlr_style]
9 | additional_dependencies:
10 | - mlr-org/styler.mlr
11 | - id: roxygenize
12 | additional_dependencies:
13 | - ClusterR
14 | - LPCM
15 | - R6
16 | - RWeka
17 | - apcluster
18 | - backports
19 | - checkmate
20 | - clue
21 | - cluster
22 | - data.table
23 | - dbscan
24 | - e1071
25 | - fpc
26 | - kernlab
27 | - mclust
28 | - mlbench
29 | - mlr3
30 | - mlr3misc
31 | - paradox
32 | - stream
33 | # codemeta must be above use-tidy-description when both are used
34 | - id: use-tidy-description
35 | - id: readme-rmd-rendered
36 | - id: parsable-R
37 | - id: no-browser-statement
38 | - id: deps-in-desc
39 | exclude: data-raw|inst
40 | - repo: https://github.com/pre-commit/pre-commit-hooks
41 | rev: v5.0.0
42 | hooks:
43 | - id: check-added-large-files
44 | args: [--maxkb=200]
45 | - id: file-contents-sorter
46 | files: '^\.Rbuildignore$'
47 | - id: end-of-file-fixer
48 | exclude: '\.Rd'
49 | - repo: local
50 | hooks:
51 | - id: forbid-to-commit
52 | name: Don't commit common R artifacts
53 | entry: Cannot commit .Rhistory, .RData, .Rds or .rds.
54 | language: fail
55 | files: '\.Rhistory|\.RData|\.Rds|\.rds$'
56 | # `exclude: ` to allow committing specific files.
57 | ci:
58 | autoupdate_schedule: monthly
59 |
--------------------------------------------------------------------------------
/tests/testthat/test_PredictionClust.R:
--------------------------------------------------------------------------------
1 | test_that("Construction", {
2 | task = tsk("usarrests")
3 | p = PredictionClust$new(row_ids = task$row_ids, partition = rep.int(1L, nrow(task$data())))
4 | expect_prediction(p)
5 | expect_prediction_clust(p)
6 |
7 | expect_prediction(c(p, p))
8 | })
9 |
10 | test_that("Internally constructed Prediction", {
11 | task = tsk("usarrests")
12 | learner = lrn("clust.featureless", num_clusters = 1L)
13 | p = learner$train(task)$predict(task)
14 | expect_prediction(p)
15 | expect_prediction_clust(p)
16 | })
17 |
18 | test_that("filter works", {
19 | task = tsk("usarrests")
20 | learner = lrn("clust.featureless", num_clusters = 1L)
21 | p = learner$train(task)$predict(task)
22 | pdata = p$data
23 |
24 | pdata = filter_prediction_data(pdata, row_ids = 1:3)
25 | expect_set_equal(pdata$row_ids, 1:3)
26 | expect_integer(pdata$partition, len = 3)
27 | })
28 |
29 | test_that("construction of empty PredictionDataClust", {
30 | task = tsk("usarrests")
31 |
32 | learner = lrn("clust.featureless", predict_type = "partition")
33 | learner$train(task)
34 | pred = learner$predict(task, row_ids = integer())
35 | expect_prediction(pred)
36 | expect_set_equal(pred$predict_types, "partition")
37 | expect_integer(pred$row_ids, len = 0L)
38 | expect_numeric(pred$partition, len = 0L)
39 | expect_null(pred$prob)
40 | expect_data_table(as.data.table(pred), nrows = 0L, ncols = 2L)
41 |
42 | learner = lrn("clust.featureless", predict_type = "prob")
43 | learner$train(task)
44 | pred = learner$predict(task, row_ids = integer())
45 | expect_prediction(pred)
46 | expect_set_equal(pred$predict_types, c("partition", "prob"))
47 | expect_integer(pred$row_ids, len = 0L)
48 | expect_numeric(pred$partition, len = 0L)
49 | expect_numeric(pred$prob, len = 0L)
50 | expect_data_table(as.data.table(pred), nrows = 0L, ncols = 3L)
51 | })
52 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | S3method(as.data.table,PredictionClust)
4 | S3method(as_prediction,PredictionDataClust)
5 | S3method(as_prediction_clust,PredictionClust)
6 | S3method(as_prediction_clust,data.frame)
7 | S3method(as_task_clust,DataBackend)
8 | S3method(as_task_clust,TaskClust)
9 | S3method(as_task_clust,data.frame)
10 | S3method(as_task_clust,formula)
11 | S3method(c,PredictionDataClust)
12 | S3method(check_prediction_data,PredictionDataClust)
13 | S3method(create_empty_prediction_data,TaskClust)
14 | S3method(filter_prediction_data,PredictionDataClust)
15 | S3method(is_missing_prediction_data,PredictionDataClust)
16 | export(LearnerClust)
17 | export(LearnerClustAP)
18 | export(LearnerClustAgnes)
19 | export(LearnerClustBICO)
20 | export(LearnerClustBIRCH)
21 | export(LearnerClustCMeans)
22 | export(LearnerClustCobweb)
23 | export(LearnerClustDBSCAN)
24 | export(LearnerClustDBSCANfpc)
25 | export(LearnerClustDiana)
26 | export(LearnerClustEM)
27 | export(LearnerClustFanny)
28 | export(LearnerClustFarthestFirst)
29 | export(LearnerClustFeatureless)
30 | export(LearnerClustHDBSCAN)
31 | export(LearnerClustHclust)
32 | export(LearnerClustKKMeans)
33 | export(LearnerClustKMeans)
34 | export(LearnerClustMclust)
35 | export(LearnerClustMeanShift)
36 | export(LearnerClustMiniBatchKMeans)
37 | export(LearnerClustOPTICS)
38 | export(LearnerClustPAM)
39 | export(LearnerClustSimpleKMeans)
40 | export(LearnerClustXMeans)
41 | export(MeasureClust)
42 | export(PredictionClust)
43 | export(TaskClust)
44 | export(as_prediction_clust)
45 | export(as_task_clust)
46 | import(checkmate)
47 | import(data.table)
48 | import(mlr3)
49 | import(mlr3misc)
50 | import(paradox)
51 | importFrom(R6,R6Class)
52 | importFrom(clue,cl_predict)
53 | importFrom(cluster,silhouette)
54 | importFrom(fpc,cluster.stats)
55 | importFrom(stats,dist)
56 | importFrom(stats,model.frame)
57 | importFrom(stats,predict)
58 | importFrom(stats,runif)
59 | importFrom(stats,terms)
60 | importFrom(utils,bibentry)
61 |
--------------------------------------------------------------------------------
/tests/testthat/test_LearnerClust.R:
--------------------------------------------------------------------------------
1 | test_that("predict on newdata works / clust", {
2 | task = tsk("usarrests")$filter(1:40)
3 | learner = lrn("clust.featureless", num_clusters = 1L)
4 | expect_error(learner$predict(task), "trained")
5 | learner$train(task)
6 | expect_task(learner$state$train_task)
7 | newdata = tsk("usarrests")$filter(41:50)$data()
8 |
9 | # passing the task
10 | p = learner$predict_newdata(newdata = newdata, task = task)
11 | expect_data_table(as.data.table(p), nrows = 10)
12 | expect_set_equal(as.data.table(p)$row_ids, 1:10)
13 | expect_null(p$truth)
14 |
15 | # rely on internally stored task representation
16 | p = learner$predict_newdata(newdata = newdata, task = NULL)
17 | expect_data_table(as.data.table(p), nrows = 10L)
18 | expect_set_equal(as.data.table(p)$row_ids, 1:10)
19 | expect_null(p$truth)
20 | })
21 |
22 | test_that("reset()", {
23 | task = tsk("usarrests")
24 | learner = lrn("clust.featureless", num_clusters = 2L)
25 |
26 | learner$train(task)
27 | expect_list(learner$state, names = "unique")
28 | expect_learner(learner$reset())
29 | expect_null(learner$state)
30 | })
31 |
32 | test_that("empty predict set (#421)", {
33 | task = tsk("usarrests")
34 | learner = lrn("clust.featureless", num_clusters = 1L)
35 | resampling = rsmp("holdout", ratio = 1)
36 | hout = resampling$instantiate(task)
37 | model = learner$train(task, hout$train_set(1))
38 | pred = learner$predict(task, hout$test_set(1))
39 | expect_match(learner$log$msg, "No data to predict on", fixed = TRUE, all = FALSE)
40 | })
41 |
# `save_assignments` controls whether the train-time cluster assignments are
# kept on the learner.
test_that("assignment saving works", {
  arrests = tsk("usarrests")
  featureless = lrn("clust.featureless")

  # default: assignments are stored, one per training row
  expect_true(featureless$save_assignments)
  featureless$train(arrests)
  expect_vector(featureless$assignments)
  expect_length(featureless$assignments, arrests$nrow)

  # switched off: nothing is stored after retraining
  featureless$reset()
  featureless$save_assignments = FALSE
  expect_false(featureless$save_assignments)
  featureless$train(arrests)
  expect_null(featureless$assignments)
})
57 |
--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
1 | # mlr3cluster (development version)
2 |
3 | # mlr3cluster 0.1.12
4 |
5 | * feat: Add `cluster_selection_epsilon` parameter to HDBSCAN learner and
6 | initialize `minPts` to 5.
7 | * docs: Better learner example section
8 |
9 | # mlr3cluster 0.1.11
10 |
11 | * fix: Mclust learner no longer sets the control default with a function not in
12 | import to stay compliant with {paradox} conventions
13 |
14 | # mlr3cluster 0.1.10
15 |
16 | * feat: Add BIRCH learner from 'stream' package
17 | * feat: Add BICO learner from 'stream' package
18 |
19 | # mlr3cluster 0.1.9
20 |
21 | * feat: Add DBSCAN learner from 'fpc' package
22 | * feat: Add HDBSCAN learner from 'dbscan' package
23 | * feat: Add OPTICS learner from 'dbscan' package
24 | * chore: Compatibility with upcoming 'paradox' release
25 | * chore: Move to testthat3
26 | * refactor: General code refactoring
27 |
28 | # mlr3cluster 0.1.8
29 |
30 | * feat: Add new task based on `ruspini` dataset
31 |
32 | # mlr3cluster 0.1.7
33 |
34 | * chore: Replace 'clusterCrit' measures with alternatives from 'cluster' and 'fpc' packages
35 | * fix: Remove broken unloading test
36 |
37 | # mlr3cluster 0.1.6
38 |
39 | * feat: Add states as row names to `usarrests` task
40 | * fix: Remove dictionary items after unloading package
41 |
42 | # mlr3cluster 0.1.5
43 |
44 | * feat: Add Mclust learner
45 | * fix: Fix error associated with new dbscan release
46 |
47 | # mlr3cluster 0.1.4
48 |
49 | * refactor: General code refactoring
50 |
51 | # mlr3cluster 0.1.3
52 |
53 | * refactor: General code refactoring
54 | * fix: Small bug fixes
55 | * feat: Add filter to PredictionClust
56 |
57 | # mlr3cluster 0.1.2
58 |
59 | * feat: Add Hclust learner
60 | * docs: Add tests and documentation for Hclust
61 | * feat: Add within sum of squares measure
62 | * docs: Add documentation for WSS measure
63 | * refactor: Code factor adaptations
64 |
65 | # mlr3cluster 0.1.1
66 |
67 | * feat: Add eight new learners
68 | * feat: Add `assignments` and `save_assignments` fields to `LearnerClust` class
69 |
70 | # mlr3cluster 0.1.0
71 |
72 | * Initial upload to CRAN
73 |
--------------------------------------------------------------------------------
/R/as_prediction_clust.R:
--------------------------------------------------------------------------------
#' @title Convert to a Cluster Prediction
#'
#' @description
#' Convert object to a [PredictionClust].
#' This is an S3 generic; the call is dispatched on the class of `x`.
#'
#' @inheritParams mlr3::as_prediction
#'
#' @return [PredictionClust].
#' @export
#' @examples
#' if (requireNamespace("e1071")) {
#'   # create a prediction object
#'   task = tsk("usarrests")
#'   learner = lrn("clust.cmeans", predict_type = "prob")
#'   learner$train(task)
#'   p = learner$predict(task)
#'
#'   # convert to a data.table
#'   tab = as.data.table(p)
#'
#'   # convert back to a Prediction
#'   as_prediction_clust(tab)
#'
#'   # split data.table into a 3 data.tables based on UrbanPop
#'   f = cut(task$data(rows = tab$row_ids)$UrbanPop, 3)
#'   tabs = split(tab, f)
#'
#'   # convert back to list of predictions
#'   preds = lapply(tabs, as_prediction_clust)
#'
#'   # calculate performance in each group
#'   sapply(preds, function(p) p$score(task = task))
#' }
as_prediction_clust = function(x, ...) {
  UseMethod("as_prediction_clust")
}
38 |
#' @rdname as_prediction_clust
#' @export
as_prediction_clust.PredictionClust = function(x, ...) { # nolint
  # identity: the input already has the requested class
  return(x)
}
44 |
#' @rdname as_prediction_clust
#' @export
as_prediction_clust.data.frame = function(x, ...) { # nolint
  # Columns every prediction table must provide; everything else has to be a
  # probability column named `prob.<cluster>`.
  base_cols = c("row_ids", "partition")
  assert_names(names(x), must.include = base_cols)
  prob_cols = setdiff(names(x), base_cols)

  # The method dispatches on any data.frame, but the column selection below
  # relies on data.table's `with = FALSE` semantics. Convert first so a plain
  # data.frame (or tibble) does not fail with "unused argument".
  x = data.table::as.data.table(x)

  if (length(prob_cols) > 0L) {
    if (!all(startsWith(prob_cols, "prob."))) {
      stopf(
        "Table may only contain columns 'row_ids', 'partition' as well as columns prefixed with 'prob.' for class probabilities." # nolint
      )
    }
    prob = as.matrix(x[, prob_cols, with = FALSE])
    # strip the leading "prob." (5 characters) to recover the cluster labels
    cn = colnames(prob)
    colnames(prob) = substr(cn, 6L, nchar(cn))
  } else {
    prob = NULL
  }

  # After the checks above, the non-probability columns are exactly `base_cols`;
  # they are passed on as named constructor arguments.
  invoke(PredictionClust$new, prob = prob, .args = x[, base_cols, with = FALSE])
}
66 |
--------------------------------------------------------------------------------
/R/LearnerClustCobweb.R:
--------------------------------------------------------------------------------
#' @title Cobweb Clustering Learner
#'
#' @name mlr_learners_clust.cobweb
#'
#' @description
#' A [LearnerClust] for Cobweb clustering implemented in [RWeka::Cobweb()].
#' The predict method uses [RWeka::predict.Weka_clusterer()] to compute the
#' cluster memberships for new data.
#'
#' @templateVar id clust.cobweb
#' @template learner
#'
#' @references
#' `r format_bib("witten2002data", "fisher1987knowledge", "gennari1989models")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClustCobweb = R6Class("LearnerClustCobweb",
  inherit = LearnerClust,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      super$initialize(
        id = "clust.cobweb",
        label = "Cobweb Clustering",
        man = "mlr3cluster::mlr_learners_clust.cobweb",
        packages = "RWeka",
        param_set = ps(
          A = p_dbl(0, default = 1, tags = "train"),
          C = p_dbl(0, default = 0.002, tags = "train"),
          S = p_int(1L, default = 42L, tags = "train")
        ),
        predict_types = "partition",
        feature_types = c("logical", "integer", "numeric"),
        properties = c("partitional", "exclusive", "complete")
      )
    }
  ),

  private = list(
    # Fit the Weka clusterer on the task data; all "train" parameters are
    # forwarded through a Weka control object.
    .train = function(task) {
      train_args = self$param_set$get_values(tags = "train")
      weka_ctrl = invoke(RWeka::Weka_control, .args = train_args)
      fit = invoke(RWeka::Cobweb, x = task$data(), control = weka_ctrl)

      if (self$save_assignments) {
        # ids are shifted by one; presumably Weka reports 0-based cluster ids
        self$assignments = unname(fit$class_ids + 1L)
      }

      fit
    },

    # Assign each row of the task to a cluster of the fitted model.
    .predict = function(task) {
      class_ids = invoke(predict, self$model, newdata = task$data(), type = "class")
      PredictionClust$new(task = task, partition = class_ids + 1L)
    }
  )
)
61 |
62 | #' @include zzz.R
63 | register_learner("clust.cobweb", LearnerClustCobweb)
64 |
--------------------------------------------------------------------------------
/R/LearnerClustBICO.R:
--------------------------------------------------------------------------------
#' @title BICO Clustering Learner
#'
#' @name mlr_learners_clust.bico
#'
#' @description
#' BICO (Fast computation of k-means coresets in a data stream) clustering.
#' Calls [stream::DSC_BICO()] from \CRANpkg{stream}.
#'
#' @templateVar id clust.bico
#' @template learner
#'
#' @references
#' `r format_bib("fichtenberger2013bico", "hahsler2017stream")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClustBICO = R6Class("LearnerClustBICO",
  inherit = LearnerClust,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      super$initialize(
        id = "clust.bico",
        label = "BICO Clustering",
        man = "mlr3cluster::mlr_learners_clust.bico",
        packages = "stream",
        param_set = ps(
          k = p_int(1L, default = 5L, tags = "train"),
          space = p_int(1L, default = 10L, tags = "train"),
          p = p_int(1L, default = 10L, tags = "train"),
          iterations = p_int(1L, default = 10L, tags = "train")
        ),
        predict_types = "partition",
        feature_types = c("integer", "numeric"),
        properties = c("partitional", "exclusive", "complete")
      )
    }
  ),

  private = list(
    # Create the stream clusterer, then feed it the whole task data in one go
    # through an in-memory data stream.
    .train = function(task) {
      train_args = self$param_set$get_values(tags = "train")
      train_data = task$data()

      clusterer = invoke(stream::DSC_BICO, .args = train_args)
      mem_stream = stream::DSD_Memory(train_data)
      # return value is not used; the clusterer object itself is kept as the model
      stats::update(clusterer, mem_stream, n = nrow(train_data))

      if (self$save_assignments) {
        train_pred = invoke(predict, clusterer, newdata = train_data)
        self$assignments = as.integer(train_pred[[1L]])
      }

      clusterer
    },

    # The first element of the predict() result holds the cluster ids.
    .predict = function(task) {
      new_pred = invoke(predict, self$model, newdata = task$data())
      PredictionClust$new(task = task, partition = as.integer(new_pred[[1L]]))
    }
  )
)
64 |
65 | #' @include zzz.R
66 | register_learner("clust.bico", LearnerClustBICO)
67 |
--------------------------------------------------------------------------------
/R/LearnerClustFarthestFirst.R:
--------------------------------------------------------------------------------
#' @title Farthest First Clustering Learner
#'
#' @name mlr_learners_clust.ff
#'
#' @description
#' A [LearnerClust] for Farthest First clustering implemented in [RWeka::FarthestFirst()].
#' The predict method uses [RWeka::predict.Weka_clusterer()] to compute the
#' cluster memberships for new data.
#'
#' @templateVar id clust.ff
#' @template learner
#'
#' @references
#' `r format_bib("witten2002data", "hochbaum1985best")`
#'
#' @export
#' @template seealso_learner
#' @template example
# NOTE(review): the R6 classname string is "LearnerClustFF" while the generator
# is called LearnerClustFarthestFirst; kept as-is because the class attribute
# is visible to callers.
LearnerClustFarthestFirst = R6Class("LearnerClustFF",
  inherit = LearnerClust,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      super$initialize(
        id = "clust.ff",
        label = "Farthest First Clustering",
        man = "mlr3cluster::mlr_learners_clust.ff",
        packages = "RWeka",
        param_set = ps(
          N = p_int(1L, default = 2L, tags = "train"),
          S = p_int(1L, default = 1L, tags = "train"),
          output_debug_info = p_lgl(default = FALSE, tags = "train")
        ),
        predict_types = "partition",
        feature_types = c("logical", "integer", "numeric"),
        properties = c("partitional", "exclusive", "complete")
      )
    }
  ),

  private = list(
    # Fit the Weka clusterer; all "train" parameters go through a Weka control
    # object.
    .train = function(task) {
      train_args = self$param_set$get_values(tags = "train")
      # parameter ids use "_" where the Weka option names use "-"
      names(train_args) = chartr("_", "-", names(train_args))

      weka_ctrl = invoke(RWeka::Weka_control, .args = train_args)
      fit = invoke(RWeka::FarthestFirst, x = task$data(), control = weka_ctrl)

      if (self$save_assignments) {
        # ids are shifted by one; presumably Weka reports 0-based cluster ids
        self$assignments = unname(fit$class_ids + 1L)
      }

      fit
    },

    # Assign each row of the task to a cluster of the fitted model.
    .predict = function(task) {
      class_ids = invoke(predict, self$model, newdata = task$data(), type = "class")
      PredictionClust$new(task = task, partition = class_ids + 1L)
    }
  )
)
62 |
63 | #' @include zzz.R
64 | register_learner("clust.ff", LearnerClustFarthestFirst)
65 |
--------------------------------------------------------------------------------
/R/LearnerClustBIRCH.R:
--------------------------------------------------------------------------------
#' @title BIRCH Clustering Learner
#'
#' @name mlr_learners_clust.birch
#'
#' @description
#' BIRCH (Balanced Iterative Reducing Clustering using Hierarchies) clustering.
#' Calls [stream::DSC_BIRCH()] from \CRANpkg{stream}.
#'
#' @templateVar id clust.birch
#' @template learner
#'
#' @references
#' `r format_bib("zhang1996birch", "zhang1997birch", "hahsler2017stream")`
#'
#' @export
#' @template seealso_learner
#' @template simple_example
LearnerClustBIRCH = R6Class("LearnerClustBIRCH",
  inherit = LearnerClust,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      super$initialize(
        id = "clust.birch",
        label = "BIRCH Clustering",
        man = "mlr3cluster::mlr_learners_clust.birch",
        packages = "stream",
        # threshold, branching and maxLeaf carry no default and are tagged
        # "required", so they must be set before training
        param_set = ps(
          threshold = p_dbl(0L, tags = c("train", "required")),
          branching = p_int(1L, tags = c("train", "required")),
          maxLeaf = p_int(1L, tags = c("train", "required")),
          maxMem = p_int(0L, default = 0L, tags = "train"),
          outlierThreshold = p_dbl(default = 0.25, tags = "train")
        ),
        predict_types = "partition",
        feature_types = c("integer", "numeric"),
        properties = c("hierarchical", "exclusive", "complete")
      )
    }
  ),

  private = list(
    # Create the stream clusterer, then feed it the whole task data in one go
    # through an in-memory data stream.
    .train = function(task) {
      train_args = self$param_set$get_values(tags = "train")
      train_data = task$data()

      clusterer = invoke(stream::DSC_BIRCH, .args = train_args)
      mem_stream = stream::DSD_Memory(train_data)
      # return value is not used; the clusterer object itself is kept as the model
      stats::update(clusterer, mem_stream, n = nrow(train_data))

      if (self$save_assignments) {
        train_pred = invoke(predict, clusterer, newdata = train_data)
        self$assignments = as.integer(train_pred[[1L]])
      }

      clusterer
    },

    # The first element of the predict() result holds the cluster ids.
    .predict = function(task) {
      new_pred = invoke(predict, self$model, newdata = task$data())
      PredictionClust$new(task = task, partition = as.integer(new_pred[[1L]]))
    }
  )
)
65 |
66 | #' @include zzz.R
67 | register_learner("clust.birch", LearnerClustBIRCH)
68 |
--------------------------------------------------------------------------------
/R/LearnerClustHDBSCAN.R:
--------------------------------------------------------------------------------
#' @title Hierarchical DBSCAN (HDBSCAN) Clustering Learner
#'
#' @name mlr_learners_clust.hdbscan
#'
#' @description
#' HDBSCAN (Hierarchical DBSCAN) clustering.
#' Calls [dbscan::hdbscan()] from \CRANpkg{dbscan}.
#'
#' @templateVar id clust.hdbscan
#' @template learner
#'
#' @references
#' `r format_bib("hahsler2019dbscan", "campello2013density")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClustHDBSCAN = R6Class("LearnerClustHDBSCAN",
  inherit = LearnerClust,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      hdbscan_params = ps(
        minPts = p_int(0L, tags = c("train", "required")),
        cluster_selection_epsilon = p_dbl(default = 0, tags = "train"),
        gen_hdbscan_tree = p_lgl(default = FALSE, tags = "train"),
        gen_simplified_tree = p_lgl(default = FALSE, tags = "train"),
        verbose = p_lgl(default = FALSE, tags = "train")
      )

      # `minPts` is required, so it is initialized to a usable value
      hdbscan_params$set_values(minPts = 5L)

      super$initialize(
        id = "clust.hdbscan",
        label = "HDBSCAN Clustering",
        man = "mlr3cluster::mlr_learners_clust.hdbscan",
        packages = "dbscan",
        param_set = hdbscan_params,
        predict_types = "partition",
        feature_types = c("logical", "integer", "numeric"),
        properties = c("density", "exclusive", "complete")
      )
    }
  ),

  private = list(
    # Fit hdbscan and keep the training data inside the model, because the
    # predict step passes it back in as `data`.
    .train = function(task) {
      train_args = self$param_set$get_values(tags = "train")
      train_data = task$data()

      fit = invoke(dbscan::hdbscan, x = train_data, .args = train_args)
      fit = insert_named(fit, list(data = train_data))

      if (self$save_assignments) {
        self$assignments = fit$cluster
      }

      fit
    },

    # Predict cluster ids for the task rows, using the stored training data.
    .predict = function(task) {
      fitted_model = self$model
      raw = invoke(predict, fitted_model, newdata = task$data(), data = fitted_model$data)
      PredictionClust$new(task = task, partition = as.integer(raw))
    }
  )
)
66 |
67 | #' @include zzz.R
68 | register_learner("clust.hdbscan", LearnerClustHDBSCAN)
69 |
--------------------------------------------------------------------------------
/R/as_task_clust.R:
--------------------------------------------------------------------------------
#' @title Convert to a Cluster Task
#'
#' @description
#' Convert object to a [TaskClust].
#' This is a S3 generic, specialized for at least the following objects:
#'
#' 1. [TaskClust]: ensure the identity.
#' 2. [data.frame()] and [mlr3::DataBackend]: provides an alternative to calling constructor of [TaskClust].
#'
#' @inheritParams mlr3::as_task
#'
#' @return [TaskClust].
#' @export
#' @examples
#' as_task_clust(datasets::USArrests)
as_task_clust = function(x, ...) {
  # S3 dispatch on the class of `x`
  UseMethod("as_task_clust")
}
19 |
#' @rdname as_task_clust
#' @param clone (`logical(1)`)\cr
#'   If `TRUE`, ensures that the returned object is not the same as the input `x`.
#' @export
as_task_clust.TaskClust = function(x, clone = FALSE, ...) { # nolint
  # without a clone request the very same object is handed back
  if (!clone) {
    return(x)
  }
  x$clone()
}
27 |
#' @rdname as_task_clust
#' @param id (`character(1)`)\cr
#'   Id for the new task.
#'   Defaults to the (deparsed and substituted) name of the data argument.
#' @export
as_task_clust.data.frame = function(x, id = deparse1(substitute(x)), ...) { # nolint
  # evaluate the default id right away, while `x` still is the caller's expression
  force(id)

  # warn (but do not fail) about double columns that contain infinite values
  double_cols = keep(x, is.double)
  has_inf = map_lgl(double_cols, anyInfinite)
  inf_cols = which(has_inf)
  if (length(inf_cols) > 0L) {
    warningf("Detected columns with unsupported Inf values in data: %s", str_collapse(names(inf_cols)))
  }

  TaskClust$new(id = id, backend = x)
}
43 |
#' @rdname as_task_clust
#' @export
as_task_clust.DataBackend = function(x, id = deparse1(substitute(x)), ...) { # nolint
  # resolve the default id from the caller's expression before `x` is used
  force(id)

  task = TaskClust$new(id = id, backend = x)
  task
}
51 |
#' @rdname as_task_clust
#' @param data (`data.frame()`)\cr
#'   Data frame containing all columns specified in formula `x`.
#' @export
as_task_clust.formula = function(x, data, id = deparse1(substitute(data)), ...) { # nolint
  # resolve the default id from the caller's expression first
  force(id)

  assert_data_frame(data)
  # every variable in the formula must be a column of `data` (or the "." shortcut)
  allowed_vars = c(names(data), ".")
  assert_subset(all.vars(x), allowed_vars, .var.name = "formula")

  # a left-hand side is rejected with an error
  has_response = attr(terms(x, data = data), "response")
  if (has_response) {
    stopf("Formula %s has a response.", format(x))
  }

  # build the model frame, keeping rows with missing values, then strip the
  # attributes model.frame() attaches
  frame = model.frame(x, data, na.action = "na.pass")
  for (attr_name in c("terms", "na.action")) {
    setattr(frame, attr_name, NULL)
  }

  as_task_clust(frame, id = id, ...)
}
70 |
--------------------------------------------------------------------------------
/man/mlr_tasks_ruspini.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/TaskClust_ruspini.R
3 | \name{mlr_tasks_ruspini}
4 | \alias{mlr_tasks_ruspini}
5 | \title{Ruspini Cluster Task}
6 | \format{
7 | \link[R6:R6Class]{R6::R6Class} inheriting from \link{TaskClust}.
8 | }
9 | \description{
10 | A cluster task for the \link[cluster:ruspini]{cluster::ruspini} data set.
11 | }
12 | \section{Dictionary}{
13 |
14 | This \link[mlr3:Task]{mlr3::Task} can be instantiated via the \link[mlr3misc:Dictionary]{dictionary} \link[mlr3:mlr_tasks]{mlr3::mlr_tasks} or with the associated sugar function \code{\link[mlr3:mlr_sugar]{mlr3::tsk()}}:
15 |
16 | \if{html}{\out{<div class="sourceCode">}}\preformatted{mlr_tasks$get("ruspini")
17 | tsk("ruspini")
18 | }\if{html}{\out{</div>}}
19 | }
20 |
21 | \section{Meta Information}{
22 |
23 | \itemize{
24 | \item Task type: \dQuote{clust}
25 | \item Dimensions: 75x2
26 | \item Properties: -
27 | \item Has Missings: \code{FALSE}
28 | \item Target: -
29 | \item Features: \dQuote{x}, \dQuote{y}
30 | }
31 | }
32 |
33 | \references{
34 | Ruspini EH (1970).
35 | \dQuote{Numerical methods for fuzzy clustering.}
36 | \emph{Information Sciences}, \bold{2}(3), 319-350.
37 | \doi{10.1016/S0020-0255(70)80056-1}.
38 | }
39 | \seealso{
40 | \itemize{
41 | \item Chapter in the \href{https://mlr3book.mlr-org.com/}{mlr3book}:
42 | \url{https://mlr3book.mlr-org.com/chapters/chapter2/data_and_basic_modeling.html}
43 | \item Package \CRANpkg{mlr3data} for more toy tasks.
44 | \item Package \CRANpkg{mlr3oml} for downloading tasks from \url{https://www.openml.org}.
45 | \item Package \CRANpkg{mlr3viz} for some generic visualizations.
46 | \item \link[mlr3misc:Dictionary]{Dictionary} of \link[mlr3:Task]{Tasks}: \link[mlr3:mlr_tasks]{mlr3::mlr_tasks}
47 | \item \code{as.data.table(mlr_tasks)} for a table of available \link[mlr3:Task]{Tasks} in the running session (depending on the loaded packages).
48 | \item \CRANpkg{mlr3fselect} and \CRANpkg{mlr3filters} for feature selection and feature filtering.
49 | \item Extension packages for additional task types:
50 | \itemize{
51 | \item Unsupervised clustering: \CRANpkg{mlr3cluster}
52 | \item Probabilistic supervised regression and survival analysis: \url{https://mlr3proba.mlr-org.com/}.
53 | }
54 | }
55 |
56 | Other Task:
57 | \code{\link{TaskClust}},
58 | \code{\link{mlr_tasks_usarrests}}
59 | }
60 | \concept{Task}
61 |
--------------------------------------------------------------------------------
/R/LearnerClust.R:
--------------------------------------------------------------------------------
#' @title Cluster Learner
#'
#' @description
#' This Learner specializes [mlr3::Learner] for cluster problems:
#' * `task_type` is set to `"clust"`.
#' * Creates [mlr3::Prediction]s of class [PredictionClust].
#' * Possible values for `predict_types` are:
#'   - `"partition"`: Integer indicating the cluster membership.
#'   - `"prob"`: Probability for belonging to each cluster.
#'
#' Predefined learners can be found in the [mlr3misc::Dictionary] [mlr3::mlr_learners].
#'
#' @template param_id
#' @template param_param_set
#' @template param_predict_types
#' @template param_feature_types
#' @template param_learner_properties
#' @template param_packages
#' @template param_label
#' @template param_man
#'
#' @export
#' @examples
#' library(mlr3)
#' library(mlr3cluster)
#' ids = mlr_learners$keys("^clust")
#' ids
#'
#' # get a specific learner from mlr_learners:
#' learner = lrn("clust.kmeans")
#' print(learner)
LearnerClust = R6Class("LearnerClust",
  inherit = Learner,
  public = list(
    #' @field assignments (`NULL` | `vector()`)\cr
    #'   Cluster assignments from learned model.
    assignments = NULL,

    #' @field save_assignments (`logical()`)\cr
    #'   Should assignments for 'train' data be saved in the learner?
    #'   Default is `TRUE`.
    save_assignments = TRUE,

    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function(
      id,
      param_set = ps(),
      predict_types = "partition",
      feature_types = character(),
      properties = character(),
      packages = character(),
      label = NA_character_,
      man = NA_character_
    ) {
      # mlr3cluster itself is always listed first among the required packages
      required_pkgs = c("mlr3cluster", packages)

      super$initialize(
        task_type = "clust",
        id = id,
        label = label,
        man = man,
        packages = required_pkgs,
        param_set = param_set,
        predict_types = predict_types,
        feature_types = feature_types,
        properties = properties
      )
    },

    #' @description
    #' Reset `assignments` field before calling parent's `reset()`.
    reset = function() {
      # drop the stored assignments, then let the parent clear the rest
      self$assignments = NULL
      super$reset()
    }
  )
)
77 |
--------------------------------------------------------------------------------
/man/mlr_tasks_usarrests.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/TaskClust_usarrest.R
3 | \name{mlr_tasks_usarrests}
4 | \alias{mlr_tasks_usarrests}
5 | \title{US Arrests Cluster Task}
6 | \format{
7 | \link[R6:R6Class]{R6::R6Class} inheriting from \link{TaskClust}.
8 | }
9 | \description{
10 | A cluster task for the \link[datasets:USArrests]{datasets::USArrests} data set.
11 | Rownames are stored as variable \code{"states"} with column role \code{"name"}.
12 | }
13 | \section{Dictionary}{
14 |
15 | This \link[mlr3:Task]{mlr3::Task} can be instantiated via the \link[mlr3misc:Dictionary]{dictionary} \link[mlr3:mlr_tasks]{mlr3::mlr_tasks} or with the associated sugar function \code{\link[mlr3:mlr_sugar]{mlr3::tsk()}}:
16 |
17 | \if{html}{\out{<div class="sourceCode">}}\preformatted{mlr_tasks$get("usarrests")
18 | tsk("usarrests")
19 | }\if{html}{\out{</div>}}
20 | }
21 |
22 | \section{Meta Information}{
23 |
24 | \itemize{
25 | \item Task type: \dQuote{clust}
26 | \item Dimensions: 50x4
27 | \item Properties: -
28 | \item Has Missings: \code{FALSE}
29 | \item Target: -
30 | \item Features: \dQuote{Assault}, \dQuote{Murder}, \dQuote{Rape}, \dQuote{UrbanPop}
31 | }
32 | }
33 |
34 | \references{
35 | Berry, Brian J (1979).
36 | \dQuote{Interactive Data Analysis: A Practical Primer.}
37 | \emph{Journal of the Royal Statistical Society: Series C (Applied Statistics)}, \bold{28}, 181.
38 | }
39 | \seealso{
40 | \itemize{
41 | \item Chapter in the \href{https://mlr3book.mlr-org.com/}{mlr3book}:
42 | \url{https://mlr3book.mlr-org.com/chapters/chapter2/data_and_basic_modeling.html}
43 | \item Package \CRANpkg{mlr3data} for more toy tasks.
44 | \item Package \CRANpkg{mlr3oml} for downloading tasks from \url{https://www.openml.org}.
45 | \item Package \CRANpkg{mlr3viz} for some generic visualizations.
46 | \item \link[mlr3misc:Dictionary]{Dictionary} of \link[mlr3:Task]{Tasks}: \link[mlr3:mlr_tasks]{mlr3::mlr_tasks}
47 | \item \code{as.data.table(mlr_tasks)} for a table of available \link[mlr3:Task]{Tasks} in the running session (depending on the loaded packages).
48 | \item \CRANpkg{mlr3fselect} and \CRANpkg{mlr3filters} for feature selection and feature filtering.
49 | \item Extension packages for additional task types:
50 | \itemize{
51 | \item Unsupervised clustering: \CRANpkg{mlr3cluster}
52 | \item Probabilistic supervised regression and survival analysis: \url{https://mlr3proba.mlr-org.com/}.
53 | }
54 | }
55 |
56 | Other Task:
57 | \code{\link{TaskClust}},
58 | \code{\link{mlr_tasks_ruspini}}
59 | }
60 | \concept{Task}
61 |
--------------------------------------------------------------------------------
/R/LearnerClustDiana.R:
--------------------------------------------------------------------------------
#' @title Divisive Hierarchical Clustering Learner
#'
#' @name mlr_learners_clust.diana
#'
#' @description
#' A [LearnerClust] for divisive hierarchical clustering implemented in [cluster::diana()].
#' The predict method uses [stats::cutree()] which cuts the tree resulting from
#' hierarchical clustering into specified number of groups (see parameter `k`).
#' The default value for `k` is 2.
#'
#' @templateVar id clust.diana
#' @template learner
#'
#' @references
#' `r format_bib("kaufman2009finding")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClustDiana = R6Class("LearnerClustDiana",
  inherit = LearnerClust,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      param_set = ps(
        metric = p_fct(c("euclidean", "manhattan"), default = "euclidean", tags = "train"),
        stand = p_lgl(default = FALSE, tags = "train"),
        trace.lev = p_int(0L, default = 0L, tags = "train"),
        k = p_int(1L, default = 2L, tags = c("train", "predict"))
      )

      # `k` is read unconditionally in train and predict, so it is always set
      param_set$set_values(k = 2L)

      super$initialize(
        id = "clust.diana",
        feature_types = c("logical", "integer", "numeric"),
        predict_types = "partition",
        param_set = param_set,
        properties = c("hierarchical", "exclusive", "complete"),
        packages = "cluster",
        man = "mlr3cluster::mlr_learners_clust.diana",
        label = "Divisive Hierarchical Clustering"
      )
    }
  ),

  private = list(
    # Fit the divisive hierarchy on the raw task data. `k` is not an argument of
    # diana(); it is only used to cut the tree.
    .train = function(task) {
      pv = self$param_set$get_values(tags = "train")
      m = invoke(cluster::diana,
        x = task$data(),
        diss = FALSE,
        .args = remove_named(pv, "k")
      )
      if (self$save_assignments) {
        self$assignments = stats::cutree(m, pv$k)
      }
      m
    },

    # Return the partition obtained by cutting the fitted tree into `k` groups.
    # The result describes the training observations, hence the warning.
    .predict = function(task) {
      pv = self$param_set$get_values(tags = "predict")
      if (pv$k > task$nrow) {
        stopf("`k` needs to be between 1 and %i.", task$nrow)
      }

      warn_prediction_useless(self$id)

      # Cut the stored tree instead of reusing `self$assignments`: that field is
      # `NULL` when `save_assignments` is `FALSE`, and it would ignore a `k`
      # that was changed after training.
      partition = stats::cutree(self$model, pv$k)
      PredictionClust$new(task = task, partition = partition)
    }
  )
)
74 |
75 | #' @include zzz.R
76 | register_learner("clust.diana", LearnerClustDiana)
77 |
--------------------------------------------------------------------------------
/R/LearnerClustMclust.R:
--------------------------------------------------------------------------------
#' @title Gaussian Mixture Models-Based Clustering Learner
#'
#' @name mlr_learners_clust.mclust
#'
#' @description
#' A [LearnerClust] for model-based clustering implemented in [mclust::Mclust()].
#' The predict method uses [mclust::predict.Mclust()] to compute the
#' cluster memberships for new data.
#'
#' @templateVar id clust.mclust
#' @template learner
#'
#' @references
#' `r format_bib("scrucca2016mclust", "fraley2002model")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClustMclust = R6Class("LearnerClustMclust",
  inherit = LearnerClust,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      # all parameters are untyped and validated by custom checks only
      is_mclust_bic = crate(function(x) check_class(x, "mclustBIC"))
      param_set = ps(
        G = p_uty(default = 1:9, tags = "train", custom_check = check_numeric),
        modelNames = p_uty(tags = "train", custom_check = check_character),
        prior = p_uty(tags = "train", custom_check = check_list),
        control = p_uty(tags = "train", custom_check = check_list),
        initialization = p_uty(tags = "train", custom_check = check_list),
        x = p_uty(tags = "train", custom_check = is_mclust_bic)
      )

      super$initialize(
        id = "clust.mclust",
        label = "Gaussian Mixture Models Clustering",
        man = "mlr3cluster::mlr_learners_clust.mclust",
        packages = "mclust",
        param_set = param_set,
        predict_types = c("partition", "prob"),
        feature_types = c("logical", "integer", "numeric"),
        properties = c("partitional", "fuzzy", "complete")
      )
    }
  ),

  private = list(
    # Fit the mixture model; the call is wrapped in with_package("mclust", ...).
    .train = function(task) {
      train_args = self$param_set$get_values(tags = "train")
      with_package("mclust", {
        fit = invoke(mclust::Mclust, data = task$data(), .args = train_args)
      })

      if (self$save_assignments) {
        self$assignments = fit$classification
      }

      fit
    },

    # Both the hard classification and the membership matrix `z` are returned.
    .predict = function(task) {
      pred = invoke(predict, self$model, newdata = task$data())
      PredictionClust$new(
        task = task,
        partition = as.integer(pred$classification),
        prob = pred$z
      )
    }
  )
)
67 |
68 | #' @include zzz.R
69 | register_learner("clust.mclust", LearnerClustMclust)
70 |
--------------------------------------------------------------------------------
/R/LearnerClustMeanShift.R:
--------------------------------------------------------------------------------
1 | #' @title Mean Shift Clustering Learner
2 | #'
3 | #' @name mlr_learners_clust.meanshift
4 | #'
5 | #' @description
6 | #' A [LearnerClust] for Mean Shift clustering implemented in [LPCM::ms()].
7 | #' There is no predict method for [`LPCM::ms()`], so the method
8 | #' returns cluster labels for the 'training' data.
9 | #'
10 | #' @templateVar id clust.meanshift
11 | #' @template learner
12 | #'
13 | #' @references
14 | #' `r format_bib("cheng1995mean")`
15 | #'
16 | #' @export
17 | #' @template seealso_learner
18 | #' @template example
19 | LearnerClustMeanShift = R6Class("LearnerClustMeanShift",
20 |   inherit = LearnerClust,
21 |   public = list(
22 |     #' @description
23 |     #' Creates a new instance of this [R6][R6::R6Class] class.
24 |     initialize = function() {
25 |       # custom validator for `h`: passes when either checkmate test accepts the value
26 |       check_h = crate(function(x) {
27 |         if (test_numeric(x) || test_int(x)) {
28 |           return(TRUE)
29 |         }
30 |         "`h` must be either integer or numeric vector"
31 |       })
32 |
33 |       hyperpars = ps(
34 |         h = p_uty(tags = "train", custom_check = check_h),
35 |         subset = p_uty(tags = "train", custom_check = check_numeric),
36 |         scaled = p_int(0L, default = 1, tags = "train"),
37 |         iter = p_int(1L, default = 200L, tags = "train"),
38 |         thr = p_dbl(default = 0.01, tags = "train")
39 |       )
40 |
41 |       super$initialize(
42 |         id = "clust.meanshift",
43 |         label = "Mean Shift Clustering",
44 |         man = "mlr3cluster::mlr_learners_clust.meanshift",
45 |         packages = "LPCM",
46 |         feature_types = c("logical", "integer", "numeric"),
47 |         predict_types = "partition",
48 |         properties = c("partitional", "exclusive", "complete"),
49 |         param_set = hyperpars
50 |       )
51 |     }
52 |   ),
53 |
54 |   private = list(
55 |     .train = function(task) {
56 |       pars = self$param_set$get_values(tags = "train")
57 |       # `subset` may not hold more entries than the task has rows
58 |       too_long = !is.null(pars$subset) && length(pars$subset) > task$nrow
59 |       if (too_long) {
60 |         stopf("`subset` length must be less than or equal to number of observations in task.")
61 |       }
62 |
63 |       fit = invoke(LPCM::ms, X = task$data(), .args = pars)
64 |       if (self$save_assignments) {
65 |         self$assignments = fit$cluster.label
66 |       }
67 |       fit
68 |     },
69 |
70 |     .predict = function(task) {
71 |       # no new computation: the labels stored in the trained model are returned as-is
72 |       warn_prediction_useless(self$id)
73 |       PredictionClust$new(task = task, partition = as.integer(self$model$cluster.label))
74 |     }
75 |   )
76 | )
77 |
78 | #' @include zzz.R
79 | register_learner("clust.meanshift", LearnerClustMeanShift)
80 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: mlr3cluster
2 | Title: Cluster Extension for 'mlr3'
3 | Version: 0.1.12.9000
4 | Authors@R: c(
5 | person("Maximilian", "Mücke", , "muecke.maximilian@gmail.com", role = c("aut", "cre"),
6 | comment = c(ORCID = "0009-0000-9432-9795")),
7 | person("Damir", "Pulatov", , "damirpolat@protonmail.com", role = "aut"),
8 | person("Michel", "Lang", , "michellang@gmail.com", role = "aut",
9 | comment = c(ORCID = "0000-0001-9754-0393")),
10 | person("Marc", "Becker", , "marcbecker@posteo.de", role = "ctb",
11 | comment = c(ORCID = "0000-0002-8115-0400"))
12 | )
13 | Description: Extends the 'mlr3' package with cluster analysis.
14 | License: LGPL-3
15 | URL: https://mlr3cluster.mlr-org.com,
16 | https://github.com/mlr-org/mlr3cluster
17 | BugReports: https://github.com/mlr-org/mlr3cluster/issues
18 | Depends:
19 | mlr3 (>= 0.21.1),
20 | R (>= 3.3.0)
21 | Imports:
22 | backports (>= 1.1.10),
23 | checkmate (>= 2.0.0),
24 | clue,
25 | cluster,
26 | data.table (>= 1.15.0),
27 | fpc,
28 | mlr3misc (>= 0.15.0),
29 | paradox (>= 1.0.1),
30 | R6,
31 | stats
32 | Suggests:
33 | apcluster,
34 | ClusterR (>= 1.3.1),
35 | dbscan,
36 | e1071,
37 | kernlab,
38 | LPCM,
39 | mclust,
40 | mlbench,
41 | RWeka,
42 | stream,
43 | testthat (>= 3.0.0)
44 | Config/testthat/edition: 3
45 | Encoding: UTF-8
46 | Roxygen: list(markdown = TRUE, r6 = TRUE)
47 | RoxygenNote: 7.3.3
48 | Collate:
49 | 'LearnerClust.R'
50 | 'zzz.R'
51 | 'LearnerClustAffinityPropagation.R'
52 | 'LearnerClustAgnes.R'
53 | 'LearnerClustBICO.R'
54 | 'LearnerClustBIRCH.R'
55 | 'LearnerClustCMeans.R'
56 | 'LearnerClustCobweb.R'
57 | 'LearnerClustDBSCAN.R'
58 | 'LearnerClustDBSCANfpc.R'
59 | 'LearnerClustDiana.R'
60 | 'LearnerClustEM.R'
61 | 'LearnerClustFanny.R'
62 | 'LearnerClustFarthestFirst.R'
63 | 'LearnerClustFeatureless.R'
64 | 'LearnerClustHDBSCAN.R'
65 | 'LearnerClustHclust.R'
66 | 'LearnerClustKKMeans.R'
67 | 'LearnerClustKMeans.R'
68 | 'LearnerClustMclust.R'
69 | 'LearnerClustMeanShift.R'
70 | 'LearnerClustMiniBatchKMeans.R'
71 | 'LearnerClustOPTICS.R'
72 | 'LearnerClustPAM.R'
73 | 'LearnerClustSimpleKMeans.R'
74 | 'LearnerClustXMeans.R'
75 | 'MeasureClust.R'
76 | 'measures.R'
77 | 'MeasureClustInternal.R'
78 | 'PredictionClust.R'
79 | 'PredictionDataClust.R'
80 | 'TaskClust.R'
81 | 'TaskClust_ruspini.R'
82 | 'TaskClust_usarrest.R'
83 | 'as_prediction_clust.R'
84 | 'as_task_clust.R'
85 | 'bibentries.R'
86 | 'helper.R'
87 |
--------------------------------------------------------------------------------
/R/LearnerClustDBSCAN.R:
--------------------------------------------------------------------------------
1 | #' @title Density-based Spatial Clustering of Applications with Noise (DBSCAN) Clustering Learner
2 | #'
3 | #' @name mlr_learners_clust.dbscan
4 | #'
5 | #' @description
6 | #' DBSCAN (Density-based spatial clustering of applications with noise) clustering.
7 | #' Calls [dbscan::dbscan()] from \CRANpkg{dbscan}.
8 | #'
9 | #' @templateVar id clust.dbscan
10 | #' @template learner
11 | #'
12 | #' @references
13 | #' `r format_bib("hahsler2019dbscan", "ester1996density")`
14 | #'
15 | #' @export
16 | #' @template seealso_learner
17 | #' @template simple_example
18 | LearnerClustDBSCAN = R6Class("LearnerClustDBSCAN",
19 |   inherit = LearnerClust,
20 |   public = list(
21 |     #' @description
22 |     #' Creates a new instance of this [R6][R6::R6Class] class.
23 |     initialize = function() {
24 |       # `bucketSize` and `splitRule` both depend on `search == "kdtree"`
25 |       split_rules = c("STD", "MIDPT", "FAIR", "SL_MIDPT", "SL_FAIR", "SUGGEST")
26 |       hyperpars = ps(
27 |         eps = p_dbl(0, tags = c("train", "required")),
28 |         minPts = p_int(0L, default = 5L, tags = "train"),
29 |         weights = p_uty(tags = "train", custom_check = check_numeric),
30 |         borderPoints = p_lgl(default = TRUE, tags = "train"),
31 |         search = p_fct(c("kdtree", "linear", "dist"), default = "kdtree", tags = "train"),
32 |         bucketSize = p_int(1L, default = 10L, tags = "train", depends = quote(search == "kdtree")),
33 |         splitRule = p_fct(split_rules, default = "SUGGEST", tags = "train", depends = quote(search == "kdtree")),
34 |         approx = p_dbl(default = 0, tags = "train")
35 |       )
36 |
37 |       super$initialize(
38 |         id = "clust.dbscan",
39 |         label = "Density-Based Clustering",
40 |         man = "mlr3cluster::mlr_learners_clust.dbscan",
41 |         packages = "dbscan",
42 |         feature_types = c("logical", "integer", "numeric"),
43 |         predict_types = "partition",
44 |         properties = c("density", "exclusive", "complete"),
45 |         param_set = hyperpars
46 |       )
47 |     }
48 |   ),
49 |
50 |   private = list(
51 |     .train = function(task) {
52 |       pars = self$param_set$get_values(tags = "train")
53 |       train_data = task$data()
54 |       # the training data is stored in the model because `.predict()` passes it back to predict()
55 |       fit = insert_named(invoke(dbscan::dbscan, x = train_data, .args = pars), list(data = train_data))
56 |       if (self$save_assignments) {
57 |         self$assignments = fit$cluster
58 |       }
59 |       fit
60 |     },
61 |
62 |     .predict = function(task) {
63 |       labels = invoke(predict, self$model, newdata = task$data(), data = self$model$data)
64 |       PredictionClust$new(task = task, partition = labels)
65 |     }
66 |   )
67 | )
68 |
69 | #' @include zzz.R
70 | register_learner("clust.dbscan", LearnerClustDBSCAN)
71 |
--------------------------------------------------------------------------------
/R/LearnerClustFanny.R:
--------------------------------------------------------------------------------
1 | #' @title Fuzzy Analysis Clustering Learner
2 | #'
3 | #' @name mlr_learners_clust.fanny
4 | #'
5 | #' @description
6 | #' A [LearnerClust] for fuzzy clustering implemented in [cluster::fanny()].
7 | #' [cluster::fanny()] doesn't have a default value for the number of clusters.
8 | #' Therefore, the `k` parameter which corresponds to the number
9 | #' of clusters here is set to 2 by default.
10 | #' The predict method copies cluster assignments and memberships
11 | #' generated for train data. The predict does not work for
12 | #' new data.
13 | #'
14 | #' @templateVar id clust.fanny
15 | #' @template learner
16 | #'
17 | #' @references
18 | #' `r format_bib("kaufman2009finding")`
19 | #'
20 | #' @export
21 | #' @template seealso_learner
22 | #' @template example
23 | LearnerClustFanny = R6Class("LearnerClustFanny",
24 |   inherit = LearnerClust,
25 |   public = list(
26 |     #' @description
27 |     #' Creates a new instance of this [R6][R6::R6Class] class.
28 |     initialize = function() {
29 |       hyperpars = ps(
30 |         k = p_int(1L, tags = c("train", "required")),
31 |         memb.exp = p_dbl(1, default = 2, tags = "train"),
32 |         metric = p_fct(c("euclidean", "manhattan", "SqEuclidean"), default = "euclidean", tags = "train"),
33 |         stand = p_lgl(default = FALSE, tags = "train"),
34 |         maxit = p_int(0L, default = 500L, tags = "train"),
35 |         tol = p_dbl(0, default = 1e-15, tags = "train"),
36 |         trace.lev = p_int(0L, default = 0L, tags = "train")
37 |       )
38 |       # `k` is tagged "required", so it is given an initial value of 2
39 |       hyperpars$set_values(k = 2L)
40 |
41 |       super$initialize(
42 |         id = "clust.fanny",
43 |         label = "Fuzzy Analysis Clustering",
44 |         man = "mlr3cluster::mlr_learners_clust.fanny",
45 |         packages = "cluster",
46 |         feature_types = c("logical", "integer", "numeric"),
47 |         predict_types = c("partition", "prob"),
48 |         properties = c("partitional", "fuzzy", "complete"),
49 |         param_set = hyperpars
50 |       )
51 |     }
52 |   ),
53 |
54 |   private = list(
55 |     .train = function(task) {
56 |       pars = self$param_set$get_values(tags = "train")
57 |       fit = invoke(cluster::fanny, x = task$data(), .args = pars)
58 |       if (self$save_assignments) {
59 |         self$assignments = fit$clustering
60 |       }
61 |       fit
62 |     },
63 |
64 |     .predict = function(task) {
65 |       warn_prediction_useless(self$id)
66 |
67 |       # reuse the clustering and memberships stored in the trained model; columns are named 1..ncol
68 |       membership = self$model$membership
69 |       colnames(membership) = seq_len(ncol(membership))
70 |
71 |       PredictionClust$new(task = task, partition = self$model$clustering, prob = membership)
72 |     }
73 |   )
74 | )
75 |
76 | #' @include zzz.R
77 | register_learner("clust.fanny", LearnerClustFanny)
78 |
--------------------------------------------------------------------------------
/R/LearnerClustOPTICS.R:
--------------------------------------------------------------------------------
1 | #' @title Ordering Points to Identify the Clustering Structure (OPTICS) Clustering Learner
2 | #'
3 | #' @name mlr_learners_clust.optics
4 | #'
5 | #' @description
6 | #' OPTICS (Ordering points to identify the clustering structure) point ordering clustering.
7 | #' Calls [dbscan::optics()] from \CRANpkg{dbscan}.
8 | #'
9 | #' @templateVar id clust.optics
10 | #' @template learner
11 | #'
12 | #' @references
13 | #' `r format_bib("hahsler2019dbscan", "ankerst1999optics")`
14 | #'
15 | #' @export
16 | #' @template seealso_learner
17 | #' @template simple_example
18 | LearnerClustOPTICS = R6Class("LearnerClustOPTICS",
19 |   inherit = LearnerClust,
20 |   public = list(
21 |     #' @description
22 |     #' Creates a new instance of this [R6][R6::R6Class] class.
23 |     initialize = function() {
24 |       split_rules = c("STD", "MIDPT", "FAIR", "SL_MIDPT", "SL_FAIR", "SUGGEST")
25 |       hyperpars = ps(
26 |         eps = p_dbl(0, special_vals = list(NULL), default = NULL, tags = "train"),
27 |         minPts = p_int(0L, default = 5L, tags = "train"),
28 |         search = p_fct(c("kdtree", "linear", "dist"), default = "kdtree", tags = "train"),
29 |         bucketSize = p_int(1L, default = 10L, tags = "train", depends = quote(search == "kdtree")),
30 |         splitRule = p_fct(split_rules, default = "SUGGEST", tags = "train", depends = quote(search == "kdtree")),
31 |         approx = p_dbl(default = 0, tags = "train"),
32 |         eps_cl = p_dbl(0, tags = c("train", "required"))
33 |       )
34 |
35 |       super$initialize(
36 |         id = "clust.optics",
37 |         label = "OPTICS Clustering",
38 |         man = "mlr3cluster::mlr_learners_clust.optics",
39 |         packages = "dbscan",
40 |         feature_types = c("logical", "integer", "numeric"),
41 |         predict_types = "partition",
42 |         properties = c("density", "exclusive", "complete"),
43 |         param_set = hyperpars
44 |       )
45 |     }
46 |   ),
47 |
48 |   private = list(
49 |     .train = function(task) {
50 |       pars = self$param_set$get_values(tags = "train")
51 |       train_data = task$data()
52 |       # `eps_cl` is only passed to extractDBSCAN(), so it is removed before calling optics()
53 |       ordering = invoke(dbscan::optics, x = train_data, .args = remove_named(pars, "eps_cl"))
54 |       ordering = insert_named(ordering, list(data = train_data))
55 |       fit = invoke(dbscan::extractDBSCAN, object = ordering, eps_cl = pars$eps_cl)
56 |
57 |       if (self$save_assignments) {
58 |         self$assignments = fit$cluster
59 |       }
60 |       fit
61 |     },
62 |
63 |     .predict = function(task) {
64 |       labels = invoke(predict, self$model, newdata = task$data(), data = self$model$data)
65 |       PredictionClust$new(task = task, partition = labels)
66 |     }
67 |   )
68 | )
69 |
70 | #' @include zzz.R
71 | register_learner("clust.optics", LearnerClustOPTICS)
72 |
--------------------------------------------------------------------------------
/R/LearnerClustKMeans.R:
--------------------------------------------------------------------------------
1 | #' @title K-Means Clustering Learner
2 | #'
3 | #' @name mlr_learners_clust.kmeans
4 | #'
5 | #' @description
6 | #' A [LearnerClust] for k-means clustering implemented in [stats::kmeans()].
7 | #' [stats::kmeans()] doesn't have a default value for the number of clusters.
8 | #' Therefore, the `centers` parameter here is set to 2 by default.
9 | #' The predict method uses [clue::cl_predict()] to compute the
10 | #' cluster memberships for new data.
11 | #'
12 | #' @templateVar id clust.kmeans
13 | #' @template learner
14 | #'
15 | #' @references
16 | #' `r format_bib("forgy1965cluster", "hartigan1979algorithm", "lloyd1982least", "macqueen1967some")`
17 | #'
18 | #' @export
19 | #' @template seealso_learner
20 | #' @template example
21 | LearnerClustKMeans = R6Class("LearnerClustKMeans",
22 |   inherit = LearnerClust,
23 |   public = list(
24 |     #' @description
25 |     #' Creates a new instance of this [R6][R6::R6Class] class.
26 |     initialize = function() {
27 |       algorithms = c("Hartigan-Wong", "Lloyd", "Forgy", "MacQueen")
28 |       hyperpars = ps(
29 |         centers = p_uty(tags = c("train", "required"), custom_check = check_centers),
30 |         iter.max = p_int(1L, default = 10L, tags = "train"),
31 |         algorithm = p_fct(algorithms, default = "Hartigan-Wong", tags = "train"),
32 |         nstart = p_int(1L, default = 1L, tags = "train"),
33 |         trace = p_int(0L, default = 0L, tags = "train")
34 |       )
35 |       # `centers` is tagged "required", so it is given an initial value of 2
36 |       hyperpars$set_values(centers = 2L)
37 |
38 |       super$initialize(
39 |         id = "clust.kmeans",
40 |         label = "K-Means",
41 |         man = "mlr3cluster::mlr_learners_clust.kmeans",
42 |         packages = c("stats", "clue"),
43 |         feature_types = c("logical", "integer", "numeric"),
44 |         predict_types = "partition",
45 |         properties = c("partitional", "exclusive", "complete"),
46 |         param_set = hyperpars
47 |       )
48 |     }
49 |   ),
50 |
51 |   private = list(
52 |     .train = function(task) {
53 |       pars = self$param_set$get_values(tags = "train")
54 |       # warn when `nstart` is set although `centers` is not a single integer
55 |       nstart_irrelevant = !is.null(pars$nstart) && !test_int(pars$centers)
56 |       if (nstart_irrelevant) {
57 |         warningf("`nstart` parameter is only relevant when `centers` is integer.")
58 |       }
59 |
60 |       assert_centers_param(pars$centers, task, test_data_frame, "centers")
61 |
62 |       fit = invoke(stats::kmeans, x = task$data(), .args = pars)
63 |       if (self$save_assignments) {
64 |         self$assignments = fit$cluster
65 |       }
66 |       fit
67 |     },
68 |
69 |     .predict = function(task) {
70 |       class_ids = invoke(cl_predict, self$model, newdata = task$data(), type = "class_ids")
71 |       PredictionClust$new(task = task, partition = unclass(class_ids))
72 |     }
73 |   )
74 | )
75 |
76 | #' @include zzz.R
77 | register_learner("clust.kmeans", LearnerClustKMeans)
78 |
--------------------------------------------------------------------------------
/R/LearnerClustEM.R:
--------------------------------------------------------------------------------
1 | #' @title Expectation-Maximization Clustering Learner
2 | #'
3 | #' @name mlr_learners_clust.em
4 | #'
5 | #' @description
6 | #' A [LearnerClust] for Expectation-Maximization clustering implemented in
7 | #' [RWeka::list_Weka_interfaces()].
8 | #' The predict method uses [RWeka::predict.Weka_clusterer()] to compute the
9 | #' cluster memberships for new data.
10 | #'
11 | #' @templateVar id clust.em
12 | #' @template learner
13 | #'
14 | #' @references
15 | #' `r format_bib("witten2002data", "dempster1977maximum")`
16 | #'
17 | #' @export
18 | #' @template seealso_learner
19 | #' @template example
20 | LearnerClustEM = R6Class("LearnerClustEM",
21 |   inherit = LearnerClust,
22 |   public = list(
23 |     #' @description
24 |     #' Creates a new instance of this [R6][R6::R6Class] class.
25 |     initialize = function() {
26 |       hyperpars = ps(
27 |         I = p_int(1L, default = 100L, tags = "train"),
28 |         ll_cv = p_dbl(1e-6, default = 1e-6, tags = "train"),
29 |         ll_iter = p_dbl(1e-6, default = 1e-6, tags = "train"),
30 |         M = p_dbl(1e-6, default = 1e-6, tags = "train"),
31 |         max = p_int(-1L, default = -1L, tags = "train"),
32 |         N = p_int(-1L, default = -1L, tags = "train"),
33 |         num_slots = p_int(1L, default = 1L, tags = "train"),
34 |         S = p_int(0L, default = 100L, tags = "train"),
35 |         X = p_int(1L, default = 10L, tags = "train"),
36 |         K = p_int(1L, default = 10L, tags = "train"),
37 |         V = p_lgl(default = FALSE, tags = "train"),
38 |         output_debug_info = p_lgl(default = FALSE, tags = "train")
39 |       )
40 |
41 |       super$initialize(
42 |         id = "clust.em",
43 |         label = "Expectation-Maximization Clustering",
44 |         man = "mlr3cluster::mlr_learners_clust.em",
45 |         packages = "RWeka",
46 |         feature_types = c("logical", "integer", "numeric"),
47 |         predict_types = "partition",
48 |         properties = c("partitional", "exclusive", "complete"),
49 |         param_set = hyperpars
50 |       )
51 |     }
52 |   ),
53 |
54 |   private = list(
55 |     .train = function(task) {
56 |       pars = self$param_set$get_values(tags = "train")
57 |       # parameter ids use "_" where the names handed to Weka_control() use "-"
58 |       names(pars) = chartr("_", "-", names(pars))
59 |       control = invoke(RWeka::Weka_control, .args = pars)
60 |       fit = invoke(RWeka::make_Weka_clusterer("weka/clusterers/EM"), x = task$data(), control = control)
61 |       if (self$save_assignments) {
62 |         # ids are shifted by one, presumably because the Weka ids start at 0 - verify
63 |         self$assignments = unname(fit$class_ids + 1L)
64 |       }
65 |       fit
66 |     },
67 |
68 |     .predict = function(task) {
69 |       # same +1 shift as applied to the stored assignments in `.train()`
70 |       class_ids = invoke(predict, self$model, newdata = task$data(), type = "class")
71 |       PredictionClust$new(task = task, partition = class_ids + 1L)
72 |     }
73 |   )
74 | )
75 |
76 | #' @include zzz.R
77 | register_learner("clust.em", LearnerClustEM)
78 |
--------------------------------------------------------------------------------
/R/LearnerClustFeatureless.R:
--------------------------------------------------------------------------------
1 | #' @title Featureless Clustering Learner
2 | #'
3 | #' @name mlr_learners_clust.featureless
4 | #'
5 | #' @description
6 | #' A simple [LearnerClust] which randomly (but evenly) assigns observations to
7 | #' `num_clusters` partitions (default: 1 partition).
8 | #'
9 | #' @templateVar id clust.featureless
10 | #' @template learner
11 | #'
12 | #' @export
13 | #' @template seealso_learner
14 | #' @template example
15 | LearnerClustFeatureless = R6Class("LearnerClustFeatureless",
16 |   inherit = LearnerClust,
17 |   public = list(
18 |     #' @description
19 |     #' Creates a new instance of this [R6][R6::R6Class] class.
20 |     initialize = function() {
21 |       param_set = ps(num_clusters = p_int(1L, tags = c("train", "predict", "required")))
22 |       param_set$set_values(num_clusters = 1L)
23 |
24 |       super$initialize(
25 |         id = "clust.featureless",
26 |         feature_types = c("logical", "integer", "numeric"),
27 |         predict_types = c("partition", "prob"),
28 |         param_set = param_set,
29 |         properties = c("partitional", "exclusive", "complete", "missings"),
30 |         man = "mlr3cluster::mlr_learners_clust.featureless",
31 |         label = "Featureless Clustering"
32 |       )
33 |     }
34 |   ),
35 |
36 |   private = list(
37 |     .train = function(task) {
38 |       pv = self$param_set$get_values(tags = "train")
39 |       k = pv$num_clusters
40 |       n = task$nrow
41 |
42 |       if (k > n) {
43 |         stopf("number of clusters must lie between 1 and `nrow(data)`.")
44 |       }
45 |
46 |       # the features are never inspected: chunk() alone produces the assignment
47 |       partition = chunk(n, n_chunks = k)
48 |
49 |       if (self$save_assignments) {
50 |         self$assignments = partition
51 |       }
52 |
53 |       set_class(
54 |         list(clustering = partition, features = task$feature_names),
55 |         "clust.featureless_model"
56 |       )
57 |     },
58 |
59 |     .predict = function(task) {
60 |       pv = self$param_set$get_values(tags = "predict")
61 |       n = task$nrow
62 |       k = pv$num_clusters
63 |
64 |       partition = chunk(n, n_chunks = k)
65 |       prob = NULL
66 |
67 |       if (self$predict_type == "prob") {
68 |         # random membership values, normalised so that every row sums to 1
69 |         prob = matrix(runif(n * k), nrow = n, ncol = k)
70 |         prob = prob / rowSums(prob)
71 |
72 |         # within each row, move the largest value into the column of the assigned
73 |         # cluster so that the row maximum agrees with `partition`
74 |         prob = do.call(rbind, map(seq_along(partition), function(i) {
75 |           x = prob[i, , drop = TRUE]
76 |           pos = which_max(x)
77 |           target = partition[i]
78 |           # compare with the assigned cluster, not the row index `i`: the row is
79 |           # only consistent when its maximum already sits in the target column
80 |           if (pos == target) x else append(x[-pos], x[pos], after = target - 1L)
81 |         }))
82 |       }
83 |
84 |       PredictionClust$new(task = task, partition = partition, prob = prob)
85 |     }
86 |   )
87 | )
88 |
89 | #' @include zzz.R
90 | register_learner("clust.featureless", LearnerClustFeatureless)
91 |
--------------------------------------------------------------------------------
/R/MeasureClustInternal.R:
--------------------------------------------------------------------------------
1 | #' @include measures.R
2 | #' @include MeasureClust.R
3 | MeasureClustFPC = R6Class("MeasureClustFPC",
4 |   inherit = MeasureClust,
5 |   cloneable = FALSE,
6 |   public = list(
7 |     crit = NULL, # element of the `cluster.stats()` result that `.score()` returns
8 |     initialize = function(name, label) {
9 |       spec = measures[[name]] # entry for `name` in the `measures` list
10 |       super$initialize(
11 |         id = paste0("clust.", name),
12 |         label = label,
13 |         man = paste0("mlr3cluster::mlr_measures_clust.", name),
14 |         packages = "fpc",
15 |         properties = "requires_task",
16 |         predict_type = spec$predict_type,
17 |         minimize = spec$minimize,
18 |         range = c(spec$lower, spec$upper)
19 |       )
20 |       self$crit = spec$crit
21 |     }
22 |   ),
23 |   private = list(
24 |     .score = function(prediction, task, ...) {
25 |       distances = dist(task$data(rows = prediction$row_ids))
26 |       suppressWarnings(cluster.stats(distances, clustering = prediction$partition, silhouette = FALSE)[[self$crit]])
27 |     }
28 |   )
29 | )
30 |
31 | MeasureClustSil = R6Class("MeasureClustSil",
32 |   inherit = MeasureClust,
33 |   cloneable = FALSE,
34 |   public = list(
35 |     crit = NULL, # column of the `silhouette()` result that `.score()` averages
36 |     initialize = function(name, label) {
37 |       spec = measures[[name]]
38 |       super$initialize(
39 |         id = paste0("clust.", name),
40 |         label = label,
41 |         man = paste0("mlr3cluster::mlr_measures_clust.", name),
42 |         packages = "cluster",
43 |         properties = "requires_task",
44 |         predict_type = spec$predict_type,
45 |         minimize = spec$minimize,
46 |         range = c(spec$lower, spec$upper)
47 |       )
48 |       self$crit = spec$crit
49 |     }
50 |   ),
51 |   private = list(
52 |     .score = function(prediction, task, ...) {
53 |       distances = dist(task$data(rows = prediction$row_ids))
54 |
55 |       # a partition with a single cluster is scored as 0 without calling silhouette()
56 |       if (length(unique(prediction$partition)) == 1L) {
57 |         return(0L)
58 |       }
59 |       mean(silhouette(prediction$partition, distances)[, self$crit])
60 |     }
61 |   )
62 | )
63 |
64 | #' @title Rousseeuw's Silhouette Quality Index
65 | #'
66 | #' @templateVar id silhouette
67 | #' @template measure_sil
68 | measures$silhouette = make_measure_info("sil_width", "Silhouette", lower = -1, upper = 1, minimize = FALSE) # silhouette widths lie in [-1, 1], so the mean does too
69 |
70 | #' @title Calinski Harabasz Pseudo F-Statistic
71 | #'
72 | #' @templateVar id ch
73 | #' @template measure_fpc
74 | measures$ch = make_measure_info("ch", "Calinski Harabasz", lower = 0, upper = Inf, minimize = FALSE)
75 |
76 | #' @title Dunn Index
77 | #'
78 | #' @templateVar id dunn
79 | #' @template measure_fpc
80 | measures$dunn = make_measure_info("dunn", "Dunn", lower = 0, upper = Inf, minimize = FALSE)
81 |
82 | #' @title Within Sum of Squares
83 | #'
84 | #' @templateVar id wss
85 | #' @template measure_fpc
86 | measures$wss = make_measure_info("within.cluster.ss", "Within Sum of Squares", lower = 0, upper = Inf, minimize = TRUE)
87 |
--------------------------------------------------------------------------------
/R/LearnerClustXMeans.R:
--------------------------------------------------------------------------------
1 | #' @title X-means Clustering Learner
2 | #'
3 | #' @name mlr_learners_clust.xmeans
4 | #'
5 | #' @description
6 | #' A [LearnerClust] for X-means clustering implemented in [RWeka::XMeans()].
7 | #' The predict method uses [RWeka::predict.Weka_clusterer()] to compute the
8 | #' cluster memberships for new data.
9 | #'
10 | #' @templateVar id clust.xmeans
11 | #' @template learner
12 | #'
13 | #' @references
14 | #' `r format_bib("witten2002data", "pelleg2000x")`
15 | #'
16 | #' @export
17 | #' @template seealso_learner
18 | #' @template simple_example
19 | LearnerClustXMeans = R6Class("LearnerClustXMeans",
20 |   inherit = LearnerClust,
21 |   public = list(
22 |     #' @description
23 |     #' Creates a new instance of this [R6][R6::R6Class] class.
24 |     initialize = function() {
25 |       hyperpars = ps(
26 |         B = p_dbl(0, default = 1, tags = "train"),
27 |         C = p_dbl(0, default = 0, tags = "train"),
28 |         D = p_uty(default = "weka.core.EuclideanDistance", tags = "train"),
29 |         H = p_int(1L, default = 4L, tags = "train"),
30 |         I = p_int(1L, default = 1L, tags = "train"),
31 |         J = p_int(1L, default = 1000L, tags = "train"),
32 |         K = p_uty(default = "", tags = "train"),
33 |         L = p_int(1L, default = 2L, tags = "train"),
34 |         M = p_int(1L, default = 1000L, tags = "train"),
35 |         S = p_int(1L, default = 10L, tags = "train"),
36 |         U = p_int(0L, default = 0L, tags = "train"),
37 |         use_kdtree = p_lgl(default = FALSE, tags = "train"),
38 |         N = p_uty(tags = "train"),
39 |         O = p_uty(tags = "train"),
40 |         Y = p_uty(tags = "train"),
41 |         output_debug_info = p_lgl(default = FALSE, tags = "train")
42 |       )
43 |
44 |       super$initialize(
45 |         id = "clust.xmeans",
46 |         label = "X-means",
47 |         man = "mlr3cluster::mlr_learners_clust.xmeans",
48 |         packages = "RWeka",
49 |         feature_types = c("logical", "integer", "numeric"),
50 |         predict_types = "partition",
51 |         properties = c("partitional", "exclusive", "complete"),
52 |         param_set = hyperpars
53 |       )
54 |     }
55 |   ),
56 |
57 |   private = list(
58 |     .train = function(task) {
59 |       pars = self$param_set$get_values(tags = "train")
60 |       # parameter ids use "_" where the names handed to Weka_control() use "-"
61 |       names(pars) = chartr("_", "-", names(pars))
62 |       control = invoke(RWeka::Weka_control, .args = pars)
63 |       fit = invoke(RWeka::XMeans, x = task$data(), control = control)
64 |       if (self$save_assignments) {
65 |         self$assignments = unname(fit$class_ids + 1L)
66 |       }
67 |       fit
68 |     },
69 |
70 |     .predict = function(task) {
71 |       class_ids = invoke(predict, self$model, newdata = task$data(), type = "class")
72 |       PredictionClust$new(task = task, partition = class_ids + 1L)
73 |     }
74 |   )
75 | )
76 |
77 | #' @include zzz.R
78 | register_learner("clust.xmeans", LearnerClustXMeans)
79 |
--------------------------------------------------------------------------------
/R/LearnerClustDBSCANfpc.R:
--------------------------------------------------------------------------------
1 | #' @title Density-based Spatial Clustering of Applications with Noise (DBSCAN) Clustering Learner
2 | #'
3 | #' @name mlr_learners_clust.dbscan_fpc
4 | #'
5 | #' @description
6 | #' DBSCAN (Density-based spatial clustering of applications with noise) clustering.
7 | #' Calls [fpc::dbscan()] from \CRANpkg{fpc}.
8 | #'
9 | #' @templateVar id clust.dbscan_fpc
10 | #' @template learner
11 | #'
12 | #' @references
13 | #' `r format_bib("ester1996density")`
14 | #'
15 | #' @export
16 | #' @template seealso_learner
17 | #' @template simple_example
18 | LearnerClustDBSCANfpc = R6Class("LearnerClustDBSCANfpc",
19 |   inherit = LearnerClust,
20 |   public = list(
21 |     #' @description
22 |     #' Creates a new instance of this [R6][R6::R6Class] class.
23 |     initialize = function() {
24 |       param_set = ps(
25 |         eps = p_dbl(0, tags = c("train", "required")),
26 |         MinPts = p_int(0L, default = 5L, tags = "train"),
27 |         scale = p_lgl(default = FALSE, tags = "train"),
28 |         method = p_fct(c("hybrid", "raw", "dist"), default = "hybrid", tags = "train"),
29 |         seeds = p_lgl(default = TRUE, tags = "train"),
30 |         # `showplot` accepts a flag or an integer in 0..2
31 |         showplot = p_uty(default = FALSE, tags = "train", custom_check = crate(function(x) {
32 |           if (test_flag(x) || test_int(x, lower = 0L, upper = 2L)) {
33 |             TRUE
34 |           } else {
35 |             "`showplot` need to be either logical or integer between 0 and 2"
36 |           }
37 |         })),
38 |         # `countmode` accepts NULL or an integer vector
39 |         countmode = p_uty(default = NULL, tags = "train", custom_check = crate(function(x) {
40 |           if (test_integer(x, null.ok = TRUE)) {
41 |             TRUE
42 |           } else {
43 |             "`countmode` need to be NULL or vector of integers"
44 |           }
45 |         }))
46 |       )
47 |
48 |       super$initialize(
49 |         id = "clust.dbscan_fpc",
50 |         packages = "fpc",
51 |         feature_types = c("logical", "integer", "numeric"),
52 |         predict_types = "partition",
53 |         param_set = param_set,
54 |         properties = c("density", "exclusive", "complete"),
55 |         man = "mlr3cluster::mlr_learners_clust.dbscan_fpc",
56 |         label = "Density-Based Clustering with fpc"
57 |       )
58 |     }
59 |   ),
60 |
61 |   private = list(
62 |     .train = function(task) {
63 |       pv = self$param_set$get_values(tags = "train")
64 |       data = task$data()
65 |       m = invoke(fpc::dbscan, data = data, .args = pv)
66 |       # the training data is stored in the model because `.predict()` passes it back to predict()
67 |       m = insert_named(m, list(data = data))
68 |       if (self$save_assignments) {
69 |         self$assignments = m$cluster
70 |       }
71 |       m
72 |     },
73 |
74 |     .predict = function(task) {
75 |       # `newdata` has to be an argument of predict(); it was previously handed to
76 |       # as.integer(), so the task's data never reached the prediction
77 |       partition = as.integer(invoke(predict, self$model, data = self$model$data, newdata = task$data()))
78 |       PredictionClust$new(task = task, partition = partition)
79 |     }
80 |   )
81 | )
82 |
83 | #' @include zzz.R
84 | register_learner("clust.dbscan_fpc", LearnerClustDBSCANfpc)
85 |
--------------------------------------------------------------------------------
/R/PredictionClust.R:
--------------------------------------------------------------------------------
1 | #' @title Prediction Object for Cluster Analysis
2 | #'
3 | #' @description
4 | #' This object wraps the predictions returned by a learner of class [LearnerClust], i.e.
5 | #' the predicted partition and cluster probability.
6 | #'
7 | #' @family Prediction
8 | #' @export
9 | #' @examples
10 | #' library(mlr3)
11 | #' library(mlr3cluster)
12 | #' task = tsk("usarrests")
13 | #' learner = lrn("clust.kmeans")
14 | #' p = learner$train(task)$predict(task)
15 | #' p$predict_types
16 | #' head(as.data.table(p))
17 | PredictionClust = R6Class("PredictionClust",
18 |   inherit = Prediction,
19 |   public = list(
20 |     #' @description
21 |     #' Creates a new instance of this [R6][R6::R6Class] class.
22 |     #'
23 |     #' @param task ([TaskClust])\cr
24 |     #'   Task, used to extract defaults for `row_ids`.
25 |     #'
26 |     #' @param row_ids (`integer()`)\cr
27 |     #'   Row ids of the predicted observations, i.e. the row ids of the test set.
28 |     #'
29 |     #' @param partition (`integer()`)\cr
30 |     #'   Vector of cluster partitions.
31 |     #'
32 |     #' @param prob (`matrix()`)\cr
33 |     #'   Numeric matrix of cluster membership probabilities with one column for each cluster
34 |     #'   and one row for each observation.
35 |     #'   Columns must be named with cluster numbers, row names are automatically removed.
36 |     #'   If `prob` is provided, but `partition` is not, the cluster memberships are calculated from
37 |     #'   the probabilities using [max.col()] with `ties.method` set to `"first"`.
38 |     #'
39 |     #' @param check (`logical(1)`)\cr
40 |     #'   If `TRUE`, performs some argument checks and predict type conversions.
41 |     initialize = function(task = NULL, row_ids = task$row_ids, partition = NULL, prob = NULL, check = TRUE) {
42 |       # gather the supplied pieces and drop the ones that were left at NULL
43 |       payload = discard(list(row_ids = row_ids, partition = partition, prob = prob), is.null)
44 |       class(payload) = c("PredictionDataClust", "PredictionData")
45 |       if (check) {
46 |         payload = check_prediction_data(payload)
47 |       }
48 |
49 |       self$task_type = "clust"
50 |       self$man = "mlr3cluster::PredictionClust"
51 |       self$data = payload
52 |       self$predict_types = intersect(c("partition", "prob"), names(payload))
53 |     }
54 |   ),
55 |
56 |   active = list(
57 |     #' @field partition (`integer()`)\cr
58 |     #' Access the stored partition; one `NA` per row id when no partition is stored.
59 |     partition = function() {
60 |       if (is.null(self$data$partition)) rep(NA_real_, length(self$data$row_ids)) else self$data$partition
61 |     },
62 |
63 |     #' @field prob (`matrix()`)\cr
64 |     #' Access to the stored probabilities.
65 |     prob = function() {
66 |       self$data$prob
67 |     }
68 |   )
69 | )
70 |
#' @export
as.data.table.PredictionClust = function(x, ...) { # nolint
  # Base table: one row per predicted observation with its cluster assignment.
  out = as.data.table(x$data[c("row_ids", "partition")])

  # Without membership probabilities there is nothing more to attach.
  if (!("prob" %chin% x$predict_types)) {
    return(out[])
  }

  # One `prob.<cluster>` column per column of the membership matrix.
  membership = as.data.table(x$data$prob)
  setnames(membership, new = paste0("prob.", names(membership)))
  out = rcbind(out, membership)

  out[]
}
82 |
--------------------------------------------------------------------------------
/R/LearnerClustPAM.R:
--------------------------------------------------------------------------------
#' @title Partitioning Around Medoids Clustering Learner
#'
#' @name mlr_learners_clust.pam
#'
#' @description
#' A [LearnerClust] for PAM clustering implemented in [cluster::pam()].
#' [cluster::pam()] doesn't have a default value for the number of clusters.
#' Therefore, the `k` parameter which corresponds to the number
#' of clusters here is set to 2 by default.
#' The predict method uses [clue::cl_predict()] to compute the
#' cluster memberships for new data.
#'
#' The `metric` value `"euclidian"` is a historical misspelling that is still accepted;
#' it is passed to [cluster::pam()] as `"euclidean"`.
#'
#' @templateVar id clust.pam
#' @template learner
#'
#' @references
#' `r format_bib("reynolds2006clustering", "schubert2019faster")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClustPAM = R6Class("LearnerClustPAM",
  inherit = LearnerClust,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      param_set = ps(
        k = p_int(1L, tags = c("train", "required")),
        # "euclidian" is kept as a level only for backward compatibility, see `.train()`.
        metric = p_fct(c("euclidean", "manhattan", "euclidian"), default = "euclidean", tags = "train"),
        medoids = p_uty(
          default = NULL, tags = "train", custom_check = crate(function(x) check_integerish(x, null.ok = TRUE))
        ),
        stand = p_lgl(default = FALSE, tags = "train"),
        do.swap = p_lgl(default = TRUE, tags = "train"),
        pamonce = p_int(0L, 5L, default = 0L, tags = "train"),
        trace.lev = p_int(0L, default = 0L, tags = "train")
      )

      param_set$set_values(k = 2L)

      super$initialize(
        id = "clust.pam",
        feature_types = c("logical", "integer", "numeric"),
        predict_types = "partition",
        param_set = param_set,
        properties = c("partitional", "exclusive", "complete"),
        packages = "cluster",
        man = "mlr3cluster::mlr_learners_clust.pam",
        label = "Partitioning Around Medoids"
      )
    }
  ),

  private = list(
    # Fits `cluster::pam()` on the task data and returns the fitted object.
    # Stops if `medoids` is set but does not consist of exactly `k` valid row indices.
    .train = function(task) {
      pv = self$param_set$get_values(tags = "train")

      # Translate the legacy spelling so that the value handed to pam() is a documented one.
      if (identical(pv$metric, "euclidian")) {
        pv$metric = "euclidean"
      }

      if (!is.null(pv$medoids)) {
        if (length(pv$medoids) != pv$k) {
          stopf("number of `medoids` needs to match `k`!")
        }
        # Initial medoids are row indices, so the valid range is bounded by the number of
        # observations (not by `k`); missing values are never valid indices.
        valid = !anyNA(pv$medoids) && all(pv$medoids >= 1L & pv$medoids <= task$nrow)
        if (!valid) {
          stopf("`medoids` need to contain valid indices from 1 to %i (number of observations)!", task$nrow)
        }
      }

      m = invoke(cluster::pam, x = task$data(), diss = FALSE, .args = pv)
      if (self$save_assignments) {
        self$assignments = m$clustering
      }
      m
    },

    # Assigns the rows of `task` to clusters of the trained model via `cl_predict()`.
    .predict = function(task) {
      partition = unclass(invoke(cl_predict, self$model, newdata = task$data(), type = "class_ids"))
      PredictionClust$new(task = task, partition = partition)
    }
  )
)

#' @include zzz.R
register_learner("clust.pam", LearnerClustPAM)
83 |
--------------------------------------------------------------------------------
/R/LearnerClustSimpleKMeans.R:
--------------------------------------------------------------------------------
#' @title K-Means Clustering Learner from Weka
#'
#' @name mlr_learners_clust.SimpleKMeans
#'
#' @description
#' A [LearnerClust] for Simple K Means clustering implemented in [RWeka::SimpleKMeans()].
#' The predict method uses [RWeka::predict.Weka_clusterer()] to compute the
#' cluster memberships for new data.
#'
#' @templateVar id clust.SimpleKMeans
#' @template learner
#'
#' @references
#' `r format_bib("witten2002data", "forgy1965cluster", "lloyd1982least", "macqueen1967some")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClustSimpleKMeans = R6Class("LearnerClustSimpleKMeans",
  inherit = LearnerClust,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      param_set = ps(
        A                 = p_uty(default = "weka.core.EuclideanDistance", tags = "train"),
        C                 = p_lgl(default = FALSE, tags = "train"),
        fast              = p_lgl(default = FALSE, tags = "train"),
        I                 = p_int(1L, default = 100L, tags = "train"),
        init              = p_int(0L, 3L, default = 0L, tags = "train"),
        M                 = p_lgl(default = FALSE, tags = "train"),
        max_candidates    = p_int(1L, default = 100L, tags = "train"),
        min_density       = p_int(1L, default = 2L, tags = "train"),
        N                 = p_int(1L, default = 2L, tags = "train"),
        num_slots         = p_int(1L, default = 1L, tags = "train"),
        O                 = p_lgl(default = FALSE, tags = "train"),
        periodic_pruning  = p_int(1L, default = 10000L, tags = "train"),
        S                 = p_int(0L, default = 10L, tags = "train"),
        t2                = p_dbl(default = -1, tags = "train"),
        t1                = p_dbl(default = -1.5, tags = "train"),
        V                 = p_lgl(default = FALSE, tags = "train"),
        output_debug_info = p_lgl(default = FALSE, tags = "train")
      )

      super$initialize(
        id = "clust.SimpleKMeans",
        feature_types = c("logical", "integer", "numeric"),
        predict_types = "partition",
        param_set = param_set,
        properties = c("partitional", "exclusive", "complete"),
        packages = "RWeka",
        man = "mlr3cluster::mlr_learners_clust.SimpleKMeans",
        label = "K-Means (Weka)"
      )
    }
  ),

  private = list(
    # Builds a Weka control object from the configured values and fits SimpleKMeans.
    .train = function(task) {
      weka_args = self$param_set$get_values(tags = "train")
      # Parameter ids use "_" where the option names handed to Weka_control use "-".
      names(weka_args) = chartr("_", "-", names(weka_args))
      ctrl = invoke(RWeka::Weka_control, .args = weka_args)

      fit = invoke(RWeka::SimpleKMeans, x = task$data(), control = ctrl)
      if (self$save_assignments) {
        # Stored ids are shifted by one relative to `class_ids` of the fitted object.
        self$assignments = unname(fit$class_ids + 1L)
      }
      fit
    },

    # Predicts cluster ids for the task data; ids are shifted by one, as in `.train()`.
    .predict = function(task) {
      weka_ids = invoke(predict, self$model, newdata = task$data(), type = "class")
      PredictionClust$new(task = task, partition = weka_ids + 1L)
    }
  )
)

#' @include zzz.R
register_learner("clust.SimpleKMeans", LearnerClustSimpleKMeans)
79 |
--------------------------------------------------------------------------------
/R/LearnerClustAgnes.R:
--------------------------------------------------------------------------------
#' @title Agglomerative Hierarchical Clustering Learner
#'
#' @name mlr_learners_clust.agnes
#'
#' @description
#' A [LearnerClust] for agglomerative hierarchical clustering implemented in [cluster::agnes()].
#' The predict method uses [stats::cutree()] which cuts the tree resulting from
#' hierarchical clustering into specified number of groups (see parameter `k`).
#' The default number for `k` is 2.
#'
#' @templateVar id clust.agnes
#' @template learner
#'
#' @references
#' `r format_bib("kaufman2009finding")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClustAgnes = R6Class("LearnerClustAgnes",
  inherit = LearnerClust,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      param_set = ps(
        metric = p_fct(c("euclidean", "manhattan"), default = "euclidean", tags = "train"),
        stand = p_lgl(default = FALSE, tags = "train"),
        method = p_fct(
          levels = c("average", "single", "complete", "ward", "weighted", "flexible", "gaverage"),
          default = "average",
          tags = "train"
        ),
        trace.lev = p_int(0L, default = 0L, tags = "train"),
        k = p_int(1L, default = 2L, tags = c("train", "predict")),
        par.method = p_uty(
          tags = "train",
          depends = quote(method %in% c("flexible", "gaverage")),
          custom_check = crate(function(x) {
            # Type first, then length; the first failing rule determines the message.
            if (!test_numeric(x) && !test_list(x)) {
              return("`par.method` needs to be a numeric vector")
            }
            if (!(length(x) %in% c(1L, 3L, 4L))) {
              return("`par.method` needs be of length 1, 3, or 4")
            }
            TRUE
          })
        )
      )

      param_set$set_values(k = 2L)

      super$initialize(
        id = "clust.agnes",
        feature_types = c("logical", "integer", "numeric"),
        predict_types = "partition",
        param_set = param_set,
        properties = c("hierarchical", "exclusive", "complete"),
        packages = "cluster",
        man = "mlr3cluster::mlr_learners_clust.agnes",
        label = "Agglomerative Hierarchical Clustering"
      )
    }
  ),

  private = list(
    # Fits `cluster::agnes()`; `k` is not an agnes argument and is only used to cut the tree.
    .train = function(task) {
      pv = self$param_set$get_values(tags = "train")
      agnes_args = remove_named(pv, "k")
      tree = invoke(cluster::agnes, x = task$data(), diss = FALSE, .args = agnes_args)
      if (self$save_assignments) {
        self$assignments = stats::cutree(tree, pv$k)
      }
      tree
    },

    # Returns the assignments stored during training after validating `k` against the task size.
    .predict = function(task) {
      k = self$param_set$get_values(tags = "predict")$k
      if (k > task$nrow) {
        stopf("`k` needs to be between 1 and %i.", task$nrow)
      }

      warn_prediction_useless(self$id)

      PredictionClust$new(task = task, partition = self$assignments)
    }
  )
)

#' @include zzz.R
register_learner("clust.agnes", LearnerClustAgnes)
92 |
--------------------------------------------------------------------------------
/R/PredictionDataClust.R:
--------------------------------------------------------------------------------
#' @export
as_prediction.PredictionDataClust = function(x, check = TRUE, ...) { # nolint
  # The elements of `x` become named constructor arguments; `unclass()` keeps `c()` from
  # dispatching on the prediction-data class while the argument list is assembled.
  ctor_args = c(unclass(x), list(check = check))
  do.call(PredictionClust$new, ctor_args)
}
5 |
#' @export
check_prediction_data.PredictionDataClust = function(pdata, ...) { # nolint
  pdata$row_ids = assert_row_ids(pdata$row_ids)
  n_obs = length(pdata$row_ids)
  assert_integer(pdata$partition, len = n_obs, any.missing = FALSE, null.ok = TRUE)

  membership = pdata$prob
  if (is.null(membership)) {
    # Nothing else to validate when no probabilities were supplied.
    return(pdata)
  }

  # One row per observation, entries within [0, 1]; the number of columns is not checked.
  assert_matrix(membership, nrows = n_obs)
  assert_numeric(membership, lower = 0, upper = 1)

  # Row names are dropped from the stored matrix.
  if (!is.null(rownames(membership))) {
    rownames(membership) = NULL
    pdata$prob = membership
  }

  # Without an explicit partition, take the column with the highest membership
  # (first column wins on ties).
  if (is.null(pdata$partition)) {
    pdata$partition = max.col(membership, ties.method = "first")
  }

  pdata
}
30 |
#' @export
is_missing_prediction_data.PredictionDataClust = function(pdata, ...) { # nolint
  # Start from the partition: NA entries are missing; no partition means nothing is missing yet.
  flagged = if (is.null(pdata$partition)) {
    logical(length(pdata$row_ids))
  } else {
    is.na(pdata$partition)
  }

  # A row is also missing if any of its membership probabilities is missing.
  if (!is.null(pdata$prob)) {
    flagged = flagged | apply(pdata$prob, 1L, anyMissing)
  }

  pdata$row_ids[flagged]
}
45 |
#' @export
c.PredictionDataClust = function(..., keep_duplicates = TRUE) { # nolint
  # Combines several clustering prediction data objects into one.
  #
  # ...: objects of class "PredictionDataClust"; all must provide the same predict types.
  # keep_duplicates: if FALSE, only the last occurrence of each row id is kept.
  # Returns a list with `row_ids`, `partition` and `prob` (NULL if no input carries
  # probabilities), classed like the objects created by `PredictionClust$new()`.
  dots = list(...)
  assert_list(dots, "PredictionDataClust")
  assert_flag(keep_duplicates)
  if (length(dots) == 1L) {
    return(dots[[1L]])
  }

  # Only predictions that provide the same predict types can be combined.
  predict_types = names(mlr_reflections$learner_predict_types$clust)
  predict_types = map(dots, function(x) intersect(names(x), predict_types))
  if (!every(predict_types[-1L], setequal, y = predict_types[[1L]])) {
    stopf("Cannot combine predictions: Different predict types.")
  }

  elems = c("row_ids", "partition")
  tab = map_dtr(dots, function(x) x[elems], .fill = FALSE)
  prob = do.call(rbind, map(dots, "prob"))

  if (!keep_duplicates) {
    # For repeated row ids the last occurrence wins.
    keep = !duplicated(tab, by = "row_ids", fromLast = TRUE)
    tab = tab[keep]
    # Subsetting NULL yields NULL, so this is safe without probabilities.
    prob = prob[keep, , drop = FALSE]
  }

  result = as.list(tab)
  result$prob = prob

  # Same class vector as set in PredictionClust$initialize(), so that methods defined
  # for the parent class "PredictionData" also apply to combined objects.
  set_class(result, c("PredictionDataClust", "PredictionData"))
}
76 |
#' @export
filter_prediction_data.PredictionDataClust = function(pdata, row_ids, ...) {
  # Logical mask over the stored observations that should survive the filter.
  selected = is.element(pdata$row_ids, row_ids)

  pdata$row_ids = pdata$row_ids[selected]

  # Components that are absent stay absent; present ones are subset with the same mask.
  has_partition = !is.null(pdata$partition)
  if (has_partition) {
    pdata$partition = pdata$partition[selected]
  }

  has_prob = !is.null(pdata$prob)
  if (has_prob) {
    # drop = FALSE keeps the matrix shape even if a single row remains.
    pdata$prob = pdata$prob[selected, , drop = FALSE]
  }

  pdata
}
92 |
#' @export
create_empty_prediction_data.TaskClust = function(task, learner) {
  # Creates prediction data with zero observations for a clustering task.
  #
  # task: the clustering task (only used for dispatch here).
  # learner: its `predict_type` decides whether an empty `prob` matrix is included.
  # Returns a list classed like the objects created by `PredictionClust$new()`.
  predict_types = mlr_reflections$learner_predict_types[["clust"]][[learner$predict_type]]

  pdata = list(
    row_ids = integer(),
    partition = integer()
  )

  if ("prob" %chin% predict_types) {
    # Membership probabilities are numeric, so the empty placeholder is numeric as well.
    pdata$prob = matrix(numeric())
  }

  # Same class vector as set in PredictionClust$initialize().
  set_class(pdata, c("PredictionDataClust", "PredictionData"))
}
108 |
--------------------------------------------------------------------------------
/R/LearnerClustCMeans.R:
--------------------------------------------------------------------------------
#' @title Fuzzy C-Means Clustering Learner
#'
#' @name mlr_learners_clust.cmeans
#'
#' @description
#' A [LearnerClust] for fuzzy clustering implemented in [e1071::cmeans()].
#' [e1071::cmeans()] doesn't have a default value for the number of clusters.
#' Therefore, the `centers` parameter here is set to 2 by default.
#' The predict method uses [clue::cl_predict()] to compute the
#' cluster memberships for new data.
#'
#' @templateVar id clust.cmeans
#' @template learner
#'
#' @references
#' `r format_bib("dimitriadou2008misc", "bezdek2013pattern")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClustCMeans = R6Class("LearnerClustCMeans",
  inherit = LearnerClust,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      param_set = ps(
        centers = p_uty(tags = c("train", "required"), custom_check = check_centers),
        iter.max = p_int(1L, default = 100L, tags = "train"),
        verbose = p_lgl(default = FALSE, tags = "train"),
        dist = p_fct(c("euclidean", "manhattan"), default = "euclidean", tags = "train"),
        method = p_fct(c("cmeans", "ufcl"), default = "cmeans", tags = "train"),
        m = p_dbl(1, default = 2, tags = "train"),
        rate.par = p_dbl(0, 1, tags = "train", depends = quote(method == "ufcl")),
        weights = p_uty(default = 1L, tags = "train", custom_check = crate(function(x) {
          # A custom check must return TRUE or a message. Only predicates (`test_*`) are
          # combined here, because `check_*` functions return a string on failure, which is
          # not a valid operand for `||`. `isTRUE()` also rejects missing values, for which
          # `all(x > 0)` would be NA.
          if (test_numeric(x) && isTRUE(all(x > 0)) || test_count(x, positive = TRUE)) {
            TRUE
          } else {
            "`weights` must be positive numeric vector or a single positive number"
          }
        })),
        control = p_uty(tags = "train")
      )

      param_set$set_values(centers = 2L)

      super$initialize(
        id = "clust.cmeans",
        feature_types = c("logical", "integer", "numeric"),
        predict_types = c("partition", "prob"),
        param_set = param_set,
        properties = c("partitional", "fuzzy", "complete"),
        packages = "e1071",
        man = "mlr3cluster::mlr_learners_clust.cmeans",
        label = "Fuzzy C-Means Clustering Learner"
      )
    }
  ),

  private = list(
    # Validates `centers` against the task and fits `e1071::cmeans()`.
    .train = function(task) {
      pv = self$param_set$get_values(tags = "train")
      assert_centers_param(pv$centers, task, test_data_frame, "centers")

      m = invoke(e1071::cmeans, x = task$data(), .args = pv, .opts = allow_partial_matching)
      if (self$save_assignments) {
        self$assignments = m$cluster
      }
      m
    },

    # Predicts hard cluster ids and the membership matrix for the task data.
    .predict = function(task) {
      newdata = task$data()
      partition = unclass(invoke(cl_predict, self$model, newdata = newdata, type = "class_ids"))
      prob = unclass(invoke(cl_predict, self$model, newdata = newdata, type = "memberships"))
      # Membership columns are named by cluster number.
      colnames(prob) = seq_len(ncol(prob))

      PredictionClust$new(task = task, partition = partition, prob = prob)
    }
  )
)

#' @include zzz.R
register_learner("clust.cmeans", LearnerClustCMeans)
84 |
--------------------------------------------------------------------------------
/R/LearnerClustHclust.R:
--------------------------------------------------------------------------------
#' @title Agglomerative Hierarchical Clustering Learner
#'
#' @name mlr_learners_clust.hclust
#'
#' @description
#' A [LearnerClust] for agglomerative hierarchical clustering implemented in [stats::hclust()].
#' The dissimilarities are computed by [stats::dist()]; the distance measure is selected with
#' the `distmethod` parameter.
#'
#' @templateVar id clust.hclust
#' @template learner
#'
#' @references
#' `r format_bib("becker1988s", "everitt1974cluster", "hartigan1975clustering", "sneath1973numerical", "anderberg1973cluster", "gordon1999classification", "murtagh1985multidimensional", "mcquitty1966similarity", "legendre2012numerical", "murtagh2014ward")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClustHclust = R6Class("LearnerClustHclust",
  inherit = LearnerClust,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      param_set = ps(
        method = p_fct(
          levels = c("ward.D", "ward.D2", "single", "complete", "average", "mcquitty", "median", "centroid"),
          default = "complete",
          tags = c("train", "hclust")
        ),
        members = p_uty(default = NULL, tags = c("train", "hclust")),
        distmethod = p_fct(
          levels = c("euclidean", "maximum", "manhattan", "canberra", "binary", "minkowski"),
          default = "euclidean",
          tags = "train"
        ),
        diag = p_lgl(default = FALSE, tags = c("train", "dist")),
        upper = p_lgl(default = FALSE, tags = c("train", "dist")),
        p = p_dbl(default = 2, tags = c("train", "dist"), depends = quote(distmethod == "minkowski")),
        k = p_int(1L, default = 2L, tags = c("train", "predict"))
      )

      param_set$set_values(k = 2L, distmethod = "euclidean")

      super$initialize(
        id = "clust.hclust",
        feature_types = c("logical", "integer", "numeric"),
        predict_types = "partition",
        param_set = param_set,
        properties = c("hierarchical", "exclusive", "complete"),
        packages = "stats",
        man = "mlr3cluster::mlr_learners_clust.hclust",
        label = "Agglomerative Hierarchical Clustering"
      )
    }
  ),

  private = list(
    # Computes the distance matrix with `stats::dist()` and fits `stats::hclust()` on it.
    # The extra tags "dist" and "hclust" select which parameters go to which function.
    .train = function(task) {
      pv = self$param_set$get_values(tags = "train")
      # Use the full name: `$` matches list names partially, so a shortened name stops
      # resolving to `distmethod` as soon as `diag` is set as well (two candidates -> NULL),
      # which would silently fall back to "euclidean".
      d = invoke(stats::dist,
        x = task$data(),
        method = pv$distmethod %??% "euclidean",
        .args = self$param_set$get_values(tags = c("train", "dist"))
      )
      m = invoke(stats::hclust,
        d = d,
        .args = self$param_set$get_values(tags = c("train", "hclust"))
      )
      if (self$save_assignments) {
        self$assignments = stats::cutree(m, pv$k)
      }
      m
    },

    # Returns the assignments stored during training after validating `k` against the task size.
    .predict = function(task) {
      pv = self$param_set$get_values(tags = "predict")
      if (pv$k > task$nrow) {
        stopf("`k` needs to be between 1 and %i.", task$nrow)
      }

      warn_prediction_useless(self$id)

      PredictionClust$new(task = task, partition = self$assignments)
    }
  )
)

#' @include zzz.R
register_learner("clust.hclust", LearnerClustHclust)
90 |
--------------------------------------------------------------------------------
/R/LearnerClustAffinityPropagation.R:
--------------------------------------------------------------------------------
#' @title Affinity Propagation Clustering Learner
#'
#' @name mlr_learners_clust.ap
#'
#' @description
#' A [LearnerClust] for Affinity Propagation clustering implemented in [apcluster::apcluster()].
#' [apcluster::apcluster()] doesn't have a default for the similarity function,
#' so the parameter `s` must be set before training.
#' The predict method computes the closest cluster exemplar to find the
#' cluster memberships for new data.
#' The code is taken from
#' [StackOverflow](https://stackoverflow.com/questions/34932692/using-the-apcluster-package-in-r-it-is-possible-to-score-unclustered-data-poi)
#' answer by the `apcluster` package maintainer.
#'
#' @templateVar id clust.ap
#' @template learner
#'
#' @references
#' `r format_bib("bodenhofer2011apcluster", "frey2007clustering")`
#'
#' @export
#' @template seealso_learner
#' @template simple_example
LearnerClustAP = R6Class("LearnerClustAP",
  inherit = LearnerClust,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      param_set = ps(
        s = p_uty(tags = c("train", "required")),
        p = p_uty(
          default = NA_real_, special_vals = list(NA_real_), tags = "train", custom_check = check_numeric
        ),
        q = p_dbl(0, 1, default = NA_real_, special_vals = list(NA_real_), tags = "train"),
        maxits = p_int(1L, default = 1000L, tags = "train"),
        convits = p_int(1L, default = 100L, tags = "train"),
        lam = p_dbl(0.5, 1, default = 0.9, tags = "train"),
        includeSim = p_lgl(default = FALSE, tags = "train"),
        details = p_lgl(default = FALSE, tags = "train"),
        nonoise = p_lgl(default = FALSE, tags = "train"),
        seed = p_int(default = NA_integer_, special_vals = list(NA_integer_), tags = "train")
      )

      super$initialize(
        id = "clust.ap",
        feature_types = c("logical", "integer", "numeric"),
        predict_types = "partition",
        param_set = param_set,
        properties = c("partitional", "exclusive", "complete"),
        packages = "apcluster",
        man = "mlr3cluster::mlr_learners_clust.ap",
        label = "Affinity Propagation Clustering"
      )
    }
  ),

  private = list(
    # Fits `apcluster::apcluster()` and attaches the exemplar rows to the model,
    # because `.predict()` needs them to score new observations.
    .train = function(task) {
      pv = self$param_set$get_values(tags = "train")
      data = task$data()
      m = invoke(apcluster::apcluster, x = data, .args = pv)
      # add data points corresponding to exemplars
      setattr(m, "exemplar_data", data[m@exemplars])

      if (self$save_assignments) {
        self$assignments = apcluster::labels(m, type = "enum")
      }
      m
    },

    # Assigns each new observation to the exemplar with the highest similarity.
    .predict = function(task) {
      # The similarity function is a train parameter, but is needed again for scoring.
      pv = self$param_set$get_values(tags = "train")
      sim_func = pv$s
      exemplar_data = attr(self$model, "exemplar_data")
      n_exemplars = nrow(exemplar_data)

      data = task$data()
      # Similarities of all stacked rows to the new observations (selected through `sel`),
      # reduced to the exemplar rows: one row per exemplar, one column per new observation.
      # drop = FALSE keeps the result a matrix when there is a single exemplar or a single
      # new observation; otherwise apply() below would fail on a plain vector.
      sim_mat = sim_func(
        rbind(exemplar_data, data),
        sel = seq_len(nrow(data)) + n_exemplars
      )[seq_len(n_exemplars), , drop = FALSE]
      partition = unname(apply(sim_mat, 2L, which.max))
      PredictionClust$new(task = task, partition = partition)
    }
  )
)

#' @include zzz.R
register_learner("clust.ap", LearnerClustAP)
89 |
--------------------------------------------------------------------------------
/R/zzz.R:
--------------------------------------------------------------------------------
1 | #' @import checkmate
2 | #' @import data.table
3 | #' @import mlr3
4 | #' @import mlr3misc
5 | #' @import paradox
6 | #' @importFrom R6 R6Class
7 | #' @importFrom clue cl_predict
8 | #' @importFrom cluster silhouette
9 | #' @importFrom fpc cluster.stats
10 | #' @importFrom stats model.frame terms predict runif dist
11 | "_PACKAGE"
12 |
# Package-internal registries. Constructors are collected here by `register_task()` and
# `register_learner()` and are read again in `register_mlr3()` and `.onUnload()`.
mlr3cluster_tasks = new.env(parent = emptyenv())
mlr3cluster_learners = new.env(parent = emptyenv())

# Stores a task constructor under `name`; registering the same name twice is an error.
register_task = function(name, constructor) {
  already_known = name %chin% names(mlr3cluster_tasks)
  if (already_known) {
    stopf("task %s registered twice.", name)
  }
  mlr3cluster_tasks[[name]] = constructor
}

# Stores a learner constructor under `name`; registering the same name twice is an error.
register_learner = function(name, constructor) {
  already_known = name %chin% names(mlr3cluster_learners)
  if (already_known) {
    stopf("learner %s registered twice.", name)
  }
  mlr3cluster_learners[[name]] = constructor
}
25 |
# Hooks this package into mlr3: extends the reflections with the "clust" task type and
# adds the collected tasks and learners as well as the clustering measures to the
# mlr3 dictionaries.
register_mlr3 = function() {
  # reflections
  refl = utils::getFromNamespace("mlr_reflections", ns = "mlr3")

  # Drop an existing "clust" row first so that repeated registration does not duplicate it.
  refl$task_types = refl$task_types[!"clust"]
  clust_row = rowwise_table(
    ~type, ~package, ~task, ~learner, ~prediction, ~prediction_data, ~measure,
    "clust", "mlr3cluster", "TaskClust", "LearnerClust", "PredictionClust", "PredictionDataClust", "MeasureClust"
  )
  refl$task_types = setkeyv(rbind(refl$task_types, clust_row, fill = TRUE), "type")

  # Column roles, task properties and measure properties are copied from the "regr" entries.
  refl$task_col_roles$clust = refl$task_col_roles$regr
  refl$task_properties$clust = refl$task_properties$regr
  refl$learner_properties$clust = c(
    "missings", "partitional", "hierarchical", "exclusive", "overlapping", "fuzzy", "complete", "partial", "density"
  )
  refl$learner_predict_types$clust = list(partition = "partition", prob = c("partition", "prob"))
  refl$measure_properties$clust = refl$measure_properties$regr
  refl$default_measures$clust = "clust.dunn"

  # tasks
  task_dict = utils::getFromNamespace("mlr_tasks", ns = "mlr3")
  iwalk(as.list(mlr3cluster_tasks), function(task, id) task_dict$add(id, task))

  # learners
  learner_dict = utils::getFromNamespace("mlr_learners", ns = "mlr3")
  iwalk(as.list(mlr3cluster_learners), function(learner, id) learner_dict$add(id, learner))

  # measures
  measure_dict = utils::getFromNamespace("mlr_measures", ns = "mlr3")
  measure_dict$add("clust.silhouette", MeasureClustSil, name = "silhouette", label = "Silhouette")
  measure_dict$add("clust.dunn", MeasureClustFPC, name = "dunn", label = "Dunn")
  measure_dict$add("clust.ch", MeasureClustFPC, name = "ch", label = "Calinski Harabasz")
  measure_dict$add("clust.wss", MeasureClustFPC, name = "wss", label = "Within Sum of Squares")
}
59 |
# Load hook: imports backports and arranges for `register_mlr3()` to be run for the
# "mlr3" namespace via `register_namespace_callback()`.
.onLoad = function(libname, pkgname) {
  backports::import(pkgname)

  register_namespace_callback(pkgname, "mlr3", register_mlr3)
}

# Unload hook: removes the tasks, learners and measures of this package from the mlr3
# dictionaries and deletes the "clust" entries from the reflections.
.onUnload = function(libpaths) { # nolint
  for (id in names(mlr3cluster_tasks)) {
    mlr_tasks$remove(id)
  }
  for (id in names(mlr3cluster_learners)) {
    mlr_learners$remove(id)
  }
  # NOTE(review): `measures` is not defined in the code visible here; presumably it is a
  # named object defined elsewhere in the package -- confirm.
  for (id in names(measures)) {
    mlr_measures$remove(paste("clust", id, sep = "."))
  }

  mlr_reflections$task_types = mlr_reflections$task_types[!"clust"]
  clust_fields = c(
    "measure_properties",
    "default_measures",
    "learner_properties",
    "learner_predict_types",
    "task_properties",
    "task_col_roles"
  )
  for (field in clust_fields) {
    mlr_reflections[[field]] = remove_named(mlr_reflections[[field]], "clust")
  }
}

# NOTE(review): helper called once at the end of the file; its definition is not visible here.
leanify_package()
84 |
--------------------------------------------------------------------------------
/R/LearnerClustMiniBatchKMeans.R:
--------------------------------------------------------------------------------
#' @title Mini Batch K-Means Clustering Learner
#'
#' @name mlr_learners_clust.MBatchKMeans
#'
#' @description
#' A [LearnerClust] for mini batch k-means clustering implemented in [ClusterR::MiniBatchKmeans()].
#' [ClusterR::MiniBatchKmeans()] doesn't have a default value for the number of clusters.
#' Therefore, the `clusters` parameter here is set to 2 by default.
#' The predict method uses [ClusterR::predict_MBatchKMeans()] to compute the
#' cluster memberships for new data.
#' The learner supports both partitional and fuzzy clustering.
#'
#' @templateVar id clust.MBatchKMeans
#' @template learner
#'
#' @references
#' `r format_bib("sculley2010web")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClustMiniBatchKMeans = R6Class("LearnerClustMiniBatchKMeans",
  inherit = LearnerClust,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      param_set = ps(
        clusters = p_int(1L, default = 2L, tags = "train"),
        batch_size = p_int(1L, default = 10L, tags = "train"),
        num_init = p_int(1L, default = 1L, tags = "train"),
        max_iters = p_int(1L, default = 100L, tags = "train"),
        init_fraction = p_dbl(
          0, 1,
          default = 1,
          tags = "train",
          depends = quote(initializer %in% c("kmeans++", "optimal_init"))
        ),
        initializer = p_fct(
          c("optimal_init", "quantile_init", "kmeans++", "random"),
          default = "kmeans++",
          tags = "train"
        ),
        early_stop_iter = p_int(1L, default = 10L, tags = "train"),
        verbose = p_lgl(default = FALSE, tags = "train"),
        CENTROIDS = p_uty(default = NULL, tags = "train"),
        tol = p_dbl(0, default = 1e-04, tags = "train"),
        tol_optimal_init = p_dbl(0, default = 0.3, tags = "train"),
        seed = p_int(default = 1L, tags = "train")
      )

      param_set$set_values(clusters = 2L)

      super$initialize(
        id = "clust.MBatchKMeans",
        feature_types = c("logical", "integer", "numeric"),
        predict_types = c("partition", "prob"),
        param_set = param_set,
        properties = c("partitional", "fuzzy", "exclusive", "complete"),
        packages = "ClusterR",
        man = "mlr3cluster::mlr_learners_clust.MBatchKMeans",
        label = "Mini Batch K-Means"
      )
    }
  ),

  private = list(
    # Validates `CENTROIDS` and fits `ClusterR::MiniBatchKmeans()` on the task data.
    .train = function(task) {
      pv = self$param_set$get_values(tags = "train")
      assert_centers_param(pv$CENTROIDS, task, test_matrix, "CENTROIDS")

      # User-supplied centroids must provide exactly one row per requested cluster.
      centroids_given = test_matrix(pv$CENTROIDS)
      if (centroids_given && nrow(pv$CENTROIDS) != pv$clusters) {
        stopf("`CENTROIDS` must have same number of rows as `clusters`.")
      }

      train_data = task$data()
      fit = invoke(ClusterR::MiniBatchKmeans, data = train_data, .args = pv)
      if (self$save_assignments) {
        # Training assignments are obtained by predicting on the training data.
        self$assignments = as.integer(invoke(predict, fit, newdata = train_data))
      }
      fit
    },

    # Predicts cluster ids and, for predict type "prob", the membership matrix as well.
    .predict = function(task) {
      newdata = task$data()
      partition = as.integer(invoke(predict, self$model, newdata = newdata))

      prob = if (self$predict_type == "prob") {
        membership = invoke(predict, self$model, newdata = newdata, fuzzy = TRUE)
        # Membership columns are named by cluster number.
        colnames(membership) = seq_len(ncol(membership))
        membership
      } else {
        NULL
      }

      PredictionClust$new(task = task, partition = partition, prob = prob)
    }
  )
)

#' @include zzz.R
register_learner("clust.MBatchKMeans", LearnerClustMiniBatchKMeans)
93 |
--------------------------------------------------------------------------------
/R/LearnerClustKKMeans.R:
--------------------------------------------------------------------------------
#' @title Kernel K-Means Clustering Learner
#'
#' @name mlr_learners_clust.kkmeans
#'
#' @description
#' A [LearnerClust] for kernel k-means clustering implemented in [kernlab::kkmeans()].
#' [kernlab::kkmeans()] doesn't have a default value for the number of clusters.
#' Therefore, the `centers` parameter here is set to 2 by default.
#'
#' Kernel hyperparameters (`sigma`, `degree`, `scale`, `offset`, `order`) have to be set
#' directly on the learner and not by using the `kpar` list of `kkmeans`.
#' During training, the learner collects the kernel hyperparameters that were set into the
#' `kpar` list which is handed to [kernlab::kkmeans()].
#' If none of them is set, `kpar` is not passed and the default of [kernlab::kkmeans()] applies.
#'
#' The predict method assigns each new data point to the center which is nearest
#' in kernel distance.
#'
#' @templateVar id clust.kkmeans
#' @template learner
#'
#' @references
#' `r format_bib("karatzoglou2004kernlab", "dhillon2004unified")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClustKKMeans = R6Class("LearnerClustKKMeans",
  inherit = LearnerClust,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      param_set = ps(
        centers = p_uty(tags = c("train", "required"), custom_check = check_centers),
        kernel = p_fct(
          levels = c("vanilladot", "polydot", "rbfdot", "tanhdot", "laplacedot", "besseldot", "anovadot", "splinedot"),
          default = "rbfdot",
          tags = "train"
        ),
        # kernel hyperparameters; each one is only available for the kernels that use it
        sigma = p_dbl(
          0, tags = "train", depends = quote(kernel %in% c("rbfdot", "anovadot", "besseldot", "laplacedot"))
        ),
        degree = p_int(
          1L, default = 3L, tags = "train", depends = quote(kernel %in% c("polydot", "anovadot", "besseldot"))
        ),
        scale = p_dbl(0, default = 1, tags = "train", depends = quote(kernel %in% c("polydot", "tanhdot"))),
        offset = p_dbl(default = 1, tags = "train", depends = quote(kernel %in% c("polydot", "tanhdot"))),
        order = p_int(default = 1L, tags = "train", depends = quote(kernel == "besseldot")),
        alg = p_fct(c("kkmeans", "kerninghan"), default = "kkmeans", tags = "train"),
        p = p_dbl(default = 1, tags = "train")
      )

      # kkmeans() has no default for the number of clusters, so provide one here
      param_set$set_values(centers = 2L)

      super$initialize(
        id = "clust.kkmeans",
        feature_types = c("logical", "integer", "numeric"),
        predict_types = "partition",
        param_set = param_set,
        properties = c("partitional", "exclusive", "complete"),
        packages = "kernlab",
        man = "mlr3cluster::mlr_learners_clust.kkmeans",
        label = "Kernel K-Means"
      )
    }
  ),

  private = list(
    # Fits kernel k-means on the task data and returns the fitted kernlab model.
    # If `save_assignments` is set, the cluster assignments of the training data
    # are stored in `self$assignments`.
    .train = function(task) {
      pv = self$param_set$get_values(tags = "train")
      assert_centers_param(pv$centers, task, test_data_frame, "centers")

      # kernlab::kkmeans() takes kernel hyperparameters through its `kpar` list
      # argument (see ?kernlab::kkmeans), so collect the ones that were set into
      # that list instead of passing them as top-level arguments.
      # If none is set, `kpar` is not passed at all and kkmeans() uses its default.
      is_kpar = names(pv) %in% c("sigma", "degree", "scale", "offset", "order")
      if (any(is_kpar)) {
        pv = c(pv[!is_kpar], list(kpar = pv[is_kpar]))
      }

      m = invoke(kernlab::kkmeans, x = as.matrix(task$data()), .args = pv)
      if (self$save_assignments) {
        # extract the plain assignment vector from the model object
        self$assignments = m[seq_along(m)]
      }
      m
    },

    # Assigns every observation of `task` to the center with the smallest
    # squared kernel distance and returns the result as a PredictionClust.
    .predict = function(task) {
      # the prediction logic is taken from mlr2

      centers = kernlab::centers(self$model)
      kernel = kernlab::kernelf(self$model)
      x = as.matrix(task$data())

      # kernel product between each new data point (rows) and each center (columns)
      k_xc = matrix(kernlab::kernelMatrix(kernel, x, centers), ncol = nrow(centers))
      # kernel product between each new data point and itself
      k_xx = as.numeric(diag(kernlab::kernelMatrix(kernel, x)))
      # kernel product between each center and itself
      k_cc = as.numeric(diag(kernlab::kernelMatrix(kernel, as.matrix(centers))))

      # squared kernel distance to the centers: k(x, x) + k(c, c) - 2 * k(x, c);
      # outer() broadcasts k_xx over the rows and k_cc over the columns
      d2 = outer(k_xx, k_cc, "+") - 2 * k_xc
      # the nearest center determines cluster assignment
      partition = apply(d2, 1L, which.min)

      PredictionClust$new(task = task, partition = partition)
    }
  )
)

#' @include zzz.R
register_learner("clust.kkmeans", LearnerClustKKMeans)
105 |
--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | output: github_document
3 | ---
4 |
5 |
6 |
7 | ```{r, include = FALSE}
knitr::opts_chunk$set(collapse = TRUE, comment = "#>", fig.path = "man/figures/README-", out.width = "100%")

library(data.table)
library(mlr3cluster)
library(mlr3misc)

# Key, label and packages of all entries of a dictionary whose task type is "clust"
clust_overview = function(dict) {
  as.data.table(dict)[task_type == "clust", .(key, label, packages)]
}

# These tables feed the inline counts and the overview tables further down
lrn_clust = clust_overview(mlr3::mlr_learners)
msr_clust = clust_overview(mlr3::mlr_measures)
20 | ```
21 |
22 | # mlr3cluster
23 |
24 | Package website: [release](https://mlr3cluster.mlr-org.com/) \| [dev](https://mlr3cluster.mlr-org.com/dev/)
25 |
26 | Cluster analysis for **[mlr3](https://github.com/mlr-org/mlr3/)**.
27 |
28 |
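29 | [![r-cmd-check](https://github.com/mlr-org/mlr3cluster/actions/workflows/r-cmd-check.yml/badge.svg)](https://github.com/mlr-org/mlr3cluster/actions/workflows/r-cmd-check.yml)
30 | [![CRAN status](https://www.r-pkg.org/badges/version/mlr3cluster)](https://CRAN.R-project.org/package=mlr3cluster)
31 | [![StackOverflow](https://img.shields.io/badge/stackoverflow-mlr3-orange.svg)](https://stackoverflow.com/questions/tagged/mlr3)
32 | [![Mattermost](https://img.shields.io/badge/chat-mattermost-orange.svg)](https://lmmisld-lmu-stats-slds.srv.mwn.de/mlr_invite/)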
33 |
34 |
35 | **mlr3cluster** is an extension package for cluster analysis within the **[mlr3](https://github.com/mlr-org/mlr3)** ecosystem. It is the successor to the clustering capabilities of **[mlr2](https://github.com/mlr-org/mlr)**.
36 |
37 | ## Installation
38 |
39 | Install the latest release from CRAN:
40 |
41 | ```{r, eval = FALSE}
42 | install.packages("mlr3cluster")
43 | ```
44 |
45 | Install the development version from GitHub:
46 |
47 | ```{r, eval = FALSE}
48 | # install.packages("pak")
49 | pak::pak("mlr-org/mlr3cluster")
50 | ```
51 |
52 | ## Feature Overview
53 |
54 | The current version of **mlr3cluster** contains:
55 |
56 | - A selection of `r nrow(lrn_clust)` clustering learners that represent a wide variety of clusterers: partitional, hierarchical, fuzzy, etc.
57 | - A selection of `r nrow(msr_clust)` performance measures
58 | - Two built-in tasks to get started with clustering
59 |
60 | Also, the package is integrated with **[mlr3viz](https://github.com/mlr-org/mlr3viz)** which enables you to create great visualizations with just one line of code!
61 |
62 | ## Cluster Analysis
63 |
64 | ### Cluster Learners
65 |
66 | ```{r, echo = FALSE}
# Turns a character vector of package names into one comma-separated string.
# The packages "stats", "graphics" and "datasets" are kept as plain text,
# every other package becomes a markdown link to its CRAN page.
cran_pkg = function(pkgs) {
  is_unlinked = pkgs %chin% c("stats", "graphics", "datasets")
  linked = sprintf("[%1$s](https://cran.r-project.org/package=%1$s)", pkgs)
  toString(fifelse(is_unlinked, pkgs, linked))
}
74 |
# Drop the framework packages so that only the remaining packages are listed
lrn_clust[, packages := map(packages, setdiff, y = c("mlr3", "mlr3cluster"))]

# Link each learner key to its reference page and format the package lists
lrn_clust[, c("key", "packages") := list(
  sprintf("[%1$s](https://mlr3cluster.mlr-org.com/reference/mlr_learners_%1$s)", key),
  map_chr(packages, cran_pkg)
)]

lrn_header = tools::toTitleCase(names(lrn_clust))
knitr::kable(lrn_clust, format = "markdown", col.names = lrn_header)
81 | ```
82 |
83 | ### Cluster Measures
84 |
85 | ```{r, echo = FALSE}
# Drop the framework packages so that only the remaining packages are listed
msr_clust[, packages := map(packages, setdiff, y = c("mlr3", "mlr3cluster"))]

# Link each measure key to its reference page and format the package lists
msr_clust[, c("key", "packages") := list(
  sprintf("[%1$s](https://mlr3cluster.mlr-org.com/reference/mlr_measures_%1$s)", key),
  map_chr(packages, cran_pkg)
)]

msr_header = tools::toTitleCase(names(msr_clust))
knitr::kable(msr_clust, format = "markdown", col.names = msr_header)
92 | ```
93 |
94 |
95 | ## Example
96 |
97 | ```{r}
library(mlr3)
library(mlr3cluster)

# get the "usarrests" task from the task dictionary and print it
task = tsk("usarrests")
task

# train a k-means learner on the task and predict on the same task
learner = lrn("clust.kmeans")
prediction = learner$train(task)$predict(task)

# score the prediction with two measures
prediction$score(msrs(c("clust.wss", "clust.silhouette")), task)
108 | ```
109 |
110 | ## More Resources
111 |
112 | Check out the **[blogpost](https://www.r-bloggers.com/2020/10/introducing-mlr3cluster-cluster-analysis-package/)** for a more detailed introduction to the package.
113 | Also, **[mlr3book](https://mlr3book.mlr-org.com/chapters/chapter13/beyond_regression_and_classification.html#sec-cluster)** has a section on clustering.
114 |
115 | ## Future Plans
116 |
117 | - Add more learners and measures
118 | - Integrate the package with **[mlr3pipelines](https://github.com/mlr-org/mlr3pipelines)** (work in progress)
119 |
120 | If you have any questions, feedback or ideas, feel free to open an issue [here](https://github.com/mlr-org/mlr3cluster/issues).
121 |
--------------------------------------------------------------------------------
/man/PredictionClust.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/PredictionClust.R
3 | \name{PredictionClust}
4 | \alias{PredictionClust}
5 | \title{Prediction Object for Cluster Analysis}
6 | \description{
7 | This object wraps the predictions returned by a learner of class \link{LearnerClust}, i.e.
8 | the predicted partition and cluster probability.
9 | }
10 | \examples{
11 | library(mlr3)
12 | library(mlr3cluster)
13 | task = tsk("usarrests")
14 | learner = lrn("clust.kmeans")
15 | p = learner$train(task)$predict(task)
16 | p$predict_types
17 | head(as.data.table(p))
18 | }
19 | \concept{Prediction}
20 | \section{Super class}{
21 | \code{\link[mlr3:Prediction]{mlr3::Prediction}} -> \code{PredictionClust}
22 | }
23 | \section{Active bindings}{
24 | \if{html}{\out{}}
25 | \describe{
26 | \item{\code{partition}}{(\code{integer()})\cr
27 | Access the stored partition.}
28 |
29 | \item{\code{prob}}{(\code{matrix()})\cr
30 | Access to the stored probabilities.}
31 | }
32 | \if{html}{\out{
}}
33 | }
34 | \section{Methods}{
35 | \subsection{Public methods}{
36 | \itemize{
37 | \item \href{#method-PredictionClust-new}{\code{PredictionClust$new()}}
38 | \item \href{#method-PredictionClust-clone}{\code{PredictionClust$clone()}}
39 | }
40 | }
41 | \if{html}{\out{
42 | Inherited methods
43 |
51 |
52 | }}
53 | \if{html}{\out{
}}
54 | \if{html}{\out{}}
55 | \if{latex}{\out{\hypertarget{method-PredictionClust-new}{}}}
56 | \subsection{Method \code{new()}}{
57 | Creates a new instance of this \link[R6:R6Class]{R6} class.
58 | \subsection{Usage}{
59 | \if{html}{\out{}}\preformatted{PredictionClust$new(
60 | task = NULL,
61 | row_ids = task$row_ids,
62 | partition = NULL,
63 | prob = NULL,
64 | check = TRUE
65 | )}\if{html}{\out{
}}
66 | }
67 |
68 | \subsection{Arguments}{
69 | \if{html}{\out{}}
70 | \describe{
71 | \item{\code{task}}{(\link{TaskClust})\cr
72 | Task, used to extract defaults for \code{row_ids}.}
73 |
74 | \item{\code{row_ids}}{(\code{integer()})\cr
75 | Row ids of the predicted observations, i.e. the row ids of the test set.}
76 |
77 | \item{\code{partition}}{(\code{integer()})\cr
78 | Vector of cluster partitions.}
79 |
80 | \item{\code{prob}}{(\code{matrix()})\cr
81 | Numeric matrix of cluster membership probabilities with one column for each cluster
82 | and one row for each observation.
83 | Columns must be named with cluster numbers, row names are automatically removed.
84 | If \code{prob} is provided, but \code{partition} is not, the cluster memberships are calculated from
85 | the probabilities using \code{\link[=max.col]{max.col()}} with \code{ties.method} set to \code{"first"}.}
86 |
87 | \item{\code{check}}{(\code{logical(1)})\cr
88 | If \code{TRUE}, performs some argument checks and predict type conversions.}
89 | }
90 | \if{html}{\out{
}}
91 | }
92 | }
93 | \if{html}{\out{
}}
94 | \if{html}{\out{}}
95 | \if{latex}{\out{\hypertarget{method-PredictionClust-clone}{}}}
96 | \subsection{Method \code{clone()}}{
97 | The objects of this class are cloneable with this method.
98 | \subsection{Usage}{
99 | \if{html}{\out{}}\preformatted{PredictionClust$clone(deep = FALSE)}\if{html}{\out{
}}
100 | }
101 |
102 | \subsection{Arguments}{
103 | \if{html}{\out{}}
104 | \describe{
105 | \item{\code{deep}}{Whether to make a deep clone.}
106 | }
107 | \if{html}{\out{
}}
108 | }
109 | }
110 | }
111 |
--------------------------------------------------------------------------------
/man/MeasureClust.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/MeasureClust.R
3 | \name{MeasureClust}
4 | \alias{MeasureClust}
5 | \title{Cluster Measure}
6 | \description{
7 | This measure specializes \link[mlr3:Measure]{mlr3::Measure} for cluster analysis:
8 | \itemize{
9 | \item \code{task_type} is set to \code{"clust"}.
10 | \item Possible values for \code{predict_type} are \code{"partition"} and \code{"prob"}.
11 | }
12 |
13 | Predefined measures can be found in the \link[mlr3misc:Dictionary]{mlr3misc::Dictionary} \link[mlr3:mlr_measures]{mlr3::mlr_measures}.
14 | }
15 | \seealso{
16 | Example cluster measures: \code{\link[=mlr_measures_clust.dunn]{clust.dunn}}
17 | }
18 | \section{Super class}{
19 | \code{\link[mlr3:Measure]{mlr3::Measure}} -> \code{MeasureClust}
20 | }
21 | \section{Methods}{
22 | \subsection{Public methods}{
23 | \itemize{
24 | \item \href{#method-MeasureClust-new}{\code{MeasureClust$new()}}
25 | }
26 | }
27 | \if{html}{\out{
28 | Inherited methods
29 |
36 |
37 | }}
38 | \if{html}{\out{
}}
39 | \if{html}{\out{}}
40 | \if{latex}{\out{\hypertarget{method-MeasureClust-new}{}}}
41 | \subsection{Method \code{new()}}{
42 | Creates a new instance of this \link[R6:R6Class]{R6} class.
43 | \subsection{Usage}{
44 | \if{html}{\out{}}\preformatted{MeasureClust$new(
45 | id,
46 | range,
47 | minimize = NA,
48 | aggregator = NULL,
49 | properties = character(),
50 | predict_type = "partition",
51 | task_properties = character(),
52 | packages = character(),
53 | label = NA_character_,
54 | man = NA_character_
55 | )}\if{html}{\out{
}}
56 | }
57 |
58 | \subsection{Arguments}{
59 | \if{html}{\out{}}
60 | \describe{
61 | \item{\code{id}}{(\code{character(1)})\cr
62 | Identifier for the new instance.}
63 |
64 | \item{\code{range}}{(\code{numeric(2)})\cr
65 | Feasible range for this measure as \code{c(lower_bound, upper_bound)}.
66 | Both bounds may be infinite.}
67 |
68 | \item{\code{minimize}}{(\code{logical(1)})\cr
69 | Set to \code{TRUE} if good predictions correspond to small values,
70 | and to \code{FALSE} if good predictions correspond to large values.
71 | If set to \code{NA} (default), tuning this measure is not possible.}
72 |
73 | \item{\code{aggregator}}{(\verb{function(x)})\cr
74 | Function to aggregate individual performance scores \code{x} where \code{x} is a numeric vector.
75 | If \code{NULL}, defaults to \code{\link[=mean]{mean()}}.}
76 |
77 | \item{\code{properties}}{(\code{character()})\cr
78 | Properties of the measure.
79 | Must be a subset of \link[mlr3:mlr_reflections]{mlr_reflections$measure_properties}.
80 | Supported by \code{mlr3}:
81 | \itemize{
82 | \item \code{"requires_task"} (requires the complete \link[mlr3:Task]{mlr3::Task}),
83 | \item \code{"requires_learner"} (requires the trained \link[mlr3:Learner]{mlr3::Learner}),
84 | \item \code{"requires_train_set"} (requires the training indices from the \link[mlr3:Resampling]{mlr3::Resampling}), and
85 | \item \code{"na_score"} (the measure is expected to occasionally return \code{NA} or \code{NaN}).
86 | }}
87 |
88 | \item{\code{predict_type}}{(\code{character(1)})\cr
89 | Required predict type of the \link[mlr3:Learner]{mlr3::Learner}.
90 | Possible values are stored in \link[mlr3:mlr_reflections]{mlr_reflections$learner_predict_types}.}
91 |
92 | \item{\code{task_properties}}{(\code{character()})\cr
93 | Required task properties, see \link[mlr3:Task]{mlr3::Task}.}
94 |
95 | \item{\code{packages}}{(\code{character()})\cr
96 | Set of required packages.
97 | A warning is signaled by the constructor if at least one of the packages is not installed,
98 | but loaded (not attached) later on-demand via \code{\link[=requireNamespace]{requireNamespace()}}.}
99 |
100 | \item{\code{label}}{(\code{character(1)})\cr
101 | Label for the new instance.}
102 |
103 | \item{\code{man}}{(\code{character(1)})\cr
104 | String in the format \verb{[pkg]::[topic]} pointing to a manual page for this object.
105 | The referenced help package can be opened via method \verb{$help()}.}
106 | }
107 | \if{html}{\out{
}}
108 | }
109 | }
110 | }
111 |
--------------------------------------------------------------------------------
/man/TaskClust.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/TaskClust.R
3 | \name{TaskClust}
4 | \alias{TaskClust}
5 | \title{Cluster Task}
6 | \description{
7 | This task specializes \link[mlr3:Task]{mlr3::Task} for cluster problems.
8 | As an unsupervised task, this task has no target column.
9 | The \code{task_type} is set to \code{"clust"}.
10 |
11 | Predefined tasks are stored in the \link[mlr3misc:Dictionary]{dictionary} \link[mlr3:mlr_tasks]{mlr3::mlr_tasks}.
12 | }
13 | \examples{
14 | library(mlr3)
15 | library(mlr3cluster)
16 | task = TaskClust$new("usarrests", backend = USArrests)
17 | task$task_type
18 |
19 | # possible properties:
20 | mlr_reflections$task_properties$clust
21 | }
22 | \seealso{
23 | Other Task:
24 | \code{\link{mlr_tasks_ruspini}},
25 | \code{\link{mlr_tasks_usarrests}}
26 | }
27 | \concept{Task}
28 | \section{Super classes}{
29 | \code{\link[mlr3:Task]{mlr3::Task}} -> \code{\link[mlr3:TaskUnsupervised]{mlr3::TaskUnsupervised}} -> \code{TaskClust}
30 | }
31 | \section{Methods}{
32 | \subsection{Public methods}{
33 | \itemize{
34 | \item \href{#method-TaskClust-new}{\code{TaskClust$new()}}
35 | \item \href{#method-TaskClust-clone}{\code{TaskClust$clone()}}
36 | }
37 | }
38 | \if{html}{\out{
39 | Inherited methods
40 |
62 |
63 | }}
64 | \if{html}{\out{
}}
65 | \if{html}{\out{}}
66 | \if{latex}{\out{\hypertarget{method-TaskClust-new}{}}}
67 | \subsection{Method \code{new()}}{
68 | Creates a new instance of this \link[R6:R6Class]{R6} class.
69 | \subsection{Usage}{
70 | \if{html}{\out{}}\preformatted{TaskClust$new(id, backend, label = NA_character_)}\if{html}{\out{
}}
71 | }
72 |
73 | \subsection{Arguments}{
74 | \if{html}{\out{}}
75 | \describe{
76 | \item{\code{id}}{(\code{character(1)})\cr
77 | Identifier for the new instance.}
78 |
79 | \item{\code{backend}}{(\link[mlr3:DataBackend]{mlr3::DataBackend})\cr
80 | Either a \link[mlr3:DataBackend]{mlr3::DataBackend}, or any object which is convertible to a \link[mlr3:DataBackend]{mlr3::DataBackend} with \code{as_data_backend()}.
81 | E.g., a \code{data.frame()} will be converted to a \link[mlr3:DataBackendDataTable]{mlr3::DataBackendDataTable}.}
82 |
83 | \item{\code{label}}{(\code{character(1)})\cr
84 | Label for the new instance.}
85 | }
86 | \if{html}{\out{
}}
87 | }
88 | }
89 | \if{html}{\out{
}}
90 | \if{html}{\out{}}
91 | \if{latex}{\out{\hypertarget{method-TaskClust-clone}{}}}
92 | \subsection{Method \code{clone()}}{
93 | The objects of this class are cloneable with this method.
94 | \subsection{Usage}{
95 | \if{html}{\out{}}\preformatted{TaskClust$clone(deep = FALSE)}\if{html}{\out{
}}
96 | }
97 |
98 | \subsection{Arguments}{
99 | \if{html}{\out{}}
100 | \describe{
101 | \item{\code{deep}}{Whether to make a deep clone.}
102 | }
103 | \if{html}{\out{
}}
104 | }
105 | }
106 | }
107 |
--------------------------------------------------------------------------------
/man/mlr_learners_clust.featureless.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/LearnerClustFeatureless.R
3 | \name{mlr_learners_clust.featureless}
4 | \alias{mlr_learners_clust.featureless}
5 | \alias{LearnerClustFeatureless}
6 | \title{Featureless Clustering Learner}
7 | \description{
8 | A simple \link{LearnerClust} which randomly (but evenly) assigns observations to
9 | \code{num_clusters} partitions (default: 1 partition).
10 | }
11 | \section{Dictionary}{
12 |
13 | This \link[mlr3:Learner]{mlr3::Learner} can be instantiated via the \link[mlr3misc:Dictionary]{dictionary} \link[mlr3:mlr_learners]{mlr3::mlr_learners} or with the associated sugar function \code{\link[mlr3:mlr_sugar]{mlr3::lrn()}}:
14 |
15 | \if{html}{\out{}}\preformatted{mlr_learners$get("clust.featureless")
16 | lrn("clust.featureless")
17 | }\if{html}{\out{
}}
18 | }
19 |
20 | \section{Meta Information}{
21 |
22 | \itemize{
23 | \item Task type: \dQuote{clust}
24 | \item Predict Types: \dQuote{partition}, \dQuote{prob}
25 | \item Feature Types: \dQuote{logical}, \dQuote{integer}, \dQuote{numeric}
26 | \item Required Packages: \CRANpkg{mlr3}, \CRANpkg{mlr3cluster}
27 | }
28 | }
29 |
30 | \section{Parameters}{
31 | \tabular{llll}{
32 | Id \tab Type \tab Default \tab Range \cr
33 | num_clusters \tab integer \tab - \tab \eqn{[1, \infty)}{[1, Inf)} \cr
34 | }
35 | }
36 |
37 | \examples{
38 | \dontshow{if (mlr3misc::require_namespaces(lrn("clust.featureless")$packages, quietly = TRUE)) withAutoprint(\{ # examplesIf}
39 | # Define the Learner and set parameter values
40 | learner = lrn("clust.featureless")
41 | print(learner)
42 |
43 | # Define a Task
44 | task = tsk("usarrests")
45 |
46 | # Train the learner on the task
47 | learner$train(task)
48 |
49 | # Print the model
50 | print(learner$model)
51 |
52 | # Make predictions for the task
53 | prediction = learner$predict(task)
54 |
55 | # Score the predictions
56 | prediction$score(task = task)
57 | \dontshow{\}) # examplesIf}
58 | }
59 | \seealso{
60 | \itemize{
61 | \item Chapter in the \href{https://mlr3book.mlr-org.com/}{mlr3book}:
62 | \url{https://mlr3book.mlr-org.com/chapters/chapter2/data_and_basic_modeling.html#sec-learners}
63 | \item Package \href{https://github.com/mlr-org/mlr3extralearners}{mlr3extralearners} for more learners.
64 | \item \link[mlr3misc:Dictionary]{Dictionary} of \link[mlr3:Learner]{Learners}: \link[mlr3:mlr_learners]{mlr3::mlr_learners}
65 | \item \code{as.data.table(mlr_learners)} for a table of available \link[mlr3:Learner]{Learners} in the running session (depending on the loaded packages).
66 | \item \CRANpkg{mlr3pipelines} to combine learners with pre- and postprocessing steps.
67 | \item Extension packages for additional task types:
68 | \itemize{
69 | \item \CRANpkg{mlr3proba} for probabilistic supervised regression and survival analysis.
70 | \item \CRANpkg{mlr3cluster} for unsupervised clustering.
71 | }
72 | \item \CRANpkg{mlr3tuning} for tuning of hyperparameters, \CRANpkg{mlr3tuningspaces}
73 | for established default tuning spaces.
74 | }
75 |
76 | Other Learner:
77 | \code{\link{mlr_learners_clust.MBatchKMeans}},
78 | \code{\link{mlr_learners_clust.SimpleKMeans}},
79 | \code{\link{mlr_learners_clust.agnes}},
80 | \code{\link{mlr_learners_clust.ap}},
81 | \code{\link{mlr_learners_clust.bico}},
82 | \code{\link{mlr_learners_clust.birch}},
83 | \code{\link{mlr_learners_clust.cmeans}},
84 | \code{\link{mlr_learners_clust.cobweb}},
85 | \code{\link{mlr_learners_clust.dbscan}},
86 | \code{\link{mlr_learners_clust.dbscan_fpc}},
87 | \code{\link{mlr_learners_clust.diana}},
88 | \code{\link{mlr_learners_clust.em}},
89 | \code{\link{mlr_learners_clust.fanny}},
90 | \code{\link{mlr_learners_clust.ff}},
91 | \code{\link{mlr_learners_clust.hclust}},
92 | \code{\link{mlr_learners_clust.hdbscan}},
93 | \code{\link{mlr_learners_clust.kkmeans}},
94 | \code{\link{mlr_learners_clust.kmeans}},
95 | \code{\link{mlr_learners_clust.mclust}},
96 | \code{\link{mlr_learners_clust.meanshift}},
97 | \code{\link{mlr_learners_clust.optics}},
98 | \code{\link{mlr_learners_clust.pam}},
99 | \code{\link{mlr_learners_clust.xmeans}}
100 | }
101 | \concept{Learner}
102 | \section{Super classes}{
103 | \code{\link[mlr3:Learner]{mlr3::Learner}} -> \code{\link[mlr3cluster:LearnerClust]{mlr3cluster::LearnerClust}} -> \code{LearnerClustFeatureless}
104 | }
105 | \section{Methods}{
106 | \subsection{Public methods}{
107 | \itemize{
108 | \item \href{#method-LearnerClustFeatureless-new}{\code{LearnerClustFeatureless$new()}}
109 | \item \href{#method-LearnerClustFeatureless-clone}{\code{LearnerClustFeatureless$clone()}}
110 | }
111 | }
112 | \if{html}{\out{
113 | Inherited methods
114 |
127 |
128 | }}
129 | \if{html}{\out{
}}
130 | \if{html}{\out{}}
131 | \if{latex}{\out{\hypertarget{method-LearnerClustFeatureless-new}{}}}
132 | \subsection{Method \code{new()}}{
133 | Creates a new instance of this \link[R6:R6Class]{R6} class.
134 | \subsection{Usage}{
135 | \if{html}{\out{}}\preformatted{LearnerClustFeatureless$new()}\if{html}{\out{
}}
136 | }
137 |
138 | }
139 | \if{html}{\out{
}}
140 | \if{html}{\out{}}
141 | \if{latex}{\out{\hypertarget{method-LearnerClustFeatureless-clone}{}}}
142 | \subsection{Method \code{clone()}}{
143 | The objects of this class are cloneable with this method.
144 | \subsection{Usage}{
145 | \if{html}{\out{}}\preformatted{LearnerClustFeatureless$clone(deep = FALSE)}\if{html}{\out{
}}
146 | }
147 |
148 | \subsection{Arguments}{
149 | \if{html}{\out{}}
150 | \describe{
151 | \item{\code{deep}}{Whether to make a deep clone.}
152 | }
153 | \if{html}{\out{
}}
154 | }
155 | }
156 | }
157 |
--------------------------------------------------------------------------------
/man/LearnerClust.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/LearnerClust.R
3 | \name{LearnerClust}
4 | \alias{LearnerClust}
5 | \title{Cluster Learner}
6 | \description{
7 | This Learner specializes \link[mlr3:Learner]{mlr3::Learner} for cluster problems:
8 | \itemize{
9 | \item \code{task_type} is set to \code{"clust"}.
10 | \item Creates \link[mlr3:Prediction]{mlr3::Prediction}s of class \link{PredictionClust}.
11 | \item Possible values for \code{predict_types} are:
12 | \itemize{
13 | \item \code{"partition"}: Integer indicating the cluster membership.
14 | \item \code{"prob"}: Probability for belonging to each cluster.
15 | }
16 | }
17 |
18 | Predefined learners can be found in the \link[mlr3misc:Dictionary]{mlr3misc::Dictionary} \link[mlr3:mlr_learners]{mlr3::mlr_learners}.
19 | }
20 | \examples{
21 | library(mlr3)
22 | library(mlr3cluster)
23 | ids = mlr_learners$keys("^clust")
24 | ids
25 |
26 | # get a specific learner from mlr_learners:
27 | learner = lrn("clust.kmeans")
28 | print(learner)
29 | }
30 | \section{Super class}{
31 | \code{\link[mlr3:Learner]{mlr3::Learner}} -> \code{LearnerClust}
32 | }
33 | \section{Public fields}{
34 | \if{html}{\out{}}
35 | \describe{
36 | \item{\code{assignments}}{(\code{NULL} | \code{vector()})\cr
37 | Cluster assignments from learned model.}
38 |
39 | \item{\code{save_assignments}}{(\code{logical()})\cr
40 | Should assignments for 'train' data be saved in the learner?
41 | Default is \code{TRUE}.}
42 | }
43 | \if{html}{\out{
}}
44 | }
45 | \section{Methods}{
46 | \subsection{Public methods}{
47 | \itemize{
48 | \item \href{#method-LearnerClust-new}{\code{LearnerClust$new()}}
49 | \item \href{#method-LearnerClust-reset}{\code{LearnerClust$reset()}}
50 | \item \href{#method-LearnerClust-clone}{\code{LearnerClust$clone()}}
51 | }
52 | }
53 | \if{html}{\out{
54 | Inherited methods
55 |
67 |
68 | }}
69 | \if{html}{\out{
}}
70 | \if{html}{\out{}}
71 | \if{latex}{\out{\hypertarget{method-LearnerClust-new}{}}}
72 | \subsection{Method \code{new()}}{
73 | Creates a new instance of this \link[R6:R6Class]{R6} class.
74 | \subsection{Usage}{
75 | \if{html}{\out{}}\preformatted{LearnerClust$new(
76 | id,
77 | param_set = ps(),
78 | predict_types = "partition",
79 | feature_types = character(),
80 | properties = character(),
81 | packages = character(),
82 | label = NA_character_,
83 | man = NA_character_
84 | )}\if{html}{\out{
}}
85 | }
86 |
87 | \subsection{Arguments}{
88 | \if{html}{\out{}}
89 | \describe{
90 | \item{\code{id}}{(\code{character(1)})\cr
91 | Identifier for the new instance.}
92 |
93 | \item{\code{param_set}}{(\link[paradox:ParamSet]{paradox::ParamSet})\cr
94 | Set of hyperparameters.}
95 |
96 | \item{\code{predict_types}}{(\code{character()})\cr
97 | Supported predict types. Must be a subset of \code{\link[mlr3:mlr_reflections]{mlr_reflections$learner_predict_types}}.}
98 |
99 | \item{\code{feature_types}}{(\code{character()})\cr
100 | Feature types the learner operates on. Must be a subset of \code{\link[mlr3:mlr_reflections]{mlr_reflections$task_feature_types}}.}
101 |
102 | \item{\code{properties}}{(\code{character()})\cr
103 | Set of properties of the \link[mlr3:Learner]{mlr3::Learner}.
104 | Must be a subset of \code{\link[mlr3:mlr_reflections]{mlr_reflections$learner_properties}}.
105 | The following properties are currently standardized and understood by learners in \CRANpkg{mlr3}:
106 | \itemize{
107 | \item \code{"missings"}: The learner can handle missing values in the data.
108 | \item \code{"weights"}: The learner supports observation weights.
109 | \item \code{"importance"}: The learner supports extraction of importance scores, i.e. comes with an \verb{$importance()} extractor function (see section on optional extractors in \link[mlr3:Learner]{mlr3::Learner}).
110 | \item \code{"selected_features"}: The learner supports extraction of the set of selected features, i.e. comes with a \verb{$selected_features()} extractor function (see section on optional extractors in \link[mlr3:Learner]{mlr3::Learner}).
111 | \item \code{"oob_error"}: The learner supports extraction of estimated out of bag error, i.e. comes with a \code{oob_error()} extractor function (see section on optional extractors in \link[mlr3:Learner]{mlr3::Learner}).
112 | }}
113 |
114 | \item{\code{packages}}{(\code{character()})\cr
115 | Set of required packages.
116 | A warning is signaled by the constructor if at least one of the packages is not installed,
117 | but loaded (not attached) later on-demand via \code{\link[=requireNamespace]{requireNamespace()}}.}
118 |
119 | \item{\code{label}}{(\code{character(1)})\cr
120 | Label for the new instance.}
121 |
122 | \item{\code{man}}{(\code{character(1)})\cr
123 | String in the format \verb{[pkg]::[topic]} pointing to a manual page for this object.
124 | The referenced help package can be opened via method \verb{$help()}.}
125 | }
126 | \if{html}{\out{
}}
127 | }
128 | }
129 | \if{html}{\out{
}}
130 | \if{html}{\out{}}
131 | \if{latex}{\out{\hypertarget{method-LearnerClust-reset}{}}}
132 | \subsection{Method \code{reset()}}{
133 | Reset \code{assignments} field before calling parent's \code{reset()}.
134 | \subsection{Usage}{
135 | \if{html}{\out{}}\preformatted{LearnerClust$reset()}\if{html}{\out{
}}
136 | }
137 |
138 | }
139 | \if{html}{\out{
}}
140 | \if{html}{\out{}}
141 | \if{latex}{\out{\hypertarget{method-LearnerClust-clone}{}}}
142 | \subsection{Method \code{clone()}}{
143 | The objects of this class are cloneable with this method.
144 | \subsection{Usage}{
145 | \if{html}{\out{}}\preformatted{LearnerClust$clone(deep = FALSE)}\if{html}{\out{
}}
146 | }
147 |
148 | \subsection{Arguments}{
149 | \if{html}{\out{}}
150 | \describe{
151 | \item{\code{deep}}{Whether to make a deep clone.}
152 | }
153 | \if{html}{\out{
}}
154 | }
155 | }
156 | }
157 |
--------------------------------------------------------------------------------