├── .Rbuildignore ├── .editorconfig ├── .github ├── dependabot.yml └── workflows │ ├── pkgdown.yml │ └── r-cmd-check.yml ├── .gitignore ├── .ignore ├── .lintr ├── .pre-commit-config.yaml ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── NEWS.md ├── R ├── LearnerClust.R ├── LearnerClustAffinityPropagation.R ├── LearnerClustAgnes.R ├── LearnerClustBICO.R ├── LearnerClustBIRCH.R ├── LearnerClustCMeans.R ├── LearnerClustCobweb.R ├── LearnerClustDBSCAN.R ├── LearnerClustDBSCANfpc.R ├── LearnerClustDiana.R ├── LearnerClustEM.R ├── LearnerClustFanny.R ├── LearnerClustFarthestFirst.R ├── LearnerClustFeatureless.R ├── LearnerClustHDBSCAN.R ├── LearnerClustHclust.R ├── LearnerClustKKMeans.R ├── LearnerClustKMeans.R ├── LearnerClustMclust.R ├── LearnerClustMeanShift.R ├── LearnerClustMiniBatchKMeans.R ├── LearnerClustOPTICS.R ├── LearnerClustPAM.R ├── LearnerClustSimpleKMeans.R ├── LearnerClustXMeans.R ├── MeasureClust.R ├── MeasureClustInternal.R ├── PredictionClust.R ├── PredictionDataClust.R ├── TaskClust.R ├── TaskClust_ruspini.R ├── TaskClust_usarrest.R ├── as_prediction_clust.R ├── as_task_clust.R ├── bibentries.R ├── helper.R ├── measures.R └── zzz.R ├── README.Rmd ├── README.md ├── cran-comments.md ├── man-roxygen ├── example.R ├── learner.R ├── measure_fpc.R ├── measure_sil.R ├── param_aggregator.R ├── param_average.R ├── param_backend.R ├── param_data_formats.R ├── param_feature_types.R ├── param_id.R ├── param_label.R ├── param_learner_properties.R ├── param_man.R ├── param_measure_properties.R ├── param_measures.R ├── param_minimize.R ├── param_packages.R ├── param_param_set.R ├── param_predict_sets.R ├── param_predict_type.R ├── param_predict_types.R ├── param_primary_key.R ├── param_range.R ├── param_rows.R ├── param_task_properties.R ├── param_task_type.R ├── seealso_learner.R ├── seealso_task.R └── task.R ├── man ├── LearnerClust.Rd ├── MeasureClust.Rd ├── PredictionClust.Rd ├── TaskClust.Rd ├── as_prediction_clust.Rd ├── as_task_clust.Rd ├── 
mlr3cluster-package.Rd ├── mlr_learners_clust.MBatchKMeans.Rd ├── mlr_learners_clust.SimpleKMeans.Rd ├── mlr_learners_clust.agnes.Rd ├── mlr_learners_clust.ap.Rd ├── mlr_learners_clust.bico.Rd ├── mlr_learners_clust.birch.Rd ├── mlr_learners_clust.cmeans.Rd ├── mlr_learners_clust.cobweb.Rd ├── mlr_learners_clust.dbscan.Rd ├── mlr_learners_clust.dbscan_fpc.Rd ├── mlr_learners_clust.diana.Rd ├── mlr_learners_clust.em.Rd ├── mlr_learners_clust.fanny.Rd ├── mlr_learners_clust.featureless.Rd ├── mlr_learners_clust.ff.Rd ├── mlr_learners_clust.hclust.Rd ├── mlr_learners_clust.hdbscan.Rd ├── mlr_learners_clust.kkmeans.Rd ├── mlr_learners_clust.kmeans.Rd ├── mlr_learners_clust.mclust.Rd ├── mlr_learners_clust.meanshift.Rd ├── mlr_learners_clust.optics.Rd ├── mlr_learners_clust.pam.Rd ├── mlr_learners_clust.xmeans.Rd ├── mlr_measures_clust.ch.Rd ├── mlr_measures_clust.dunn.Rd ├── mlr_measures_clust.silhouette.Rd ├── mlr_measures_clust.wss.Rd ├── mlr_tasks_ruspini.Rd └── mlr_tasks_usarrests.Rd ├── mlr3cluster.Rproj ├── pkgdown ├── _pkgdown.yml └── favicon │ ├── apple-touch-icon-120x120.png │ ├── apple-touch-icon-152x152.png │ ├── apple-touch-icon-180x180.png │ ├── apple-touch-icon-60x60.png │ ├── apple-touch-icon-76x76.png │ ├── apple-touch-icon.png │ ├── favicon-16x16.png │ ├── favicon-32x32.png │ └── favicon.ico └── tests ├── testthat.R └── testthat ├── helper.R ├── helper_expectations.R ├── helper_mlr3.R ├── test_LearnerClust.R ├── test_MeasureClust.R ├── test_PredictionClust.R ├── test_TaskClust.R ├── test_mlr_learners_clust_agnes.R ├── test_mlr_learners_clust_ap.R ├── test_mlr_learners_clust_bico.R ├── test_mlr_learners_clust_birch.R ├── test_mlr_learners_clust_cmeans.R ├── test_mlr_learners_clust_cobweb.R ├── test_mlr_learners_clust_dbscan.R ├── test_mlr_learners_clust_dbscan_fpc.R ├── test_mlr_learners_clust_diana.R ├── test_mlr_learners_clust_em.R ├── test_mlr_learners_clust_fanny.R ├── test_mlr_learners_clust_featureless.R ├── test_mlr_learners_clust_ff.R ├── 
test_mlr_learners_clust_hclust.R ├── test_mlr_learners_clust_hdbscan.R ├── test_mlr_learners_clust_kkmeans.R ├── test_mlr_learners_clust_kmeans.R ├── test_mlr_learners_clust_mbatchkmeans.R ├── test_mlr_learners_clust_mclust.R ├── test_mlr_learners_clust_meanshift.R ├── test_mlr_learners_clust_optics.R ├── test_mlr_learners_clust_pam.R ├── test_mlr_learners_clust_simplekmeans.R └── test_mlr_learners_clust_xmeans.R /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^CITATION\.cff$ 3 | ^CONTRIBUTING\.md$ 4 | ^CRAN-RELEASE$ 5 | ^CRAN-SUBMISSION$ 6 | ^Dockerfile$ 7 | ^LICENSE$ 8 | ^LICENSE\.md$ 9 | ^Meta$ 10 | ^README\.Rmd$ 11 | ^README\.html$ 12 | ^\.Renviron$ 13 | ^\.Rproj\.user$ 14 | ^\.ccache$ 15 | ^\.dockerignore$ 16 | ^\.editorconfig$ 17 | ^\.gitattributes$ 18 | ^\.github$ 19 | ^\.gitignore$ 20 | ^\.httr-oauth$ 21 | ^\.ignore$ 22 | ^\.lintr$ 23 | ^\.pre-commit-config\.yaml$ 24 | ^\.vscode$ 25 | ^_pkgdown\.yml$ 26 | ^attic$ 27 | ^codecov\.yml$ 28 | ^codemeta\.json$ 29 | ^compose\.ya?ml$ 30 | ^cran-comments\.md$ 31 | ^data-raw$ 32 | ^docs?$ 33 | ^inst/extdata/.+\.R$ 34 | ^man-roxygen$ 35 | ^paper$ 36 | ^pkgdown$ 37 | ^revdep$ 38 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # See http://editorconfig.org 2 | root = true 3 | 4 | [*] 5 | charset = utf-8 6 | end_of_line = lf 7 | insert_final_newline = true 8 | indent_style = space 9 | trim_trailing_whitespace = true 10 | 11 | [*.{r,R,md,Rmd}] 12 | indent_size = 2 13 | 14 | [*.{c,h}] 15 | indent_size = 4 16 | 17 | [*.{cpp,hpp}] 18 | indent_size = 4 19 | 20 | [{NEWS.md,DESCRIPTION,LICENSE}] 21 | max_line_length = 80 22 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 
| updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yml: -------------------------------------------------------------------------------- 1 | # pkgdown workflow of the mlr3 ecosystem v0.1.0 2 | # https://github.com/mlr-org/actions 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | release: 11 | types: 12 | - published 13 | workflow_dispatch: 14 | 15 | name: pkgdown 16 | 17 | jobs: 18 | pkgdown: 19 | runs-on: ubuntu-latest 20 | 21 | concurrency: 22 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} 23 | env: 24 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 25 | steps: 26 | - uses: actions/checkout@v4 27 | 28 | - uses: r-lib/actions/setup-pandoc@v2 29 | 30 | - uses: r-lib/actions/setup-r@v2 31 | 32 | - uses: r-lib/actions/setup-r-dependencies@v2 33 | with: 34 | extra-packages: any::pkgdown, local::. 
35 | needs: website 36 | 37 | - name: Install template 38 | run: pak::pkg_install("mlr-org/mlr3pkgdowntemplate") 39 | shell: Rscript {0} 40 | 41 | - name: Build site 42 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) 43 | shell: Rscript {0} 44 | 45 | - name: Deploy 46 | if: github.event_name != 'pull_request' 47 | uses: JamesIves/github-pages-deploy-action@v4.7.3 48 | with: 49 | clean: false 50 | branch: gh-pages 51 | folder: docs 52 | -------------------------------------------------------------------------------- /.github/workflows/r-cmd-check.yml: -------------------------------------------------------------------------------- 1 | # Workflow sets WEKA_HOME and installs XMeans 2 | # r cmd check workflow of the mlr3 ecosystem v0.3.1 3 | # https://github.com/mlr-org/actions 4 | on: 5 | workflow_dispatch: 6 | inputs: 7 | debug_enabled: 8 | type: boolean 9 | description: 'Run the build with tmate debugging enabled' 10 | required: false 11 | default: false 12 | push: 13 | branches: 14 | - main 15 | pull_request: 16 | branches: 17 | - main 18 | 19 | name: r-cmd-check 20 | 21 | env: 22 | WEKA_HOME: /home/runner/work/mlr3cluster 23 | 24 | jobs: 25 | r-cmd-check: 26 | runs-on: ${{ matrix.config.os }} 27 | 28 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 29 | 30 | env: 31 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 32 | 33 | strategy: 34 | fail-fast: false 35 | matrix: 36 | config: 37 | - {os: ubuntu-latest, r: 'devel'} 38 | - {os: ubuntu-latest, r: 'release'} 39 | 40 | steps: 41 | - uses: actions/checkout@v4 42 | 43 | - uses: r-lib/actions/setup-pandoc@v2 44 | 45 | - uses: r-lib/actions/setup-r@v2 46 | with: 47 | r-version: ${{ matrix.config.r }} 48 | 49 | - uses: r-lib/actions/setup-r-dependencies@v2 50 | with: 51 | extra-packages: any::rcmdcheck 52 | needs: check 53 | 54 | - uses: mxschmitt/action-tmate@v3 55 | if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }} 56 | with: 57 | limit-access-to-actor: true 58 
| 59 | - name: Install XMeans 60 | run: | 61 | RWeka::WPM("refresh-cache") 62 | RWeka::WPM("install-package", "XMeans") 63 | shell: Rscript {0} 64 | 65 | - uses: r-lib/actions/check-r-package@v2 66 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | 5 | # Session Data files 6 | .RData 7 | .RDataTmp 8 | 9 | # User-specific files 10 | .Ruserdata 11 | 12 | # Example code in package build process 13 | *-Ex.R 14 | 15 | # Output files from R CMD build 16 | /*.tar.gz 17 | 18 | # Output files from R CMD check 19 | /*.Rcheck/ 20 | 21 | # RStudio files 22 | .Rproj.user/ 23 | 24 | # produced vignettes 25 | vignettes/*.html 26 | vignettes/*.pdf 27 | doc/ 28 | Meta/ 29 | 30 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 31 | .httr-oauth 32 | 33 | # knitr and R markdown default cache directories 34 | *_cache/ 35 | /cache/ 36 | 37 | # Temporary files created by R markdown 38 | *.utf8.md 39 | *.knit.md 40 | 41 | # R Environment Variables 42 | .Renviron 43 | 44 | # pkgdown site 45 | docs/ 46 | 47 | # translation temp files 48 | po/*~ 49 | 50 | # RStudio Connect folder 51 | rsconnect/ 52 | 53 | # CRAN 54 | CRAN-RELEASE 55 | CRAN-SUBMISSION 56 | 57 | # MacOS 58 | .DS_Store 59 | -------------------------------------------------------------------------------- /.ignore: -------------------------------------------------------------------------------- 1 | man/ 2 | docs/ 3 | inst/doc/ 4 | attic/ 5 | vignettes/*.html 6 | pkgdown/ 7 | -------------------------------------------------------------------------------- /.lintr: -------------------------------------------------------------------------------- 1 | linters: linters_with_defaults( 2 | # lintr defaults: https://lintr.r-lib.org/reference/default_linters.html 3 | # the following setup changes/removes certain linters 4 | 
assignment_linter = NULL, # do not force using <- for assignments 5 | object_name_linter = object_name_linter(c("snake_case", "CamelCase")), # only allow snake case and camel case object names 6 | commented_code_linter = NULL, # allow code in comments 7 | line_length_linter = line_length_linter(120L), 8 | object_length_linter = object_length_linter(40L), 9 | undesirable_function_linter = undesirable_function_linter(fun = c( 10 | # base messaging 11 | "stop" = "use stopf()", 12 | "warning" = "use warningf()", 13 | "message" = "use messagef()" 14 | )) 15 | ) 16 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # All available hooks: https://pre-commit.com/hooks.html 2 | # R specific hooks: https://github.com/lorenzwalthert/precommit 3 | repos: 4 | - repo: https://github.com/lorenzwalthert/precommit 5 | rev: v0.4.3.9008 6 | hooks: 7 | - id: style-files 8 | args: [--style_pkg=styler.mlr, --style_fun=mlr_style] 9 | additional_dependencies: 10 | - mlr-org/styler.mlr 11 | - id: roxygenize 12 | additional_dependencies: 13 | - ClusterR 14 | - LPCM 15 | - R6 16 | - RWeka 17 | - apcluster 18 | - backports 19 | - checkmate 20 | - clue 21 | - cluster 22 | - data.table 23 | - dbscan 24 | - e1071 25 | - fpc 26 | - kernlab 27 | - mclust 28 | - mlbench 29 | - mlr3 30 | - mlr3misc 31 | - paradox 32 | - stream 33 | # codemeta must be above use-tidy-description when both are used 34 | - id: use-tidy-description 35 | - id: readme-rmd-rendered 36 | - id: parsable-R 37 | - id: no-browser-statement 38 | - id: deps-in-desc 39 | exclude: data-raw|inst 40 | - repo: https://github.com/pre-commit/pre-commit-hooks 41 | rev: v5.0.0 42 | hooks: 43 | - id: check-added-large-files 44 | args: [--maxkb=200] 45 | - id: file-contents-sorter 46 | files: '^\.Rbuildignore$' 47 | - id: end-of-file-fixer 48 | exclude: '\.Rd' 49 | - repo: local 50 | hooks: 51 | - id: 
forbid-to-commit 52 | name: Don't commit common R artifacts 53 | entry: Cannot commit .Rhistory, .RData, .Rds or .rds. 54 | language: fail 55 | files: '\.Rhistory|\.RData|\.Rds|\.rds$' 56 | # `exclude: ` to allow committing specific files. 57 | ci: 58 | autoupdate_schedule: monthly 59 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: mlr3cluster 2 | Title: Cluster Extension for 'mlr3' 3 | Version: 0.1.11.9000 4 | Authors@R: c( 5 | person("Maximilian", "Mücke", , "muecke.maximilian@gmail.com", role = c("aut", "cre"), 6 | comment = c(ORCID = "0009-0000-9432-9795")), 7 | person("Damir", "Pulatov", , "damirpolat@protonmail.com", role = "aut"), 8 | person("Michel", "Lang", , "michellang@gmail.com", role = "aut", 9 | comment = c(ORCID = "0000-0001-9754-0393")), 10 | person("Marc", "Becker", , "marcbecker@posteo.de", role = "ctb", 11 | comment = c(ORCID = "0000-0002-8115-0400")) 12 | ) 13 | Description: Extends the 'mlr3' package with cluster analysis. 
14 | License: LGPL-3 15 | URL: https://mlr3cluster.mlr-org.com, 16 | https://github.com/mlr-org/mlr3cluster 17 | BugReports: https://github.com/mlr-org/mlr3cluster/issues 18 | Depends: 19 | mlr3 (>= 0.21.1), 20 | R (>= 3.3.0) 21 | Imports: 22 | backports (>= 1.1.10), 23 | checkmate (>= 2.0.0), 24 | clue, 25 | cluster, 26 | data.table (>= 1.15.0), 27 | fpc, 28 | mlr3misc (>= 0.15.0), 29 | paradox (>= 1.0.1), 30 | R6, 31 | stats 32 | Suggests: 33 | apcluster, 34 | ClusterR (>= 1.3.1), 35 | dbscan, 36 | e1071, 37 | kernlab, 38 | LPCM, 39 | mclust, 40 | mlbench, 41 | RWeka, 42 | stream, 43 | testthat (>= 3.0.0) 44 | Config/testthat/edition: 3 45 | Encoding: UTF-8 46 | Roxygen: list(markdown = TRUE, r6 = TRUE) 47 | RoxygenNote: 7.3.2 48 | Collate: 49 | 'LearnerClust.R' 50 | 'zzz.R' 51 | 'LearnerClustAffinityPropagation.R' 52 | 'LearnerClustAgnes.R' 53 | 'LearnerClustBICO.R' 54 | 'LearnerClustBIRCH.R' 55 | 'LearnerClustCMeans.R' 56 | 'LearnerClustCobweb.R' 57 | 'LearnerClustDBSCAN.R' 58 | 'LearnerClustDBSCANfpc.R' 59 | 'LearnerClustDiana.R' 60 | 'LearnerClustEM.R' 61 | 'LearnerClustFanny.R' 62 | 'LearnerClustFarthestFirst.R' 63 | 'LearnerClustFeatureless.R' 64 | 'LearnerClustHDBSCAN.R' 65 | 'LearnerClustHclust.R' 66 | 'LearnerClustKKMeans.R' 67 | 'LearnerClustKMeans.R' 68 | 'LearnerClustMclust.R' 69 | 'LearnerClustMeanShift.R' 70 | 'LearnerClustMiniBatchKMeans.R' 71 | 'LearnerClustOPTICS.R' 72 | 'LearnerClustPAM.R' 73 | 'LearnerClustSimpleKMeans.R' 74 | 'LearnerClustXMeans.R' 75 | 'MeasureClust.R' 76 | 'measures.R' 77 | 'MeasureClustInternal.R' 78 | 'PredictionClust.R' 79 | 'PredictionDataClust.R' 80 | 'TaskClust.R' 81 | 'TaskClust_ruspini.R' 82 | 'TaskClust_usarrest.R' 83 | 'as_prediction_clust.R' 84 | 'as_task_clust.R' 85 | 'bibentries.R' 86 | 'helper.R' 87 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not 
edit by hand 2 | 3 | S3method(as.data.table,PredictionClust) 4 | S3method(as_prediction,PredictionDataClust) 5 | S3method(as_prediction_clust,PredictionClust) 6 | S3method(as_prediction_clust,data.frame) 7 | S3method(as_task_clust,DataBackend) 8 | S3method(as_task_clust,TaskClust) 9 | S3method(as_task_clust,data.frame) 10 | S3method(as_task_clust,formula) 11 | S3method(c,PredictionDataClust) 12 | S3method(check_prediction_data,PredictionDataClust) 13 | S3method(create_empty_prediction_data,TaskClust) 14 | S3method(filter_prediction_data,PredictionDataClust) 15 | S3method(is_missing_prediction_data,PredictionDataClust) 16 | export(LearnerClust) 17 | export(LearnerClustAP) 18 | export(LearnerClustAgnes) 19 | export(LearnerClustBICO) 20 | export(LearnerClustBIRCH) 21 | export(LearnerClustCMeans) 22 | export(LearnerClustCobweb) 23 | export(LearnerClustDBSCAN) 24 | export(LearnerClustDBSCANfpc) 25 | export(LearnerClustDiana) 26 | export(LearnerClustEM) 27 | export(LearnerClustFanny) 28 | export(LearnerClustFarthestFirst) 29 | export(LearnerClustFeatureless) 30 | export(LearnerClustHDBSCAN) 31 | export(LearnerClustHclust) 32 | export(LearnerClustKKMeans) 33 | export(LearnerClustKMeans) 34 | export(LearnerClustMclust) 35 | export(LearnerClustMeanShift) 36 | export(LearnerClustMiniBatchKMeans) 37 | export(LearnerClustOPTICS) 38 | export(LearnerClustPAM) 39 | export(LearnerClustSimpleKMeans) 40 | export(LearnerClustXMeans) 41 | export(MeasureClust) 42 | export(PredictionClust) 43 | export(TaskClust) 44 | export(as_prediction_clust) 45 | export(as_task_clust) 46 | import(checkmate) 47 | import(data.table) 48 | import(mlr3) 49 | import(mlr3misc) 50 | import(paradox) 51 | importFrom(R6,R6Class) 52 | importFrom(clue,cl_predict) 53 | importFrom(cluster,silhouette) 54 | importFrom(fpc,cluster.stats) 55 | importFrom(stats,dist) 56 | importFrom(stats,model.frame) 57 | importFrom(stats,predict) 58 | importFrom(stats,runif) 59 | importFrom(stats,terms) 60 | importFrom(utils,bibentry) 
61 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # mlr3cluster (development version) 2 | 3 | # mlr3cluster 0.1.11 4 | 5 | * fix: Mclust learner no longer sets the control default with a function not in 6 | import to stay compliant with {paradox} conventions 7 | 8 | # mlr3cluster 0.1.10 9 | 10 | * Add BIRCH learner from 'stream' package 11 | * Add BICO learner from 'stream' package 12 | 13 | # mlr3cluster 0.1.9 14 | 15 | * Add DBSCAN learner from 'fpc' package 16 | * Add HDBSCAN learner from 'dbscan' package 17 | * Add OPTICS learner from 'dbscan' package 18 | * Compatibility with upcoming 'paradox' release 19 | * Move to testthat3 20 | * Refactoring 21 | 22 | # mlr3cluster 0.1.8 23 | 24 | * Add new task based on `ruspini` dataset 25 | 26 | # mlr3cluster 0.1.7 27 | 28 | * Replace 'clusterCrit' measures with alternatives from 'cluster' and 'fpc' packages 29 | * Remove broken unloading test 30 | 31 | # mlr3cluster 0.1.6 32 | 33 | * Add states as row names to `usarrest` task. 34 | * Remove dictionary items after unloading package. 
35 | 36 | # mlr3cluster 0.1.5 37 | 38 | * Added Mclust learner 39 | * Fix error associated with new dbscan release 40 | 41 | # mlr3cluster 0.1.4 42 | 43 | * code refactoring 44 | 45 | # mlr3cluster 0.1.3 46 | 47 | * code refactoring 48 | * small fixes 49 | * add filter to PredictionClust 50 | 51 | # mlr3cluster 0.1.2 52 | 53 | * Add Hclust 54 | * test and doc hclust 55 | * Add within sum of squares measure 56 | * add doc wss 57 | * code factor adaptions 58 | 59 | # mlr3cluster 0.1.1 60 | 61 | * Eight new learners 62 | * Added `assignments` and `save_assignments` fields to `LearnerClust` class 63 | 64 | # mlr3cluster 0.1.0 65 | 66 | * Initial upload to CRAN 67 | -------------------------------------------------------------------------------- /R/LearnerClust.R: -------------------------------------------------------------------------------- 1 | #' @title Cluster Learner 2 | #' 3 | #' @description 4 | #' This Learner specializes [mlr3::Learner] for cluster problems: 5 | #' * `task_type` is set to `"clust"`. 6 | #' * Creates [mlr3::Prediction]s of class [PredictionClust]. 7 | #' * Possible values for `predict_types` are: 8 | #' - `"partition"`: Integer indicating the cluster membership. 9 | #' - `"prob"`: Probability for belonging to each cluster. 10 | #' 11 | #' Predefined learners can be found in the [mlr3misc::Dictionary] [mlr3::mlr_learners]. 
12 | #' 13 | #' @template param_id 14 | #' @template param_param_set 15 | #' @template param_predict_types 16 | #' @template param_feature_types 17 | #' @template param_learner_properties 18 | #' @template param_data_formats 19 | #' @template param_packages 20 | #' @template param_label 21 | #' @template param_man 22 | #' 23 | #' @export 24 | #' @examples 25 | #' library(mlr3) 26 | #' library(mlr3cluster) 27 | #' ids = mlr_learners$keys("^clust") 28 | #' ids 29 | #' 30 | #' # get a specific learner from mlr_learners: 31 | #' learner = lrn("clust.kmeans") 32 | #' print(learner) 33 | LearnerClust = R6Class("LearnerClust", 34 | inherit = Learner, 35 | public = list( 36 | #' @field assignments (`NULL` | `vector()`)\cr 37 | #' Cluster assignments from learned model. 38 | assignments = NULL, 39 | 40 | #' @field save_assignments (`logical()`)\cr 41 | #' Should assignments for 'train' data be saved in the learner? 42 | #' Default is `TRUE`. 43 | save_assignments = TRUE, 44 | 45 | #' @description 46 | #' Creates a new instance of this [R6][R6::R6Class] class. 47 | initialize = function( 48 | id, 49 | param_set = ps(), 50 | predict_types = "partition", 51 | feature_types = character(), 52 | properties = character(), 53 | packages = character(), 54 | label = NA_character_, 55 | man = NA_character_ 56 | ) { 57 | super$initialize( 58 | id = id, 59 | task_type = "clust", 60 | param_set = param_set, 61 | predict_types = predict_types, 62 | feature_types = feature_types, 63 | properties = properties, 64 | packages = c("mlr3cluster", packages), 65 | label = label, 66 | man = man 67 | ) 68 | }, 69 | 70 | #' @description 71 | #' Reset `assignments` field before calling parent's `reset()`. 
72 | reset = function() { 73 | self$assignments = NULL 74 | super$reset() 75 | } 76 | ) 77 | ) 78 | -------------------------------------------------------------------------------- /R/LearnerClustAffinityPropagation.R: -------------------------------------------------------------------------------- 1 | #' @title Affinity Propagation Clustering Learner 2 | #' 3 | #' @name mlr_learners_clust.ap 4 | #' 5 | #' @description 6 | #' A [LearnerClust] for Affinity Propagation clustering implemented in [apcluster::apcluster()]. 7 | #' [apcluster::apcluster()] does not set a default for the similarity function. 8 | #' The predict method computes the closest cluster exemplar to find the 9 | #' cluster memberships for new data. 10 | #' The code is taken from 11 | #' [StackOverflow](https://stackoverflow.com/questions/34932692/using-the-apcluster-package-in-r-it-is-possible-to-score-unclustered-data-poi) 12 | #' answer by the `apcluster` package maintainer. 13 | #' 14 | #' @templateVar id clust.ap 15 | #' @template learner 16 | #' 17 | #' @references 18 | #' `r format_bib("bodenhofer2011apcluster", "frey2007clustering")` 19 | #' 20 | #' @export 21 | #' @template seealso_learner 22 | #' @template example 23 | LearnerClustAP = R6Class("LearnerClustAP", 24 | inherit = LearnerClust, 25 | public = list( 26 | #' @description 27 | #' Creates a new instance of this [R6][R6::R6Class] class.
28 | initialize = function() { 29 | param_set = ps( 30 | s = p_uty(tags = c("required", "train")), 31 | p = p_uty(default = NA, tags = "train", custom_check = check_numeric), 32 | q = p_dbl(0, 1, tags = "train"), 33 | maxits = p_int(1L, default = 1000L, tags = "train"), 34 | convits = p_int(1L, default = 100L, tags = "train"), 35 | lam = p_dbl(0.5, 1, default = 0.9, tags = "train"), 36 | includeSim = p_lgl(default = FALSE, tags = "train"), 37 | details = p_lgl(default = FALSE, tags = "train"), 38 | nonoise = p_lgl(default = FALSE, tags = "train"), 39 | seed = p_int(tags = "train") 40 | ) 41 | 42 | super$initialize( 43 | id = "clust.ap", 44 | feature_types = c("logical", "integer", "numeric"), 45 | predict_types = "partition", 46 | param_set = param_set, 47 | properties = c("partitional", "exclusive", "complete"), 48 | packages = "apcluster", 49 | man = "mlr3cluster::mlr_learners_clust.ap", 50 | label = "Affinity Propagation Clustering" 51 | ) 52 | } 53 | ), 54 | 55 | private = list( 56 | .train = function(task) { 57 | pv = self$param_set$get_values(tags = "train") 58 | d = task$data() 59 | m = invoke(apcluster::apcluster, x = d, .args = pv) 60 | # add data points corresponding to exemplars; .predict reads them back from the model's attributes 61 | attributes(m)$exemplar_data = d[m@exemplars, ] 62 | 63 | if (self$save_assignments) { 64 | self$assignments = apcluster::labels(m, type = "enum") 65 | } 66 | m 67 | }, 68 | 69 | .predict = function(task) { 70 | pv = self$param_set$get_values(tags = "train") # `s` is tagged "train" only, so fetch the train-tagged values 71 | sim_func = pv$s 72 | exemplar_data = attributes(self$model)$exemplar_data 73 | 74 | d = task$data() 75 | sim_mat = sim_func( 76 | rbind(exemplar_data, d), 77 | sel = (seq_len(nrow(d))) + nrow(exemplar_data) 78 | )[seq_len(nrow(exemplar_data)), ] # keep only the rows belonging to the exemplars 79 | partition = unname(apply(sim_mat, 2L, which.max)) # per column: index of the exemplar with the largest similarity 80 | PredictionClust$new(task = task, partition = partition) 81 | } 82 | ) 83 | ) 84 | 85 | #' @include zzz.R 86 | register_learner("clust.ap", LearnerClustAP) 87 |
-------------------------------------------------------------------------------- /R/LearnerClustAgnes.R: -------------------------------------------------------------------------------- 1 | #' @title Agglomerative Hierarchical Clustering Learner 2 | #' 3 | #' @name mlr_learners_clust.agnes 4 | #' 5 | #' @description 6 | #' A [LearnerClust] for agglomerative hierarchical clustering implemented in [cluster::agnes()]. 7 | #' The predict method uses [stats::cutree()] which cuts the tree resulting from 8 | #' hierarchical clustering into specified number of groups (see parameter `k`). 9 | #' The default number for `k` is 2. 10 | #' 11 | #' @templateVar id clust.agnes 12 | #' @template learner 13 | #' 14 | #' @references 15 | #' `r format_bib("kaufman2009finding")` 16 | #' 17 | #' @export 18 | #' @template seealso_learner 19 | #' @template example 20 | LearnerClustAgnes = R6Class("LearnerClustAgnes", 21 | inherit = LearnerClust, 22 | public = list( 23 | #' @description 24 | #' Creates a new instance of this [R6][R6::R6Class] class. 
25 | initialize = function() { 26 | param_set = ps( 27 | metric = p_fct(default = "euclidean", levels = c("euclidean", "manhattan"), tags = "train"), 28 | stand = p_lgl(default = FALSE, tags = "train"), 29 | method = p_fct( 30 | default = "average", 31 | levels = c("average", "single", "complete", "ward", "weighted", "flexible", "gaverage"), 32 | tags = "train" 33 | ), 34 | trace.lev = p_int(0L, default = 0L, tags = "train"), 35 | k = p_int(1L, default = 2L, tags = c("train", "predict")), 36 | par.method = p_uty( 37 | tags = "train", 38 | depends = quote(method %in% c("flexible", "gaverage")), 39 | custom_check = crate(function(x) { 40 | if (!(test_numeric(x) || test_list(x))) { 41 | return("`par.method` needs to be a numeric vector") 42 | } 43 | if (length(x) %in% c(1L, 3L, 4L)) TRUE else "`par.method` needs to be of length 1, 3, or 4" 44 | }) 45 | ) 46 | ) 47 | 48 | param_set$set_values(k = 2L) 49 | 50 | super$initialize( 51 | id = "clust.agnes", 52 | feature_types = c("logical", "integer", "numeric"), 53 | predict_types = "partition", 54 | param_set = param_set, 55 | properties = c("hierarchical", "exclusive", "complete"), 56 | packages = "cluster", 57 | man = "mlr3cluster::mlr_learners_clust.agnes", 58 | label = "Agglomerative Hierarchical Clustering" 59 | ) 60 | } 61 | ), 62 | 63 | private = list( 64 | .train = function(task) { 65 | pv = self$param_set$get_values(tags = "train") 66 | m = invoke(cluster::agnes, 67 | x = task$data(), 68 | diss = FALSE, 69 | .args = remove_named(pv, "k") # `k` is not passed to agnes(); it is only used by cutree() below 70 | ) 71 | if (self$save_assignments) { 72 | self$assignments = stats::cutree(m, pv$k) 73 | } 74 | m 75 | }, 76 | 77 | .predict = function(task) { 78 | pv = self$param_set$get_values(tags = "predict") 79 | if (pv$k > task$nrow) { 80 | stopf("`k` needs to be between 1 and %i.", task$nrow) 81 | } 82 | 83 | warn_prediction_useless(self$id) 84 | # no new clustering is computed here: the assignments stored during training are returned 85 | PredictionClust$new(task = task, partition = self$assignments) 86 | } 87 | ) 88 | ) 89 | 90 | #' @include zzz.R 91 |
register_learner("clust.agnes", LearnerClustAgnes) 92 | -------------------------------------------------------------------------------- /R/LearnerClustBICO.R: -------------------------------------------------------------------------------- 1 | #' @title BICO Clustering Learner 2 | #' 3 | #' @name mlr_learners_clust.bico 4 | #' 5 | #' @description 6 | #' BICO (Fast computation of k-means coresets in a data stream) clustering. 7 | #' Calls [stream::DSC_BICO()] from \CRANpkg{stream}. 8 | #' 9 | #' @templateVar id clust.bico 10 | #' @template learner 11 | #' 12 | #' @references 13 | #' `r format_bib("fichtenberger2013bico", "hahsler2017stream")` 14 | #' 15 | #' @export 16 | #' @template seealso_learner 17 | #' @template example 18 | LearnerClustBICO = R6Class("LearnerClustBICO", 19 | inherit = LearnerClust, 20 | public = list( 21 | #' @description 22 | #' Creates a new instance of this [R6][R6::R6Class] class. 23 | initialize = function() { 24 | param_set = ps( 25 | k = p_int(1L, default = 5L, tags = "train"), 26 | space = p_int(1L, default = 10L, tags = "train"), 27 | p = p_int(1L, default = 10L, tags = "train"), 28 | iterations = p_int(1L, default = 10L, tags = "train") 29 | ) 30 | 31 | super$initialize( 32 | id = "clust.bico", 33 | feature_types = c("integer", "numeric"), 34 | predict_types = "partition", 35 | param_set = param_set, 36 | properties = c("partitional", "exclusive", "complete"), 37 | packages = "stream", 38 | man = "mlr3cluster::mlr_learners_clust.bico", 39 | label = "BICO Clustering" 40 | ) 41 | } 42 | ), 43 | 44 | private = list( 45 | .train = function(task) { 46 | pv = self$param_set$get_values(tags = "train") 47 | dt = task$data() 48 | m = invoke(stream::DSC_BICO, .args = pv) 49 | x = stream::DSD_Memory(dt) 50 | stats::update(m, x, n = nrow(dt)) 51 | 52 | if (self$save_assignments) { 53 | self$assignments = as.integer(invoke(predict, m, newdata = dt)[[1L]]) 54 | } 55 | m 56 | }, 57 | 58 | .predict = function(task) { 59 | partition = 
as.integer(invoke(predict, self$model, newdata = task$data())[[1L]]) 60 | PredictionClust$new(task = task, partition = partition) 61 | } 62 | ) 63 | ) 64 | 65 | #' @include zzz.R 66 | register_learner("clust.bico", LearnerClustBICO) 67 | -------------------------------------------------------------------------------- /R/LearnerClustBIRCH.R: -------------------------------------------------------------------------------- 1 | #' @title BIRCH Clustering Learner 2 | #' 3 | #' @name mlr_learners_clust.birch 4 | #' 5 | #' @description 6 | #' BIRCH (Balanced Iterative Reducing Clustering using Hierarchies) clustering. 7 | #' Calls [stream::DSC_BIRCH()] from \CRANpkg{stream}. 8 | #' 9 | #' @templateVar id clust.birch 10 | #' @template learner 11 | #' 12 | #' @references 13 | #' `r format_bib("zhang1996birch", "zhang1997birch", "hahsler2017stream")` 14 | #' 15 | #' @export 16 | #' @template seealso_learner 17 | #' @template example 18 | LearnerClustBIRCH = R6Class("LearnerClustBIRCH", 19 | inherit = LearnerClust, 20 | public = list( 21 | #' @description 22 | #' Creates a new instance of this [R6][R6::R6Class] class. 
23 | initialize = function() { 24 | param_set = ps( 25 | threshold = p_dbl(0L, tags = c("train", "required")), 26 | branching = p_int(1L, tags = c("train", "required")), 27 | maxLeaf = p_int(1L, tags = c("train", "required")), 28 | maxMem = p_int(0L, default = 0L, tags = "train"), 29 | outlierThreshold = p_dbl(default = 0.25, tags = "train") 30 | ) 31 | 32 | super$initialize( 33 | id = "clust.birch", 34 | feature_types = c("integer", "numeric"), 35 | predict_types = "partition", 36 | param_set = param_set, 37 | properties = c("hierarchical", "exclusive", "complete"), 38 | packages = "stream", 39 | man = "mlr3cluster::mlr_learners_clust.birch", 40 | label = "BIRCH Clustering" 41 | ) 42 | } 43 | ), 44 | 45 | private = list( 46 | .train = function(task) { 47 | pv = self$param_set$get_values(tags = "train") 48 | dt = task$data() 49 | m = invoke(stream::DSC_BIRCH, .args = pv) 50 | x = stream::DSD_Memory(dt) 51 | stats::update(m, x, n = nrow(dt)) 52 | 53 | if (self$save_assignments) { 54 | self$assignments = as.integer(invoke(predict, m, newdata = dt)[[1L]]) 55 | } 56 | m 57 | }, 58 | 59 | .predict = function(task) { 60 | partition = as.integer(invoke(predict, self$model, newdata = task$data())[[1L]]) 61 | PredictionClust$new(task = task, partition = partition) 62 | } 63 | ) 64 | ) 65 | 66 | #' @include zzz.R 67 | register_learner("clust.birch", LearnerClustBIRCH) 68 | -------------------------------------------------------------------------------- /R/LearnerClustCMeans.R: -------------------------------------------------------------------------------- 1 | #' @title Fuzzy C-Means Clustering Learner 2 | #' 3 | #' @name mlr_learners_clust.cmeans 4 | #' 5 | #' @description 6 | #' A [LearnerClust] for fuzzy clustering implemented in [e1071::cmeans()]. 7 | #' [e1071::cmeans()] doesn't have a default value for the number of clusters. 8 | #' Therefore, the `centers` parameter here is set to 2 by default. 
#' The predict method uses [clue::cl_predict()] to compute the
#' cluster memberships for new data.
#'
#' @templateVar id clust.cmeans
#' @template learner
#'
#' @references
#' `r format_bib("dimitriadou2008misc", "bezdek2013pattern")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClustCMeans = R6Class("LearnerClustCMeans",
  inherit = LearnerClust,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      param_set = ps(
        centers = p_uty(
          tags = c("required", "train"), custom_check = check_centers
        ),
        iter.max = p_int(1L, default = 100L, tags = "train"),
        verbose = p_lgl(default = FALSE, tags = "train"),
        dist = p_fct(levels = c("euclidean", "manhattan"), default = "euclidean", tags = "train"),
        method = p_fct(levels = c("cmeans", "ufcl"), default = "cmeans", tags = "train"),
        m = p_dbl(1, default = 2, tags = "train"),
        rate.par = p_dbl(0, 1, tags = "train", depends = quote(method == "ufcl")),
        weights = p_uty(default = 1L, tags = "train", custom_check = crate(function(x) {
          # A custom check must return TRUE or a message string. Only `test_*`
          # predicates may be combined with `||`: `check_count()` returns a
          # character string on failure, which makes `||` raise an error
          # instead of reporting the validation message below.
          # `any.missing = FALSE` keeps `all(x > 0)` from evaluating to NA.
          is_positive_vector = test_numeric(x, any.missing = FALSE) && all(x > 0)
          if (is_positive_vector || test_count(x, positive = TRUE)) {
            TRUE
          } else {
            "`weights` must be positive numeric vector or a single positive number"
          }
        })),
        control = p_uty(tags = "train")
      )

      # e1071::cmeans() has no default number of clusters, so use 2.
      param_set$set_values(centers = 2L)

      super$initialize(
        id = "clust.cmeans",
        feature_types = c("logical", "integer", "numeric"),
        predict_types = c("partition", "prob"),
        param_set = param_set,
        properties = c("partitional", "fuzzy", "complete"),
        packages = "e1071",
        man = "mlr3cluster::mlr_learners_clust.cmeans",
        label = "Fuzzy C-Means Clustering Learner"
      )
    }
  ),

  private = list(
    # Fits e1071::cmeans() on the task data with the "train" hyperparameters.
    # `centers` is validated against the task before fitting.
    .train = function(task) {
      pv = self$param_set$get_values(tags = "train")
      assert_centers_param(pv$centers, task, test_data_frame, "centers")

      m = invoke(e1071::cmeans, x = task$data(), .args = pv, .opts = allow_partial_matching)
      if (self$save_assignments) {
        self$assignments = m$cluster
      }
      m
    },

    # Returns hard class ids and the membership matrix for the new data, both
    # obtained from cl_predict(). Membership columns are renamed 1..k.
    .predict = function(task) {
      partition = unclass(invoke(cl_predict, self$model, newdata = task$data(), type = "class_ids"))
      prob = unclass(invoke(cl_predict, self$model, newdata = task$data(), type = "memberships"))
      colnames(prob) = seq_len(ncol(prob))

      PredictionClust$new(task = task, partition = partition, prob = prob)
    }
  )
)

#' @include zzz.R
register_learner("clust.cmeans", LearnerClustCMeans)

--------------------------------------------------------------------------------
/R/LearnerClustCobweb.R:
--------------------------------------------------------------------------------
#' @title Cobweb Clustering Learner
#'
#' @name mlr_learners_clust.cobweb
#'
#' @description
#' A [LearnerClust] for Cobweb clustering implemented in [RWeka::Cobweb()].
#' The predict method uses [RWeka::predict.Weka_clusterer()] to compute the
#' cluster memberships for new data.
#'
#' @templateVar id clust.cobweb
#' @template learner
#'
#' @references
#' `r format_bib("witten2002data", "fisher1987knowledge", "gennari1989models")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClustCobweb = R6Class("LearnerClustCobweb",
  inherit = LearnerClust,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
24 | initialize = function() { 25 | param_set = ps( 26 | A = p_dbl(0, default = 1, tags = "train"), 27 | C = p_dbl(0, default = 0.002, tags = "train"), 28 | S = p_int(1L, default = 42L, tags = "train") 29 | ) 30 | 31 | super$initialize( 32 | id = "clust.cobweb", 33 | feature_types = c("logical", "integer", "numeric"), 34 | predict_types = "partition", 35 | param_set = param_set, 36 | properties = c("partitional", "exclusive", "complete"), 37 | packages = "RWeka", 38 | man = "mlr3cluster::mlr_learners_clust.cobweb", 39 | label = "Cobweb Clustering" 40 | ) 41 | } 42 | ), 43 | 44 | private = list( 45 | .train = function(task) { 46 | pv = self$param_set$get_values(tags = "train") 47 | ctrl = invoke(RWeka::Weka_control, .args = pv) 48 | m = invoke(RWeka::Cobweb, x = task$data(), control = ctrl) 49 | if (self$save_assignments) { 50 | self$assignments = unname(m$class_ids + 1L) 51 | } 52 | m 53 | }, 54 | 55 | .predict = function(task) { 56 | partition = invoke(predict, self$model, newdata = task$data(), type = "class") + 1L 57 | PredictionClust$new(task = task, partition = partition) 58 | } 59 | ) 60 | ) 61 | 62 | #' @include zzz.R 63 | register_learner("clust.cobweb", LearnerClustCobweb) 64 | -------------------------------------------------------------------------------- /R/LearnerClustDBSCAN.R: -------------------------------------------------------------------------------- 1 | #' @title Density-based Spatial Clustering of Applications with Noise (DBSCAN) Clustering Learner 2 | #' 3 | #' @name mlr_learners_clust.dbscan 4 | #' 5 | #' @description 6 | #' DBSCAN (Density-based spatial clustering of applications with noise) clustering. 7 | #' Calls [dbscan::dbscan()] from \CRANpkg{dbscan}. 
#'
#' @templateVar id clust.dbscan
#' @template learner
#'
#' @references
#' `r format_bib("hahsler2019dbscan", "ester1996density")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClustDBSCAN = R6Class("LearnerClustDBSCAN",
  inherit = LearnerClust,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      # `bucketSize` and `splitRule` only apply when `search == "kdtree"`.
      super$initialize(
        id = "clust.dbscan",
        feature_types = c("logical", "integer", "numeric"),
        predict_types = "partition",
        param_set = ps(
          eps = p_dbl(0, tags = c("required", "train")),
          minPts = p_int(0L, default = 5L, tags = "train"),
          borderPoints = p_lgl(default = TRUE, tags = "train"),
          weights = p_uty(tags = "train", custom_check = check_numeric),
          search = p_fct(levels = c("kdtree", "linear", "dist"), default = "kdtree", tags = "train"),
          bucketSize = p_int(1L, default = 10L, tags = "train", depends = quote(search == "kdtree")),
          splitRule = p_fct(
            levels = c("STD", "MIDPT", "FAIR", "SL_MIDPT", "SL_FAIR", "SUGGEST"),
            default = "SUGGEST",
            tags = "train",
            depends = quote(search == "kdtree")
          ),
          approx = p_dbl(default = 0, tags = "train")
        ),
        properties = c("density", "exclusive", "complete"),
        packages = "dbscan",
        man = "mlr3cluster::mlr_learners_clust.dbscan",
        label = "Density-Based Clustering"
      )
    }
  ),

  private = list(
    # Runs dbscan::dbscan() and stores the training data inside the model,
    # because predict() is later called with it as `data`.
    .train = function(task) {
      train_data = task$data()
      train_args = self$param_set$get_values(tags = "train")

      fit = invoke(dbscan::dbscan, x = train_data, .args = train_args)
      fit = insert_named(fit, list(data = train_data))

      if (self$save_assignments) {
        self$assignments = fit$cluster
      }
      fit
    },

    # Predicts cluster ids for new data, passing the stored training data along.
    .predict = function(task) {
      fit = self$model
      cluster_ids = invoke(predict, fit, newdata = task$data(), data = fit$data)
      PredictionClust$new(task = task, partition = cluster_ids)
    }
  )
)

#' @include zzz.R
register_learner("clust.dbscan", LearnerClustDBSCAN)

--------------------------------------------------------------------------------
/R/LearnerClustDBSCANfpc.R:
--------------------------------------------------------------------------------
#' @title Density-based Spatial Clustering of Applications with Noise (DBSCAN) Clustering Learner
#'
#' @name mlr_learners_clust.dbscan_fpc
#'
#' @description
#' DBSCAN (Density-based spatial clustering of applications with noise) clustering.
#' Calls [fpc::dbscan()] from \CRANpkg{fpc}.
#'
#' @templateVar id clust.dbscan_fpc
#' @template learner
#'
#' @references
#' `r format_bib("ester1996density")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClustDBSCANfpc = R6Class("LearnerClustDBSCANfpc",
  inherit = LearnerClust,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
23 | initialize = function() { 24 | param_set = ps( 25 | eps = p_dbl(0, tags = c("required", "train")), 26 | MinPts = p_int(0L, default = 5L, tags = "train"), 27 | scale = p_lgl(default = FALSE, tags = "train"), 28 | method = p_fct(levels = c("hybrid", "raw", "dist"), tags = "train"), 29 | seeds = p_lgl(default = TRUE, tags = "train"), 30 | showplot = p_uty(default = FALSE, tags = "train", custom_check = crate(function(x) { 31 | if (test_flag(x) || test_int(x, lower = 0L, upper = 2L)) { 32 | TRUE 33 | } else { 34 | "`showplot` need to be either logical or integer between 0 and 2" 35 | } 36 | })), 37 | countmode = p_uty(default = NULL, tags = "train", custom_check = crate(function(x) { 38 | if (test_integer(x, null.ok = TRUE)) { 39 | TRUE 40 | } else { 41 | "`countmode` need to be NULL or vector of integers" 42 | } 43 | })) 44 | ) 45 | 46 | param_set$set_values(MinPts = 5L, scale = FALSE, seeds = TRUE, showplot = FALSE, countmode = NULL) 47 | 48 | super$initialize( 49 | id = "clust.dbscan_fpc", 50 | packages = "fpc", 51 | feature_types = c("logical", "integer", "numeric"), 52 | predict_types = "partition", 53 | param_set = param_set, 54 | properties = c("density", "exclusive", "complete"), 55 | man = "mlr3cluster::mlr_learners_clust.dbscan_fpc", 56 | label = "Density-Based Clustering with fpc" 57 | ) 58 | } 59 | ), 60 | 61 | private = list( 62 | .train = function(task) { 63 | pv = self$param_set$get_values(tags = "train") 64 | data = task$data() 65 | m = invoke(fpc::dbscan, data = data, .args = pv) 66 | m = insert_named(m, list(data = data)) 67 | if (self$save_assignments) { 68 | self$assignments = m$cluster 69 | } 70 | m 71 | }, 72 | 73 | .predict = function(task) { 74 | partition = as.integer(invoke(predict, self$model, data = self$model$data), newdata = task$data()) 75 | PredictionClust$new(task = task, partition = partition) 76 | } 77 | ) 78 | ) 79 | 80 | #' @include zzz.R 81 | register_learner("clust.dbscan_fpc", LearnerClustDBSCANfpc) 82 | 
--------------------------------------------------------------------------------
/R/LearnerClustDiana.R:
--------------------------------------------------------------------------------
#' @title Divisive Hierarchical Clustering Learner
#'
#' @name mlr_learners_clust.diana
#'
#' @description
#' A [LearnerClust] for divisive hierarchical clustering implemented in [cluster::diana()].
#' The predict method uses [stats::cutree()] which cuts the tree resulting from
#' hierarchical clustering into specified number of groups (see parameter `k`).
#' The default value for `k` is 2.
#'
#' @templateVar id clust.diana
#' @template learner
#'
#' @references
#' `r format_bib("kaufman2009finding")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClustDiana = R6Class("LearnerClustDiana",
  inherit = LearnerClust,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      params = ps(
        metric = p_fct(default = "euclidean", levels = c("euclidean", "manhattan"), tags = "train"),
        stand = p_lgl(default = FALSE, tags = "train"),
        trace.lev = p_int(0L, default = 0L, tags = "train"),
        k = p_int(1L, default = 2L, tags = c("train", "predict"))
      )
      # `k` is given an explicit value so that cutree() always has one.
      params$set_values(k = 2L)

      super$initialize(
        id = "clust.diana",
        feature_types = c("logical", "integer", "numeric"),
        predict_types = "partition",
        param_set = params,
        properties = c("hierarchical", "exclusive", "complete"),
        packages = "cluster",
        man = "mlr3cluster::mlr_learners_clust.diana",
        label = "Divisive Hierarchical Clustering"
      )
    }
  ),

  private = list(
    # Fits cluster::diana() on the raw observations (`diss = FALSE`).
    # `k` is not passed to diana(); it is only used to cut the tree.
    .train = function(task) {
      train_args = self$param_set$get_values(tags = "train")
      diana_args = remove_named(train_args, "k")

      tree = invoke(cluster::diana,
        x = task$data(),
        diss = FALSE,
        .args = diana_args
      )

      if (self$save_assignments) {
        self$assignments = stats::cutree(tree, train_args$k)
      }
      tree
    },

    # Returns the stored training assignments after checking `k` against the
    # number of rows of the task.
    .predict = function(task) {
      n_rows = task$nrow
      k = self$param_set$get_values(tags = "predict")$k
      if (k > n_rows) {
        stopf("`k` needs to be between 1 and %i.", n_rows)
      }

      warn_prediction_useless(self$id)

      PredictionClust$new(task = task, partition = self$assignments)
    }
  )
)

#' @include zzz.R
register_learner("clust.diana", LearnerClustDiana)

--------------------------------------------------------------------------------
/R/LearnerClustEM.R:
--------------------------------------------------------------------------------
#' @title Expectation-Maximization Clustering Learner
#'
#' @name mlr_learners_clust.em
#'
#' @description
#' A [LearnerClust] for Expectation-Maximization clustering implemented in
#' [RWeka::list_Weka_interfaces()].
#' The predict method uses [RWeka::predict.Weka_clusterer()] to compute the
#' cluster memberships for new data.
#'
#' @templateVar id clust.em
#' @template learner
#'
#' @references
#' `r format_bib("witten2002data", "dempster1977maximum")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClustEM = R6Class("LearnerClustEM",
  inherit = LearnerClust,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      # Underscores in parameter ids are replaced by dashes at training time.
      super$initialize(
        id = "clust.em",
        feature_types = c("logical", "integer", "numeric"),
        predict_types = "partition",
        param_set = ps(
          I = p_int(1L, default = 100L, tags = "train"),
          ll_cv = p_dbl(1e-6, default = 1e-6, tags = "train"),
          ll_iter = p_dbl(1e-6, default = 1e-6, tags = "train"),
          M = p_dbl(1e-6, default = 1e-6, tags = "train"),
          max = p_int(-1L, default = -1L, tags = "train"),
          N = p_int(-1L, default = -1L, tags = "train"),
          num_slots = p_int(1L, default = 1L, tags = "train"),
          S = p_int(0L, default = 100L, tags = "train"),
          X = p_int(1L, default = 10L, tags = "train"),
          K = p_int(1L, default = 10L, tags = "train"),
          V = p_lgl(default = FALSE, tags = "train"),
          output_debug_info = p_lgl(default = FALSE, tags = "train")
        ),
        properties = c("partitional", "exclusive", "complete"),
        packages = "RWeka",
        man = "mlr3cluster::mlr_learners_clust.em",
        label = "Expectation-Maximization Clustering"
      )
    }
  ),

  private = list(
    # Builds a Weka control object from the "train" hyperparameters and fits
    # the clusterer created from "weka/clusterers/EM".
    .train = function(task) {
      weka_args = self$param_set$get_values(tags = "train")
      # Replace "_" with "-" in option names, e.g. `ll_cv` -> `ll-cv`.
      names(weka_args) = chartr("_", "-", names(weka_args))
      control = invoke(RWeka::Weka_control, .args = weka_args)

      em_clusterer = RWeka::make_Weka_clusterer("weka/clusterers/EM")
      model = invoke(em_clusterer, x = task$data(), control = control)

      if (self$save_assignments) {
        # Ids are shifted by one (presumably because Weka ids start at 0).
        self$assignments = unname(model$class_ids + 1L)
      }
      model
    },

    # Predicts class ids for new data, shifted by one like the training ids.
    .predict = function(task) {
      cluster_ids = invoke(predict, self$model, newdata = task$data(), type = "class") + 1L
      PredictionClust$new(task = task, partition = cluster_ids)
    }
  )
)

#' @include zzz.R
register_learner("clust.em", LearnerClustEM)

--------------------------------------------------------------------------------
/R/LearnerClustFanny.R:
--------------------------------------------------------------------------------
#' @title Fuzzy Analysis Clustering Learner
#'
#' @name mlr_learners_clust.fanny
#'
#' @description
#' A [LearnerClust] for fuzzy clustering implemented in [cluster::fanny()].
#' [cluster::fanny()] doesn't have a default value for the number of clusters.
#' Therefore, the `k` parameter which corresponds to the number
#' of clusters here is set to 2 by default.
#' The predict method copies cluster assignments and memberships
#' generated for train data. The predict does not work for
#' new data.
#'
#' @templateVar id clust.fanny
#' @template learner
#'
#' @references
#' `r format_bib("kaufman2009finding")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClustFanny = R6Class("LearnerClustFanny",
  inherit = LearnerClust,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
28 | initialize = function() { 29 | param_set = ps( 30 | k = p_int(1L, tags = c("required", "train")), 31 | memb.exp = p_dbl(1, default = 2, tags = "train"), 32 | metric = p_fct(default = "euclidean", levels = c("euclidean", "manhattan", "SqEuclidean"), tags = "train"), 33 | stand = p_lgl(default = FALSE, tags = "train"), 34 | maxit = p_int(0L, default = 500L, tags = "train"), 35 | tol = p_dbl(0, default = 1e-15, tags = "train"), 36 | trace.lev = p_int(0L, default = 0L, tags = "train") 37 | ) 38 | 39 | param_set$set_values(k = 2L) 40 | 41 | super$initialize( 42 | id = "clust.fanny", 43 | feature_types = c("logical", "integer", "numeric"), 44 | predict_types = c("partition", "prob"), 45 | param_set = param_set, 46 | properties = c("partitional", "fuzzy", "complete"), 47 | packages = "cluster", 48 | man = "mlr3cluster::mlr_learners_clust.fanny", 49 | label = "Fuzzy Analysis Clustering" 50 | ) 51 | } 52 | ), 53 | 54 | private = list( 55 | .train = function(task) { 56 | pv = self$param_set$get_values(tags = "train") 57 | m = invoke(cluster::fanny, x = task$data(), .args = pv) 58 | if (self$save_assignments) { 59 | self$assignments = m$clustering 60 | } 61 | m 62 | }, 63 | 64 | .predict = function(task) { 65 | warn_prediction_useless(self$id) 66 | 67 | partition = self$model$clustering 68 | 69 | prob = self$model$membership 70 | colnames(prob) = seq_len(ncol(prob)) 71 | 72 | PredictionClust$new(task = task, partition = partition, prob = prob) 73 | } 74 | ) 75 | ) 76 | 77 | #' @include zzz.R 78 | register_learner("clust.fanny", LearnerClustFanny) 79 | -------------------------------------------------------------------------------- /R/LearnerClustFarthestFirst.R: -------------------------------------------------------------------------------- 1 | #' @title Farthest First Clustering Learner 2 | #' 3 | #' @name mlr_learners_clust.ff 4 | #' 5 | #' @description 6 | #' A [LearnerClust] for Farthest First clustering implemented in [RWeka::FarthestFirst()]. 
#' The predict method uses [RWeka::predict.Weka_clusterer()] to compute the
#' cluster memberships for new data.
#'
#' @templateVar id clust.ff
#' @template learner
#'
#' @references
#' `r format_bib("witten2002data", "hochbaum1985best")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClustFarthestFirst = R6Class("LearnerClustFF",
  inherit = LearnerClust,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      # Underscores in parameter ids are replaced by dashes at training time.
      super$initialize(
        id = "clust.ff",
        feature_types = c("logical", "integer", "numeric"),
        predict_types = "partition",
        param_set = ps(
          N = p_int(1L, default = 2L, tags = "train"),
          S = p_int(1L, default = 1L, tags = "train"),
          output_debug_info = p_lgl(default = FALSE, tags = "train")
        ),
        properties = c("partitional", "exclusive", "complete"),
        packages = "RWeka",
        man = "mlr3cluster::mlr_learners_clust.ff",
        label = "Farthest First Clustering"
      )
    }
  ),

  private = list(
    # Builds a Weka control object from the "train" hyperparameters and fits
    # RWeka::FarthestFirst() on the task data.
    .train = function(task) {
      weka_args = self$param_set$get_values(tags = "train")
      # Replace "_" with "-" in option names.
      names(weka_args) = chartr("_", "-", names(weka_args))
      control = invoke(RWeka::Weka_control, .args = weka_args)
      model = invoke(RWeka::FarthestFirst, x = task$data(), control = control)

      if (self$save_assignments) {
        # Ids are shifted by one (presumably because Weka ids start at 0).
        self$assignments = unname(model$class_ids + 1L)
      }
      model
    },

    # Predicts class ids for new data, shifted by one like the training ids.
    .predict = function(task) {
      cluster_ids = invoke(predict, self$model, newdata = task$data(), type = "class") + 1L
      PredictionClust$new(task = task, partition = cluster_ids)
    }
  )
)

#' @include zzz.R
register_learner("clust.ff", LearnerClustFarthestFirst)

--------------------------------------------------------------------------------
/R/LearnerClustFeatureless.R:
--------------------------------------------------------------------------------
#' @title Featureless Clustering Learner
#'
#' @name mlr_learners_clust.featureless
#'
#' @description
#' A simple [LearnerClust] which randomly (but evenly) assigns observations to
#' `num_clusters` partitions (default: 1 partition).
#'
#' @templateVar id clust.featureless
#' @template learner
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClustFeatureless = R6Class("LearnerClustFeatureless",
  inherit = LearnerClust,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      param_set = ps(num_clusters = p_int(1L, tags = c("required", "train", "predict")))
      param_set$set_values(num_clusters = 1L)

      super$initialize(
        id = "clust.featureless",
        feature_types = c("logical", "integer", "numeric"),
        predict_types = c("partition", "prob"),
        param_set = param_set,
        properties = c("partitional", "exclusive", "complete", "missings"),
        man = "mlr3cluster::mlr_learners_clust.featureless",
        label = "Featureless Clustering"
      )
    }
  ),

  private = list(
    # Splits the row count of the task into `num_clusters` chunks without
    # looking at feature values. The model stores the chunking and the
    # feature names.
    .train = function(task) {
      pv = self$param_set$get_values(tags = "train")
      k = pv$num_clusters
      n = task$nrow

      if (k > n) {
        stopf("number of clusters must lie between 1 and `nrow(data)`.")
      }

      partition = chunk(n, n_chunks = k)

      if (self$save_assignments) {
        self$assignments = partition
      }

      set_class(
        list(clustering = partition, features = task$feature_names),
        "clust.featureless_model"
      )
    },

    # Chunks the rows of the prediction task into `num_clusters` groups and,
    # for predict type "prob", draws random membership probabilities whose
    # row-wise maximum sits in the column of the assigned cluster.
    .predict = function(task) {
      pv = self$param_set$get_values(tags = "predict")
      n = task$nrow
      k = pv$num_clusters

      partition = chunk(n, n_chunks = k)
      prob = NULL

      if (self$predict_type == "prob") {
        prob = matrix(runif(n * k), nrow = n, ncol = k)
        prob = prob / rowSums(prob)

        # Move each row's maximum into the column given by `partition[i]`.
        # The position of the maximum must be compared with the assigned
        # cluster, not with the row index `i`. Comparing with `i` left rows
        # where `pos == i` but `pos != partition[i]` untouched, so their
        # largest probability pointed at a different cluster than the one
        # in `partition`.
        prob = do.call(rbind, map(seq_along(partition), function(i) {
          x = prob[i, , drop = TRUE]
          pos = which_max(x)
          if (pos == partition[i]) x else append(x[-pos], x[pos], after = partition[i] - 1L)
        }))
      }

      PredictionClust$new(task = task, partition = partition, prob = prob)
    }
  )
)

#' @include zzz.R
register_learner("clust.featureless", LearnerClustFeatureless)

--------------------------------------------------------------------------------
/R/LearnerClustHDBSCAN.R:
--------------------------------------------------------------------------------
#' @title Hierarchical DBSCAN (HDBSCAN) Clustering Learner
#'
#' @name mlr_learners_clust.hdbscan
#'
#' @description
#' HDBSCAN (Hierarchical DBSCAN) clustering.
#' Calls [dbscan::hdbscan()] from \CRANpkg{dbscan}.
#'
#' @templateVar id clust.hdbscan
#' @template learner
#'
#' @references
#' `r format_bib("hahsler2019dbscan", "campello2013density")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClustHDBSCAN = R6Class("LearnerClustHDBSCAN",
  inherit = LearnerClust,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
23 | initialize = function() { 24 | param_set = ps( 25 | minPts = p_int(0L, tags = c("required", "train")), 26 | gen_hdbscan_tree = p_lgl(default = FALSE, tags = "train"), 27 | gen_simplified_tree = p_lgl(default = FALSE, tags = "train"), 28 | verbose = p_lgl(default = FALSE, tags = "train") 29 | ) 30 | 31 | super$initialize( 32 | id = "clust.hdbscan", 33 | feature_types = c("logical", "integer", "numeric"), 34 | predict_types = "partition", 35 | param_set = param_set, 36 | properties = c("density", "exclusive", "complete"), 37 | packages = "dbscan", 38 | man = "mlr3cluster::mlr_learners_clust.hdbscan", 39 | label = "HDBSCAN Clustering" 40 | ) 41 | } 42 | ), 43 | 44 | private = list( 45 | .train = function(task) { 46 | pv = self$param_set$get_values(tags = "train") 47 | data = task$data() 48 | m = invoke(dbscan::hdbscan, x = data, .args = pv) 49 | m = insert_named(m, list(data = data)) 50 | 51 | if (self$save_assignments) { 52 | self$assignments = m$cluster 53 | } 54 | m 55 | }, 56 | 57 | .predict = function(task) { 58 | partition = as.integer(invoke(predict, self$model, newdata = task$data(), data = self$model$data)) 59 | PredictionClust$new(task = task, partition = partition) 60 | } 61 | ) 62 | ) 63 | 64 | #' @include zzz.R 65 | register_learner("clust.hdbscan", LearnerClustHDBSCAN) 66 | -------------------------------------------------------------------------------- /R/LearnerClustHclust.R: -------------------------------------------------------------------------------- 1 | #' @title Agglomerative Hierarchical Clustering Learner 2 | #' 3 | #' @name mlr_learners_clust.hclust 4 | #' 5 | #' @description 6 | #' A [LearnerClust] for agglomerative hierarchical clustering implemented in [stats::hclust()]. 
#' Distance calculation is done by [stats::dist()].
#'
#' @templateVar id clust.hclust
#' @template learner
#'
#' @references
#' `r format_bib("becker1988s", "everitt1974cluster", "hartigan1975clustering", "sneath1973numerical", "anderberg1973cluster", "gordon1999classification", "murtagh1985multidimensional", "mcquitty1966similarity", "legendre2012numerical", "murtagh2014ward")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClustHclust = R6Class("LearnerClustHclust",
  inherit = LearnerClust,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      # The extra tags "hclust" and "dist" select which parameters are
      # forwarded to stats::hclust() and stats::dist() in `.train()`.
      param_set = ps(
        method = p_fct(
          default = "complete",
          levels = c("ward.D", "ward.D2", "single", "complete", "average", "mcquitty", "median", "centroid"),
          tags = c("train", "hclust")
        ),
        members = p_uty(default = NULL, tags = c("train", "hclust")),
        distmethod = p_fct(
          default = "euclidean",
          levels = c("euclidean", "maximum", "manhattan", "canberra", "binary", "minkowski"),
          tags = "train"
        ),
        diag = p_lgl(default = FALSE, tags = c("train", "dist")),
        upper = p_lgl(default = FALSE, tags = c("train", "dist")),
        p = p_dbl(default = 2, tags = c("train", "dist"), depends = quote(distmethod == "minkowski")),
        k = p_int(1L, default = 2L, tags = c("train", "predict"))
      )

      param_set$set_values(k = 2L, distmethod = "euclidean")

      super$initialize(
        id = "clust.hclust",
        feature_types = c("logical", "integer", "numeric"),
        predict_types = "partition",
        param_set = param_set,
        properties = c("hierarchical", "exclusive", "complete"),
        packages = "stats",
        man = "mlr3cluster::mlr_learners_clust.hclust",
        label = "Agglomerative Hierarchical Clustering"
      )
    }
  ),

  private = list(
    # Computes a distance matrix with stats::dist() and clusters it with
    # stats::hclust(). `k` is only used to cut the tree for the assignments.
    .train = function(task) {
      pv = self$param_set$get_values(tags = "train")
      dist = invoke(stats::dist,
        x = task$data(),
        # Use the full name. `pv$d` relied on `$` partial matching, which
        # yields NULL as soon as another name starting with "d" (here
        # `diag`) is present, silently replacing the configured
        # `distmethod` with "euclidean".
        method = pv$distmethod %??% "euclidean",
        .args = self$param_set$get_values(tags = c("train", "dist"))
      )
      m = invoke(stats::hclust,
        d = dist,
        .args = self$param_set$get_values(tags = c("train", "hclust"))
      )
      if (self$save_assignments) {
        self$assignments = stats::cutree(m, pv$k)
      }
      m
    },

    # Returns the stored training assignments after checking `k` against the
    # number of rows of the task.
    .predict = function(task) {
      pv = self$param_set$get_values(tags = "predict")
      if (pv$k > task$nrow) {
        stopf("`k` needs to be between 1 and %i.", task$nrow)
      }

      warn_prediction_useless(self$id)

      PredictionClust$new(task = task, partition = self$assignments)
    }
  )
)

#' @include zzz.R
register_learner("clust.hclust", LearnerClustHclust)

--------------------------------------------------------------------------------
/R/LearnerClustKKMeans.R:
--------------------------------------------------------------------------------
#' @title Kernel K-Means Clustering Learner
#'
#' @name mlr_learners_clust.kkmeans
#'
#' @description
#' A [LearnerClust] for kernel k-means clustering implemented in [kernlab::kkmeans()].
#' [kernlab::kkmeans()] doesn't have a default value for the number of clusters.
#' Therefore, the `centers` parameter here is set to 2 by default.
#' Kernel parameters have to be passed directly and not by using the `kpar` list in `kkmeans`.
#' The predict method finds the nearest center in kernel distance to
#' assign clusters for new data points.
#'
#' @templateVar id clust.kkmeans
#' @template learner
#'
#' @references
#' `r format_bib("karatzoglou2004kernlab", "dhillon2004unified")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClustKKMeans = R6Class("LearnerClustKKMeans",
  inherit = LearnerClust,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      # Kernel hyperparameters depend on the chosen `kernel`.
      params = ps(
        centers = p_uty(tags = c("required", "train"), custom_check = check_centers),
        kernel = p_fct(
          default = "rbfdot",
          levels = c("vanilladot", "polydot", "rbfdot", "tanhdot", "laplacedot", "besseldot", "anovadot", "splinedot"),
          tags = "train"
        ),
        sigma = p_dbl(
          0, tags = "train", depends = quote(kernel %in% c("rbfdot", "anovadot", "besseldot", "laplacedot"))
        ),
        degree = p_int(
          1L, default = 3L, tags = "train", depends = quote(kernel %in% c("polydot", "anovadot", "besseldot"))
        ),
        scale = p_dbl(0, default = 1, tags = "train", depends = quote(kernel %in% c("polydot", "tanhdot"))),
        offset = p_dbl(default = 1, tags = "train", depends = quote(kernel %in% c("polydot", "tanhdot"))),
        order = p_int(default = 1L, tags = "train", depends = quote(kernel == "besseldot")),
        alg = p_fct(levels = c("kkmeans", "kerninghan"), default = "kkmeans", tags = "train"),
        p = p_dbl(default = 1, tags = "train")
      )
      # kernlab::kkmeans() has no default number of clusters, so use 2.
      params$set_values(centers = 2L)

      super$initialize(
        id = "clust.kkmeans",
        feature_types = c("logical", "integer", "numeric"),
        predict_types = "partition",
        param_set = params,
        properties = c("partitional", "exclusive", "complete"),
        packages = "kernlab",
        man = "mlr3cluster::mlr_learners_clust.kkmeans",
        label = "Kernel K-Means"
      )
    }
  ),

  private = list(
    # Validates `centers` against the task and fits kernlab::kkmeans() on the
    # task data converted to a matrix.
    .train = function(task) {
      train_args = self$param_set$get_values(tags = "train")
      assert_centers_param(train_args$centers, task, test_data_frame, "centers")

      model = invoke(kernlab::kkmeans, x = as.matrix(task$data()), .args = train_args)
      if (self$save_assignments) {
        self$assignments = model[seq_along(model)]
      }
      model
    },

    # Assigns every new observation to the center with the smallest squared
    # kernel distance k(x, x) + k(c, c) - 2 * k(x, c).
    .predict = function(task) {
      # all of predict is taken from mlr2

      model = self$model
      cluster_centers = kernlab::centers(model)
      kernel_fn = kernlab::kernelf(model)
      new_x = as.matrix(task$data())

      # k(x, c): one row per new observation, one column per center
      k_xc = matrix(kernlab::kernelMatrix(kernel_fn, new_x, cluster_centers), ncol = nrow(cluster_centers))
      n_obs = nrow(k_xc)
      n_centers = ncol(k_xc)

      # k(x, x): each observation's self-product repeated along its row
      self_x = diag(kernlab::kernelMatrix(kernel_fn, new_x))
      k_xx = matrix(rep(self_x, each = n_centers), ncol = n_centers, byrow = TRUE)

      # k(c, c): each center's self-product repeated down its column
      self_c = diag(kernlab::kernelMatrix(kernel_fn, as.matrix(cluster_centers)))
      k_cc = matrix(rep(self_c, each = n_obs), nrow = n_obs)

      # squared kernel distance to every center; the nearest one wins
      sq_dist = k_xx + k_cc - 2 * k_xc
      cluster_ids = apply(sq_dist, 1L, which.min)

      PredictionClust$new(task = task, partition = cluster_ids)
    }
  )
)

#' @include zzz.R
register_learner("clust.kkmeans", LearnerClustKKMeans)

--------------------------------------------------------------------------------
/R/LearnerClustKMeans.R:
--------------------------------------------------------------------------------
#' @title K-Means Clustering Learner
#'
#' @name mlr_learners_clust.kmeans
#'
#' @description
#' A [LearnerClust] for k-means clustering implemented in [stats::kmeans()].
#' [stats::kmeans()] doesn't have a default value for the number of clusters.
#' Therefore, the `centers` parameter here is set to 2 by default.
#' The predict method uses [clue::cl_predict()] to compute the
#' cluster memberships for new data.
#'
#' @templateVar id clust.kmeans
#' @template learner
#'
#' @references
#' `r format_bib("forgy1965cluster", "hartigan1979algorithm", "lloyd1982least", "macqueen1967some")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClustKMeans = R6Class("LearnerClustKMeans",
  inherit = LearnerClust,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      algorithms = c("Hartigan-Wong", "Lloyd", "Forgy", "MacQueen")

      param_set = ps(
        centers = p_uty(custom_check = check_centers, tags = c("required", "train")),
        iter.max = p_int(lower = 1L, default = 10L, tags = "train"),
        algorithm = p_fct(levels = algorithms, default = "Hartigan-Wong", tags = "train"),
        nstart = p_int(lower = 1L, default = 1L, tags = "train"),
        trace = p_int(lower = 0L, default = 0L, tags = "train")
      )

      # `centers` is required, so the learner ships with two clusters preset
      param_set$set_values(centers = 2L)

      super$initialize(
        id = "clust.kmeans",
        feature_types = c("logical", "integer", "numeric"),
        predict_types = "partition",
        param_set = param_set,
        properties = c("partitional", "exclusive", "complete"),
        packages = c("stats", "clue"),
        man = "mlr3cluster::mlr_learners_clust.kmeans",
        label = "K-Means"
      )
    }
  ),

  private = list(
    # Runs stats::kmeans() on the task data and optionally stores the training assignments.
    .train = function(task) {
      train_args = self$param_set$get_values(tags = "train")

      # warn when `nstart` was set although `centers` is not a single integer
      if (!(is.null(train_args$nstart) || test_int(train_args$centers))) {
        warningf("`nstart` parameter is only relevant when `centers` is integer.")
      }

      assert_centers_param(train_args$centers, task, test_data_frame, "centers")

      fit = invoke(stats::kmeans, x = task$data(), .args = train_args)
      if (self$save_assignments) {
        self$assignments = fit$cluster
      }
      fit
    },

    # Predicts class ids for the task data via cl_predict() and drops the returned class attribute.
    .predict = function(task) {
      class_ids = invoke(cl_predict, self$model, newdata = task$data(), type = "class_ids")
      PredictionClust$new(task = task, partition = unclass(class_ids))
    }
  )
)

#' @include zzz.R
register_learner("clust.kmeans", LearnerClustKMeans)

--------------------------------------------------------------------------------
/R/LearnerClustMclust.R:
--------------------------------------------------------------------------------
#' @title Gaussian Mixture Models-Based Clustering Learner
#'
#' @name mlr_learners_clust.mclust
#'
#' @description
#' A [LearnerClust] for model-based clustering implemented in [mclust::Mclust()].
#' The predict method uses [mclust::predict.Mclust()] to compute the
#' cluster memberships for new data.
#'
#' @templateVar id clust.mclust
#' @template learner
#'
#' @references
#' `r format_bib("scrucca2016mclust", "fraley2002model")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClustMclust = R6Class("LearnerClustMclust",
  inherit = LearnerClust,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      # `x` must be an object of class "mclustBIC"
      check_bic = crate(function(x) check_class(x, "mclustBIC"))

      param_set = ps(
        G = p_uty(default = 1:9, tags = "train", custom_check = check_numeric),
        modelNames = p_uty(tags = "train", custom_check = check_character),
        prior = p_uty(tags = "train", custom_check = check_list),
        control = p_uty(tags = "train", custom_check = check_list),
        initialization = p_uty(tags = "train", custom_check = check_list),
        x = p_uty(tags = "train", custom_check = check_bic)
      )

      super$initialize(
        id = "clust.mclust",
        feature_types = c("logical", "integer", "numeric"),
        predict_types = c("partition", "prob"),
        param_set = param_set,
        properties = c("partitional", "fuzzy", "complete"),
        packages = "mclust",
        man = "mlr3cluster::mlr_learners_clust.mclust",
        label = "Gaussian Mixture Models Clustering"
      )
    }
  ),

  private = list(
    # Fits mclust::Mclust() inside with_package("mclust", ...) and optionally stores the classification.
    .train = function(task) {
      train_args = self$param_set$get_values(tags = "train")
      with_package("mclust", {
        fit = invoke(mclust::Mclust, data = task$data(), .args = train_args)
      })
      if (self$save_assignments) {
        self$assignments = fit$classification
      }
      fit
    },

    # Returns both the hard classification and the membership matrix `z` of the fitted model.
    .predict = function(task) {
      pred = invoke(predict, self$model, newdata = task$data())
      PredictionClust$new(
        task = task,
        partition = as.integer(pred$classification),
        prob = pred$z
      )
    }
  )
)

#' @include zzz.R
register_learner("clust.mclust", LearnerClustMclust)

--------------------------------------------------------------------------------
/R/LearnerClustMeanShift.R:
--------------------------------------------------------------------------------
#' @title Mean Shift Clustering Learner
#'
#' @name mlr_learners_clust.meanshift
#'
#' @description
#' A [LearnerClust] for Mean Shift clustering implemented in [LPCM::ms()].
#' There is no predict method for [`LPCM::ms()`], so the method
#' returns cluster labels for the 'training' data.
#'
#' @templateVar id clust.meanshift
#' @template learner
#'
#' @references
#' `r format_bib("cheng1995mean")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClustMeanShift = R6Class("LearnerClustMeanShift",
  inherit = LearnerClust,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      # accepts a numeric or integer value for the bandwidth `h`, otherwise returns the error message
      check_h = crate(function(x) {
        if (test_numeric(x) || test_int(x)) {
          return(TRUE)
        }
        "`h` must be either integer or numeric vector"
      })

      param_set = ps(
        h = p_uty(tags = "train", custom_check = check_h),
        subset = p_uty(tags = "train", custom_check = check_numeric),
        scaled = p_int(lower = 0L, default = 1, tags = "train"),
        iter = p_int(lower = 1L, default = 200L, tags = "train"),
        thr = p_dbl(default = 0.01, tags = "train")
      )

      super$initialize(
        id = "clust.meanshift",
        feature_types = c("logical", "integer", "numeric"),
        predict_types = "partition",
        param_set = param_set,
        properties = c("partitional", "exclusive", "complete"),
        packages = "LPCM",
        man = "mlr3cluster::mlr_learners_clust.meanshift",
        label = "Mean Shift Clustering"
      )
    }
  ),

  private = list(
    # Runs LPCM::ms() on the task data after checking that `subset` is not longer than the task.
    .train = function(task) {
      train_args = self$param_set$get_values(tags = "train")
      subset = train_args$subset
      if (!is.null(subset) && length(subset) > task$nrow) {
        stopf("`subset` length must be less than or equal to number of observations in task.")
      }

      fit = invoke(LPCM::ms, X = task$data(), .args = train_args)
      if (self$save_assignments) {
        self$assignments = fit$cluster.label
      }
      fit
    },

    # Emits the "prediction useless" warning and returns the labels stored in the fitted model.
    .predict = function(task) {
      warn_prediction_useless(self$id)
      labels = as.integer(self$model$cluster.label)
      PredictionClust$new(task = task, partition = labels)
    }
  )
)

#' @include zzz.R
register_learner("clust.meanshift", LearnerClustMeanShift)

--------------------------------------------------------------------------------
/R/LearnerClustMiniBatchKMeans.R:
--------------------------------------------------------------------------------
#' @title Mini Batch K-Means Clustering Learner
#'
#' @name mlr_learners_clust.MBatchKMeans
#'
#' @description
#' A [LearnerClust] for mini batch k-means clustering implemented in [ClusterR::MiniBatchKmeans()].
#' [ClusterR::MiniBatchKmeans()] doesn't have a default value for the number of clusters.
#' Therefore, the `clusters` parameter here is set to 2 by default.
#' The predict method uses [ClusterR::predict_MBatchKMeans()] to compute the
#' cluster memberships for new data.
#' The learner supports both partitional and fuzzy clustering.
#'
#' @templateVar id clust.MBatchKMeans
#' @template learner
#'
#' @references
#' `r format_bib("sculley2010web")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClustMiniBatchKMeans = R6Class("LearnerClustMiniBatchKMeans",
  inherit = LearnerClust,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      initializers = c("optimal_init", "quantile_init", "kmeans++", "random")

      param_set = ps(
        clusters = p_int(lower = 1L, default = 2L, tags = "train"),
        batch_size = p_int(lower = 1L, default = 10L, tags = "train"),
        num_init = p_int(lower = 1L, default = 1L, tags = "train"),
        max_iters = p_int(lower = 1L, default = 100L, tags = "train"),
        init_fraction = p_dbl(
          lower = 0, upper = 1, default = 1, tags = "train",
          depends = quote(initializer %in% c("kmeans++", "optimal_init"))
        ),
        initializer = p_fct(levels = initializers, default = "kmeans++", tags = "train"),
        early_stop_iter = p_int(lower = 1L, default = 10L, tags = "train"),
        verbose = p_lgl(default = FALSE, tags = "train"),
        CENTROIDS = p_uty(default = NULL, tags = "train"),
        tol = p_dbl(lower = 0, default = 1e-04, tags = "train"),
        tol_optimal_init = p_dbl(lower = 0, default = 0.3, tags = "train"),
        seed = p_int(default = 1L, tags = "train")
      )

      # preset the number of clusters to two
      param_set$set_values(clusters = 2L)

      super$initialize(
        id = "clust.MBatchKMeans",
        feature_types = c("logical", "integer", "numeric"),
        predict_types = c("partition", "prob"),
        param_set = param_set,
        properties = c("partitional", "fuzzy", "exclusive", "complete"),
        packages = "ClusterR",
        man = "mlr3cluster::mlr_learners_clust.MBatchKMeans",
        label = "Mini Batch K-Means"
      )
    }
  ),

  private = list(
    # Validates `CENTROIDS`, fits ClusterR::MiniBatchKmeans() and optionally stores training assignments.
    .train = function(task) {
      train_args = self$param_set$get_values(tags = "train")
      centroids = train_args$CENTROIDS
      assert_centers_param(centroids, task, test_matrix, "CENTROIDS")
      if (test_matrix(centroids) && nrow(centroids) != train_args$clusters) {
        stopf("`CENTROIDS` must have same number of rows as `clusters`.")
      }

      x = task$data()
      fit = invoke(ClusterR::MiniBatchKmeans, data = x, .args = train_args)
      if (self$save_assignments) {
        # the assignments are obtained by predicting on the training data
        self$assignments = as.integer(invoke(predict, fit, newdata = x))
      }
      fit
    },

    # Predicts hard assignments; the membership matrix is only computed for predict type "prob".
    .predict = function(task) {
      x = task$data()
      partition = as.integer(invoke(predict, self$model, newdata = x))

      prob = NULL
      if (self$predict_type == "prob") {
        prob = invoke(predict, self$model, newdata = x, fuzzy = TRUE)
        # columns are named by their position 1..k
        colnames(prob) = seq_len(ncol(prob))
      }

      PredictionClust$new(task = task, partition = partition, prob = prob)
    }
  )
)

#' @include zzz.R
register_learner("clust.MBatchKMeans", LearnerClustMiniBatchKMeans)

--------------------------------------------------------------------------------
/R/LearnerClustOPTICS.R:
--------------------------------------------------------------------------------
#' @title Ordering Points to Identify the Clustering Structure (OPTICS) Clustering Learner
#'
#' @name mlr_learners_clust.optics
#'
#' @description
#' OPTICS (Ordering points to identify the clustering structure) point ordering clustering.
#' Calls [dbscan::optics()] from \CRANpkg{dbscan}.
#'
#' @templateVar id clust.optics
#' @template learner
#'
#' @references
#' `r format_bib("hahsler2019dbscan", "ankerst1999optics")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClustOPTICS = R6Class("LearnerClustOPTICS",
  inherit = LearnerClust,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      split_rules = c("STD", "MIDPT", "FAIR", "SL_MIDPT", "SL_FAIR", "SUGGEST")

      param_set = ps(
        eps = p_dbl(lower = 0, special_vals = list(NULL), default = NULL, tags = "train"),
        minPts = p_int(lower = 0L, default = 5L, tags = "train"),
        search = p_fct(levels = c("kdtree", "linear", "dist"), default = "kdtree", tags = "train"),
        bucketSize = p_int(lower = 1L, default = 10L, tags = "train", depends = quote(search == "kdtree")),
        splitRule = p_fct(
          levels = split_rules,
          default = "SUGGEST",
          tags = "train",
          depends = quote(search == "kdtree")
        ),
        approx = p_dbl(default = 0, tags = "train"),
        eps_cl = p_dbl(lower = 0, tags = c("required", "train"))
      )

      super$initialize(
        id = "clust.optics",
        feature_types = c("logical", "integer", "numeric"),
        predict_types = "partition",
        param_set = param_set,
        properties = c("density", "exclusive", "complete"),
        packages = "dbscan",
        man = "mlr3cluster::mlr_learners_clust.optics",
        label = "OPTICS Clustering"
      )
    }
  ),

  private = list(
    # Computes the OPTICS ordering, attaches the training data and extracts a DBSCAN clustering.
    .train = function(task) {
      train_args = self$param_set$get_values(tags = "train")
      x = task$data()

      # `eps_cl` belongs to the extraction step and is not passed to dbscan::optics()
      ordering = invoke(dbscan::optics, x = x, .args = remove_named(train_args, "eps_cl"))
      # keep the training data in the model; .predict() hands it back to predict()
      ordering = insert_named(ordering, list(data = x))
      fit = invoke(dbscan::extractDBSCAN, object = ordering, eps_cl = train_args$eps_cl)

      if (self$save_assignments) {
        self$assignments = fit$cluster
      }
      fit
    },

    # Predicts on new data, passing the training data stored in the model as `data`.
    .predict = function(task) {
      fit = self$model
      partition = invoke(predict, fit, newdata = task$data(), data = fit$data)
      PredictionClust$new(task = task, partition = partition)
    }
  )
)

#' @include zzz.R
register_learner("clust.optics", LearnerClustOPTICS)

--------------------------------------------------------------------------------
/R/LearnerClustPAM.R:
--------------------------------------------------------------------------------
#' @title Partitioning Around
#' Medoids Clustering Learner
#'
#' @name mlr_learners_clust.pam
#'
#' @description
#' A [LearnerClust] for PAM clustering implemented in [cluster::pam()].
#' [cluster::pam()] doesn't have a default value for the number of clusters.
#' Therefore, the `k` parameter which corresponds to the number
#' of clusters here is set to 2 by default.
#' The predict method uses [clue::cl_predict()] to compute the
#' cluster memberships for new data.
#'
#' @templateVar id clust.pam
#' @template learner
#'
#' @references
#' `r format_bib("reynolds2006clustering", "schubert2019faster")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClustPAM = R6Class("LearnerClustPAM",
  inherit = LearnerClust,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      param_set = ps(
        k = p_int(1L, tags = c("required", "train")),
        # "euclidean" is the spelling documented by cluster::pam(); the former misspelling
        # "euclidian" stays a valid level for backward compatibility and is translated in .train()
        metric = p_fct(levels = c("euclidean", "euclidian", "manhattan"), tags = "train"),
        medoids = p_uty(
          default = NULL, tags = "train", custom_check = crate(function(x) check_integerish(x, null.ok = TRUE))
        ),
        stand = p_lgl(default = FALSE, tags = "train"),
        do.swap = p_lgl(default = TRUE, tags = "train"),
        pamonce = p_int(0L, 5L, default = 0L, tags = "train"),
        trace.lev = p_int(0L, default = 0L, tags = "train")
      )

      # `k` is required, so the learner ships with two clusters preset
      param_set$set_values(k = 2L)

      super$initialize(
        id = "clust.pam",
        feature_types = c("logical", "integer", "numeric"),
        predict_types = "partition",
        param_set = param_set,
        properties = c("partitional", "exclusive", "complete"),
        packages = "cluster",
        man = "mlr3cluster::mlr_learners_clust.pam",
        label = "Partitioning Around Medoids"
      )
    }
  ),

  private = list(
    # Validates the optional initial medoids, then fits cluster::pam() on the raw task data
    # (`diss = FALSE`) and optionally stores the training assignments.
    .train = function(task) {
      pv = self$param_set$get_values(tags = "train")

      # translate the legacy spelling to the one documented by cluster::pam()
      if (identical(pv$metric, "euclidian")) {
        pv$metric = "euclidean"
      }

      if (!is.null(pv$medoids)) {
        if (length(pv$medoids) != pv$k) {
          stopf("number of `medoids`' needs to match `k`!")
        }
        # medoids are row indices into the task, so the valid range is 1..task$nrow
        if (sum(pv$medoids <= task$nrow & pv$medoids >= 1L) != pv$k) {
          stopf("`medoids` need to contain valid indices from 1 to %i (number of observations)!", task$nrow)
        }
      }

      m = invoke(cluster::pam, x = task$data(), diss = FALSE, .args = pv)
      if (self$save_assignments) {
        self$assignments = m$clustering
      }
      m
    },

    # Predicts class ids for the task data via cl_predict() and drops the returned class attribute.
    .predict = function(task) {
      partition = unclass(invoke(cl_predict, self$model, newdata = task$data(), type = "class_ids"))
      PredictionClust$new(task = task, partition = partition)
    }
  )
)

#' @include zzz.R
register_learner("clust.pam", LearnerClustPAM)

--------------------------------------------------------------------------------
/R/LearnerClustSimpleKMeans.R:
--------------------------------------------------------------------------------
#' @title K-Means Clustering Learner from Weka
#'
#' @name mlr_learners_clust.SimpleKMeans
#'
#' @description
#' A [LearnerClust] for Simple K Means clustering implemented in [RWeka::SimpleKMeans()].
#' The predict method uses [RWeka::predict.Weka_clusterer()] to compute the
#' cluster memberships for new data.
#'
#' @templateVar id clust.SimpleKMeans
#' @template learner
#'
#' @references
#' `r format_bib("witten2002data", "forgy1965cluster", "lloyd1982least", "macqueen1967some")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClustSimpleKMeans = R6Class("LearnerClustSimpleKMeans",
  inherit = LearnerClust,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      # parameter ids are passed on as Weka control options; "_" is replaced by "-" in .train()
      param_set = ps(
        A = p_uty(default = "weka.core.EuclideanDistance", tags = "train"),
        C = p_lgl(default = FALSE, tags = "train"),
        fast = p_lgl(default = FALSE, tags = "train"),
        I = p_int(1L, default = 100L, tags = "train"),
        init = p_int(0L, 3L, default = 0L, tags = "train"),
        M = p_lgl(default = FALSE, tags = "train"),
        max_candidates = p_int(1L, default = 100L, tags = "train"),
        min_density = p_int(1L, default = 2L, tags = "train"),
        N = p_int(1L, default = 2L, tags = "train"),
        num_slots = p_int(1L, default = 1L, tags = "train"),
        O = p_lgl(default = FALSE, tags = "train"),
        periodic_pruning = p_int(1L, default = 10000L, tags = "train"),
        S = p_int(0L, default = 10L, tags = "train"),
        t2 = p_dbl(default = -1, tags = "train"),
        t1 = p_dbl(default = -1.5, tags = "train"),
        V = p_lgl(default = FALSE, tags = "train"),
        output_debug_info = p_lgl(default = FALSE, tags = "train")
      )

      super$initialize(
        id = "clust.SimpleKMeans",
        feature_types = c("logical", "integer", "numeric"),
        predict_types = "partition",
        param_set = param_set,
        properties = c("partitional", "exclusive", "complete"),
        packages = "RWeka",
        man = "mlr3cluster::mlr_learners_clust.SimpleKMeans",
        label = "K-Means (Weka)"
      )
    }
  ),

  private = list(
    # Builds a Weka control object from the parameter values and fits RWeka::SimpleKMeans().
    .train = function(task) {
      pv = self$param_set$get_values(tags = "train")
      # replace "_" in the parameter ids with "-" before building the Weka control object
      names(pv) = chartr("_", "-", names(pv))
      ctrl = invoke(RWeka::Weka_control, .args = pv)
      m = invoke(RWeka::SimpleKMeans, x = task$data(), control = ctrl)
      if (self$save_assignments) {
        # the stored class ids are shifted by one before being saved as assignments
        self$assignments = unname(m$class_ids + 1L)
      }
      m
    },

    # Predicts classes for the task data and shifts them by one, as done for the assignments.
    .predict = function(task) {
      partition = invoke(predict, self$model, newdata = task$data(), type = "class") + 1L
      PredictionClust$new(task = task, partition = partition)
    }
  )
)

#' @include zzz.R
register_learner("clust.SimpleKMeans", LearnerClustSimpleKMeans)

--------------------------------------------------------------------------------
/R/LearnerClustXMeans.R:
--------------------------------------------------------------------------------
#' @title X-means Clustering Learner
#'
#' @name mlr_learners_clust.xmeans
#'
#' @description
#' A [LearnerClust] for X-means clustering implemented in [RWeka::XMeans()].
#' The predict method uses [RWeka::predict.Weka_clusterer()] to compute the
#' cluster memberships for new data.
#'
#' @templateVar id clust.xmeans
#' @template learner
#'
#' @references
#' `r format_bib("witten2002data", "pelleg2000x")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClustXMeans = R6Class("LearnerClustXMeans",
  inherit = LearnerClust,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      # parameter ids are passed on as Weka control options; "_" is replaced by "-" in .train()
      param_set = ps(
        B = p_dbl(lower = 0, default = 1, tags = "train"),
        C = p_dbl(lower = 0, default = 0, tags = "train"),
        D = p_uty(default = "weka.core.EuclideanDistance", tags = "train"),
        H = p_int(lower = 1L, default = 4L, tags = "train"),
        I = p_int(lower = 1L, default = 1L, tags = "train"),
        J = p_int(lower = 1L, default = 1000L, tags = "train"),
        K = p_uty(default = "", tags = "train"),
        L = p_int(lower = 1L, default = 2L, tags = "train"),
        M = p_int(lower = 1L, default = 1000L, tags = "train"),
        S = p_int(lower = 1L, default = 10L, tags = "train"),
        U = p_int(lower = 0L, default = 0L, tags = "train"),
        use_kdtree = p_lgl(default = FALSE, tags = "train"),
        N = p_uty(tags = "train"),
        O = p_uty(tags = "train"),
        Y = p_uty(tags = "train"),
        output_debug_info = p_lgl(default = FALSE, tags = "train")
      )

      super$initialize(
        id = "clust.xmeans",
        feature_types = c("logical", "integer", "numeric"),
        predict_types = "partition",
        param_set = param_set,
        properties = c("partitional", "exclusive", "complete"),
        packages = "RWeka",
        man = "mlr3cluster::mlr_learners_clust.xmeans",
        label = "X-means"
      )
    }
  ),

  private = list(
    # Builds a Weka control object from the parameter values and fits RWeka::XMeans().
    .train = function(task) {
      train_args = self$param_set$get_values(tags = "train")
      # replace "_" in the parameter ids with "-" before building the Weka control object
      names(train_args) = chartr("_", "-", names(train_args))
      weka_control = invoke(RWeka::Weka_control, .args = train_args)

      fit = invoke(RWeka::XMeans, x = task$data(), control = weka_control)
      if (self$save_assignments) {
        # the stored class ids are shifted by one before being saved as assignments
        self$assignments = unname(fit$class_ids + 1L)
      }
      fit
    },

    # Predicts classes for the task data and shifts them by one, as done for the assignments.
    .predict = function(task) {
      weka_class = invoke(predict, self$model, newdata = task$data(), type = "class")
      PredictionClust$new(task = task, partition = weka_class + 1L)
    }
  )
)

#' @include zzz.R
register_learner("clust.xmeans", LearnerClustXMeans)

--------------------------------------------------------------------------------
/R/MeasureClust.R:
--------------------------------------------------------------------------------
#' @title Cluster Measure
#'
#' @description
#' This measure specializes [mlr3::Measure] for cluster analysis:
#'
#' * `task_type` is set to `"clust"`.
#' * Possible values for `predict_type` are `"partition"` and `"prob"`.
#'
#' Predefined measures can be found in the [mlr3misc::Dictionary] [mlr3::mlr_measures].
#'
#' @template param_id
#' @template param_range
#' @template param_minimize
#' @template param_average
#' @template param_aggregator
#' @template param_predict_type
#' @template param_measure_properties
#' @template param_predict_sets
#' @template param_task_properties
#' @template param_packages
#' @template param_label
#' @template param_man
#'
#' @seealso
#' Example cluster measures: [`clust.dunn`][mlr_measures_clust.dunn]
#' @export
MeasureClust = R6Class("MeasureClust",
  inherit = Measure,
  cloneable = FALSE,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function(
      id,
      range,
      minimize = NA,
      aggregator = NULL,
      properties = character(),
      predict_type = "partition",
      task_properties = character(),
      packages = character(),
      label = NA_character_,
      man = NA_character_
    ) {
      # fixes the task type to "clust" and always adds mlr3cluster to the required packages
      super$initialize(
        id = id,
        task_type = "clust",
        range = range,
        minimize = minimize,
        aggregator = aggregator,
        properties = properties,
        predict_type = predict_type,
        task_properties = task_properties,
        packages = c("mlr3cluster", packages),
        label = label,
        man = man
      )
    }
  )
)

--------------------------------------------------------------------------------
/R/MeasureClustInternal.R:
--------------------------------------------------------------------------------
#' @include measures.R
#' @include MeasureClust.R
# Measure backed by cluster.stats() (package fpc, per `packages`); `crit` names the element
# of the cluster.stats() result that is returned as the score.
MeasureClustFPC = R6Class("MeasureClustFPC",
  inherit = MeasureClust,
  public = list(
    crit = NULL,
    # `name` is the key into the `measures` registry, `label` the human readable name
    initialize = function(name, label) {
      info = measures[[name]]
      super$initialize(
        id = paste0("clust.", name),
        range = c(info$lower, info$upper),
        minimize = info$minimize,
        predict_type = info$predict_type,
        packages = "fpc",
        properties = "requires_task",
        label = label,
        man = paste0("mlr3cluster::mlr_measures_clust.", name)
      )
      self$crit = info$crit
    }
  ),
  private = list(
    .score = function(prediction, task, ...) {
      # distance matrix of exactly the rows that were predicted
      X = dist(task$data(rows = prediction$row_ids))
      suppressWarnings(cluster.stats(X, clustering = prediction$partition, silhouette = FALSE)[[self$crit]])
    }
  )
)

# Measure backed by silhouette() (package cluster, per `packages`); `crit` names the column
# of the silhouette matrix that is averaged.
MeasureClustSil = R6Class("MeasureClustSil",
  inherit = MeasureClust,
  public = list(
    crit = NULL,
    # `name` is the key into the `measures` registry, `label` the human readable name
    initialize = function(name, label) {
      info = measures[[name]]
      super$initialize(
        id = paste0("clust.", name),
        range = c(info$lower, info$upper),
        minimize = info$minimize,
        predict_type = info$predict_type,
        packages = "cluster",
        properties = "requires_task",
        label = label,
        man = paste0("mlr3cluster::mlr_measures_clust.", name)
      )
      self$crit = info$crit
    }
  ),
  private = list(
    .score = function(prediction, task, ...) {
      X = dist(task$data(rows = prediction$row_ids))

      # a partition with a single cluster is scored as 0 without calling silhouette()
      if (length(unique(prediction$partition)) == 1L) {
        0L
      } else {
        mean(silhouette(prediction$partition, X)[, self$crit])
      }
    }
  )
)

#' @title Rousseeuw's Silhouette Quality Index
#'
#' @templateVar id silhouette
#' @template measure_sil
# silhouette widths, and therefore their mean, are bounded by [-1, 1]
measures$silhouette = make_measure_info("sil_width", "Silhouette", lower = -1, upper = 1, minimize = FALSE)

#' @title Calinski Harabasz Pseudo F-Statistic
#'
#' @templateVar id ch
#' @template measure_fpc
measures$ch = make_measure_info("ch", "Calinski Harabasz", lower = 0, upper = Inf, minimize = FALSE)

#' @title Dunn Index
#'
#' @templateVar id dunn
#' @template measure_fpc
measures$dunn = make_measure_info("dunn", "Dunn", lower = 0, upper = Inf, minimize = FALSE)

#' @title Within Sum of Squares
#'
#' @templateVar id wss
#' @template measure_fpc
measures$wss = make_measure_info("within.cluster.ss", "Within Sum of Squares", lower = 0, upper = Inf, minimize = TRUE)

--------------------------------------------------------------------------------
/R/PredictionClust.R:
--------------------------------------------------------------------------------
#' @title Prediction Object for Cluster Analysis
#'
#' @description
#' This object wraps the predictions returned by a learner of class [LearnerClust], i.e.
#' the predicted partition and cluster probability.
#'
#' @family Prediction
#' @export
#' @examples
#' library(mlr3)
#' library(mlr3cluster)
#' task = tsk("usarrests")
#' learner = lrn("clust.kmeans")
#' p = learner$train(task)$predict(task)
#' p$predict_types
#' head(as.data.table(p))
PredictionClust = R6Class("PredictionClust",
  inherit = Prediction,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    #'
    #' @param task ([TaskClust])\cr
    #'   Task, used to extract defaults for `row_ids`.
    #'
    #' @param row_ids (`integer()`)\cr
    #'   Row ids of the predicted observations, i.e. the row ids of the test set.
    #'
    #' @param partition (`integer()`)\cr
    #'   Vector of cluster partitions.
    #'
    #' @param prob (`matrix()`)\cr
    #'   Numeric matrix of cluster membership probabilities with one column for each cluster
    #'   and one row for each observation.
    #'   Columns must be named with cluster numbers, row names are automatically removed.
    #'   If `prob` is provided, but `partition` is not, the cluster memberships are calculated from
    #'   the probabilities using [max.col()] with `ties.method` set to `"first"`.
    #'
    #' @param check (`logical(1)`)\cr
    #'   If `TRUE`, performs some argument checks and predict type conversions.
    initialize = function(task = NULL, row_ids = task$row_ids, partition = NULL, prob = NULL, check = TRUE) {
      # only the components that were actually supplied become part of the prediction data
      pdata = list(row_ids = row_ids, partition = partition, prob = prob)
      pdata = discard(pdata, is.null)
      class(pdata) = c("PredictionDataClust", "PredictionData")

      if (check) {
        pdata = check_prediction_data(pdata)
      }
      self$task_type = "clust"
      self$man = "mlr3cluster::PredictionClust"
      self$data = pdata
      self$predict_types = intersect(c("partition", "prob"), names(pdata))
    }
  ),

  active = list(
    #' @field partition (`integer()`)\cr
    #' Access the stored partition.
    partition = function() {
      # the partition is documented as integer, so the all-missing fallback is integer as well
      self$data$partition %??% rep(NA_integer_, length(self$data$row_ids))
    },

    #' @field prob (`matrix()`)\cr
    #' Access to the stored probabilities.
    prob = function() {
      self$data$prob
    }
  )
)

#' @export
as.data.table.PredictionClust = function(x, ...) { # nolint
  tab = as.data.table(x$data[c("row_ids", "partition")])
  if ("prob" %chin% x$predict_types) {
    # one column per cluster, prefixed with "prob."
    prob = as.data.table(x$data$prob)
    setnames(prob, new = paste0("prob.", names(prob)))
    tab = rcbind(tab, prob)
  }

  tab[]
}

--------------------------------------------------------------------------------
/R/PredictionDataClust.R:
--------------------------------------------------------------------------------
#' @export
as_prediction.PredictionDataClust = function(x, check = TRUE, ...) { # nolint
  # the elements of the prediction data are the constructor arguments
  invoke(PredictionClust$new, check = check, .args = x)
}

#' @export
check_prediction_data.PredictionDataClust = function(pdata, ...) { # nolint
  pdata$row_ids = assert_row_ids(pdata$row_ids)
  n = length(pdata$row_ids)
  assert_integer(pdata$partition, len = n, any.missing = FALSE, null.ok = TRUE)

  prob = pdata$prob
  if (!is.null(prob)) {
    # need to check number of columns for matrix
    assert_matrix(prob, nrows = n)
    assert_numeric(prob, lower = 0, upper = 1)
    if (!is.null(rownames(prob))) {
      rownames(prob) = NULL
      pdata$prob = prob
    }

    if (is.null(pdata$partition)) {
      # calculate partition from prob
      pdata$partition = max.col(prob, ties.method = "first")
    }
  }

  pdata
}

#' @export
is_missing_prediction_data.PredictionDataClust = function(pdata, ...) { # nolint
  # returns the row ids whose partition or any probability is missing
  miss = logical(length(pdata$row_ids))

  if (!is.null(pdata$partition)) {
    miss = is.na(pdata$partition)
  }

  if (!is.null(pdata$prob)) {
    miss = miss | apply(pdata$prob, 1L, anyMissing)
  }

  pdata$row_ids[miss]
}

#' @export
c.PredictionDataClust = function(..., keep_duplicates = TRUE) {
  dots = list(...)
  assert_list(dots, "PredictionDataClust")
  assert_flag(keep_duplicates)
  if (length(dots) == 1L) {
    return(dots[[1L]])
  }

  # all inputs must carry the same set of predict types
  predict_types = names(mlr_reflections$learner_predict_types$clust)
  predict_types = map(dots, function(x) intersect(names(x), predict_types))
  if (!every(predict_types[-1L], setequal, y = predict_types[[1L]])) {
    stopf("Cannot combine predictions: Different predict types.")
  }

  elems = c("row_ids", "partition")
  tab = map_dtr(dots, function(x) x[elems], .fill = FALSE)
  prob = do.call(rbind, map(dots, "prob"))

  if (!keep_duplicates) {
    # for duplicated row ids the last occurrence wins
    keep = !duplicated(tab, by = "row_ids", fromLast = TRUE)
    tab = tab[keep]
    prob = prob[keep, , drop = FALSE]
  }

  result = as.list(tab)
  result$prob = prob

  # same class vector as set in PredictionClust$initialize()
  set_class(result, c("PredictionDataClust", "PredictionData"))
}

#' @export
filter_prediction_data.PredictionDataClust = function(pdata, row_ids, ...) {
  # restrict every stored component to the requested row ids
  keep = pdata$row_ids %in% row_ids
  pdata$row_ids = pdata$row_ids[keep]

  if (!is.null(pdata$partition)) {
    pdata$partition = pdata$partition[keep]
  }

  if (!is.null(pdata$prob)) {
    pdata$prob = pdata$prob[keep, , drop = FALSE]
  }

  pdata
}

#' @export
create_empty_prediction_data.TaskClust = function(task, learner) {
  predict_types = mlr_reflections$learner_predict_types[["clust"]][[learner$predict_type]]

  pdata = list(
    row_ids = integer(),
    partition = integer()
  )

  if ("prob" %chin% predict_types) {
    # probabilities are numeric, so the empty placeholder is numeric too
    pdata$prob = matrix(numeric())
  }

  # same class vector as set in PredictionClust$initialize()
  set_class(pdata, c("PredictionDataClust", "PredictionData"))
}

--------------------------------------------------------------------------------
/R/TaskClust.R:
--------------------------------------------------------------------------------
#' @title Cluster Task
#'
#' @description
#' This task specializes [mlr3::Task] for cluster problems.
5 | #' As an unsupervised task, this task has no target column. 6 | #' The `task_type` is set to `"clust"`. 7 | #' 8 | #' Predefined tasks are stored in the [dictionary][mlr3misc::Dictionary] [mlr3::mlr_tasks]. 9 | #' 10 | #' @template param_id 11 | #' @template param_backend 12 | #' @template param_label 13 | #' @family Task 14 | #' @export 15 | #' @examples 16 | #' library(mlr3) 17 | #' library(mlr3cluster) 18 | #' task = TaskClust$new("usarrests", backend = USArrests) 19 | #' task$task_type 20 | #' 21 | #' # possible properties: 22 | #' mlr_reflections$task_properties$clust 23 | TaskClust = R6Class("TaskClust", 24 | inherit = TaskUnsupervised, 25 | public = list( 26 | #' @description 27 | #' Creates a new instance of this [R6][R6::R6Class] class. 28 | initialize = function(id, backend, label = NA_character_) { 29 | super$initialize(id = id, task_type = "clust", backend = backend, label = label) # task type is fixed to "clust"; id, backend and label are passed through unchanged 30 | } 31 | ) 32 | ) 33 | -------------------------------------------------------------------------------- /R/TaskClust_ruspini.R: -------------------------------------------------------------------------------- 1 | #' @title Ruspini Cluster Task 2 | #' 3 | #' @name mlr_tasks_ruspini 4 | #' @format [R6::R6Class] inheriting from [TaskClust]. 5 | #' 6 | #' @description 7 | #' A cluster task for the [cluster::ruspini] data set. 
8 | #' 9 | #' @templateVar id ruspini 10 | #' @template task 11 | #' 12 | #' @references 13 | #' `r format_bib("ruspini_1970")` 14 | #' 15 | #' @template seealso_task 16 | NULL 17 | # Constructor for the "ruspini" task; registered under that key below. 18 | load_task_ruspini = function(id = "ruspini") { 19 | b = as_data_backend(load_dataset("ruspini", "cluster")) 20 | task = TaskClust$new(id, b, label = "Ruspini") 21 | b$hash = task$man = "mlr3cluster::mlr_tasks_ruspini" # the same string is used as backend hash and as manual page reference 22 | task 23 | } 24 | 25 | #' @include zzz.R 26 | register_task("ruspini", load_task_ruspini) 27 | -------------------------------------------------------------------------------- /R/TaskClust_usarrest.R: -------------------------------------------------------------------------------- 1 | #' @title US Arrests Cluster Task 2 | #' 3 | #' @name mlr_tasks_usarrests 4 | #' @format [R6::R6Class] inheriting from [TaskClust]. 5 | #' 6 | #' @description 7 | #' A cluster task for the [datasets::USArrests] data set. 8 | #' Rownames are stored as variable `"state"` with column role `"name"`. 9 | #' 10 | #' @templateVar id usarrests 11 | #' @template task 12 | #' 13 | #' @references 14 | #' `r format_bib("berry1979inter")` 15 | #' 16 | #' @template seealso_task 17 | NULL 18 | # Constructor for the "usarrests" task; registered under that key below. 
19 | load_task_usarrests = function(id = "usarrests") { 20 | b = as_data_backend(load_dataset("USArrests", "datasets", keep_rownames = TRUE), keep_rownames = "state") 21 | task = TaskClust$new(id, b, label = "US Arrests") 22 | b$hash = task$man = "mlr3cluster::mlr_tasks_usarrests" # the same string is used as backend hash and as manual page reference 23 | task$col_roles$name = "state" # the former row names only label the observations ... 24 | task$col_roles$feature = setdiff(task$col_roles$feature, "state") # ... so the column is removed from the features 25 | task 26 | } 27 | 28 | #' @include zzz.R 29 | register_task("usarrests", load_task_usarrests) 30 | -------------------------------------------------------------------------------- /R/as_prediction_clust.R: -------------------------------------------------------------------------------- 1 | #' @title Convert to a Cluster Prediction 2 | #' 3 | #' @description 4 | #' Convert object to a [PredictionClust]. 
5 | #' 6 | #' @inheritParams mlr3::as_prediction 7 | #' 8 | #' @return [PredictionClust]. 9 | #' @export 10 | #' @examples 11 | #' if (requireNamespace("e1071")) { 12 | #' # create a prediction object 13 | #' task = tsk("usarrests") 14 | #' learner = lrn("clust.kmeans") 15 | #' learner = lrn("clust.cmeans", predict_type = "prob") 16 | #' learner$train(task) 17 | #' p = learner$predict(task) 18 | #' 19 | #' # convert to a data.table 20 | #' tab = as.data.table(p) 21 | #' 22 | #' # convert back to a Prediction 23 | #' as_prediction_clust(tab) 24 | #' 25 | #' # split data.table into 3 data.tables based on UrbanPop 26 | #' f = cut(task$data(rows = tab$row_ids)$UrbanPop, 3) 27 | #' tabs = split(tab, f) 28 | #' 29 | #' # convert back to list of predictions 30 | #' preds = lapply(tabs, as_prediction_clust) 31 | #' 32 | #' # calculate performance in each group 33 | #' sapply(preds, function(p) p$score(task = task)) 34 | #' } 35 | as_prediction_clust = function(x, ...) { 36 | UseMethod("as_prediction_clust") 37 | } 38 | 39 | #' @rdname as_prediction_clust 40 | #' @export 41 | as_prediction_clust.PredictionClust = function(x, ...) { # nolint 42 | x 43 | } 44 | # data.frame method: expects columns "row_ids" and "partition", plus optional probability columns named "prob.<cluster>". 45 | #' @rdname as_prediction_clust 46 | #' @export 47 | as_prediction_clust.data.frame = function(x, ...) { # nolint 48 | assert_names(names(x), must.include = c("row_ids", "partition")) 49 | prob_cols = setdiff(names(x), c("row_ids", "partition")) 50 | x = as.data.table(x) # the `with = FALSE` column selection below is data.table syntax and errors on a plain data.frame 51 | if (length(prob_cols) > 0L) { 52 | if (!all(startsWith(prob_cols, "prob."))) { 53 | stopf( 54 | "Table may only contain columns 'row_ids', 'partition' as well as columns prefixed with 'prob.' for class probabilities." 
# nolint 55 | ) 56 | } 57 | prob = as.matrix(x[, prob_cols, with = FALSE]) 58 | nms = colnames(prob) 59 | colnames(prob) = substr(nms, 6L, nchar(nms)) # strip the leading "prob." (5 characters) 60 | } else { 61 | prob = NULL 62 | } 63 | # all remaining (non-probability) columns are passed to the constructor as named arguments 64 | invoke(PredictionClust$new, prob = prob, .args = x[, setdiff(names(x), prob_cols), with = FALSE]) 65 | } 66 | -------------------------------------------------------------------------------- /R/as_task_clust.R: -------------------------------------------------------------------------------- 1 | #' @title Convert to a Cluster Task 2 | #' 3 | #' @description 4 | #' Convert object to a [TaskClust]. 5 | #' This is an S3 generic, specialized for at least the following objects: 6 | #' 7 | #' 1. [TaskClust]: ensure the identity. 8 | #' 2. [data.frame()] and [mlr3::DataBackend]: provides an alternative to calling the constructor of [TaskClust]. 9 | #' 10 | #' @inheritParams mlr3::as_task 11 | #' 12 | #' @return [TaskClust]. 13 | #' @export 14 | #' @examples 15 | #' as_task_clust(datasets::USArrests) 16 | as_task_clust = function(x, ...) { 17 | UseMethod("as_task_clust") 18 | } 19 | 20 | #' @rdname as_task_clust 21 | #' @param clone (`logical(1)`)\cr 22 | #' If `TRUE`, ensures that the returned object is not the same as the input `x`. 23 | #' @export 24 | as_task_clust.TaskClust = function(x, clone = FALSE, ...) { # nolint 25 | if (clone) x$clone() else x 26 | } 27 | 28 | #' @rdname as_task_clust 29 | #' @param id (`character(1)`)\cr 30 | #' Id for the new task. 31 | #' Defaults to the (deparsed and substituted) name of the data argument. 32 | #' @export 33 | as_task_clust.data.frame = function(x, id = deparse1(substitute(x)), ...) 
{ # nolint 34 | force(id) 35 | 36 | ii = which(map_lgl(keep(x, is.double), anyInfinite)) 37 | if (length(ii) > 0L) { 38 | warningf("Detected columns with unsupported Inf values in data: %s", str_collapse(names(ii))) 39 | } 40 | 41 | TaskClust$new(id = id, backend = x) 42 | } 43 | 44 | #' @rdname as_task_clust 45 | #' @export 46 | as_task_clust.DataBackend = function(x, id = deparse1(substitute(x)), ...) { # nolint 47 | force(id) 48 | 49 | TaskClust$new(id = id, backend = x) 50 | } 51 | 52 | #' @rdname as_task_clust 53 | #' @param data (`data.frame()`)\cr 54 | #' Data frame containing all columns specified in formula `x`. 55 | #' @export 56 | as_task_clust.formula = function(x, data, id = deparse1(substitute(data)), ...) { # nolint 57 | force(id) 58 | # the formula may only use columns of `data` (or "."), and it must not have a response 59 | assert_data_frame(data) 60 | assert_subset(all.vars(x), c(names(data), "."), .var.name = "formula") 61 | if (attributes(terms(x, data = data))$response) { 62 | stopf("Formula %s has a response.", format(x)) 63 | } 64 | tab = model.frame(x, data, na.action = "na.pass") 65 | attr(tab, "terms") = attr(tab, "na.action") = NULL 66 | # the model.frame() attributes were dropped above; the table is now converted via the generic 67 | as_task_clust(tab, id = id, ...) 
68 | } 69 | -------------------------------------------------------------------------------- /R/helper.R: -------------------------------------------------------------------------------- 1 | warn_prediction_useless = function(id) { # warns that learner `id` does not really predict on new data 2 | warningf("Learner '%s' doesn't predict on new data and predictions may not make sense on new data.", id) 3 | } 4 | # option values that switch off R's partial-matching warnings; presumably applied around calls into upstream packages (usage not visible here) 5 | allow_partial_matching = list( 6 | warnPartialMatchArgs = FALSE, 7 | warnPartialMatchAttr = FALSE, 8 | warnPartialMatchDollar = FALSE 9 | ) 10 | # Errors if `centers` passes `test_class()` but its number of columns differs from task$ncol; `name` is only used in the message. 11 | assert_centers_param = function(centers, task, test_class, name) { 12 | if (test_class(centers) && ncol(centers) != task$ncol) { 13 | stopf("`%s` must have same number of columns as data.", name) 14 | } 15 | } 16 | # Check function: TRUE for a data.frame or a single integer >= 1, otherwise the error message as a string. 17 | check_centers = function(x) { 18 | if (test_data_frame(x) || test_int(x, lower = 1L)) { 19 | TRUE 20 | } else { 21 | "`centers` must be integer or data.frame with initial cluster centers" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /R/measures.R: -------------------------------------------------------------------------------- 1 | # we store the information about the measures in an environment. 2 | # this way, we can (a) construct them easily and (b) generate documentation. 
3 | make_measure_info = function(crit, label, lower, upper, minimize, predict_type = "partition") { 4 | list(crit = crit, label = label, lower = lower, upper = upper, minimize = minimize, predict_type = predict_type) 5 | } 6 | measures = new.env(parent = emptyenv()) 7 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | #' @import checkmate 2 | #' @import data.table 3 | #' @import mlr3 4 | #' @import mlr3misc 5 | #' @import paradox 6 | #' @importFrom R6 R6Class 7 | #' @importFrom clue cl_predict 8 | #' @importFrom cluster silhouette 9 | #' @importFrom fpc cluster.stats 10 | #' @importFrom stats model.frame terms predict runif dist 11 | "_PACKAGE" 12 | # registries filled by register_task() / register_learner(); register_mlr3() copies their entries into the mlr3 dictionaries 13 | mlr3cluster_tasks = new.env() 14 | mlr3cluster_learners = new.env() 15 | # Stores a task constructor under `name`; registering the same name twice is an error. 16 | register_task = function(name, constructor) { 17 | if (name %chin% names(mlr3cluster_tasks)) stopf("task %s registered twice.", name) 18 | mlr3cluster_tasks[[name]] = constructor 19 | } 20 | # Stores a learner constructor under `name`; registering the same name twice is an error. 21 | register_learner = function(name, constructor) { 22 | if (name %chin% names(mlr3cluster_learners)) stopf("learner %s registered twice.", name) 23 | mlr3cluster_learners[[name]] = constructor 24 | } 25 | # Adds the "clust" task type, its reflections, and this package's tasks, learners and measures to mlr3. 
26 | register_mlr3 = function() { 27 | # reflections 28 | mlr_reflections = utils::getFromNamespace("mlr_reflections", ns = "mlr3") 29 | mlr_reflections$task_types = mlr_reflections$task_types[!"clust"] # remove any existing "clust" row before the new one is appended 30 | mlr_reflections$task_types = setkeyv(rbind(mlr_reflections$task_types, rowwise_table( 31 | ~type, ~package, ~task, ~learner, ~prediction, ~prediction_data, ~measure, 32 | "clust", "mlr3cluster", "TaskClust", "LearnerClust", "PredictionClust", "PredictionDataClust", "MeasureClust" 33 | ), fill = TRUE), "type") 34 | # column roles, task properties and (further below) measure properties are copied from the "regr" entries 35 | mlr_reflections$task_col_roles$clust = mlr_reflections$task_col_roles$regr 36 | mlr_reflections$task_properties$clust = mlr_reflections$task_properties$regr 37 | mlr_reflections$learner_properties$clust 
= c( 38 | "missings", "partitional", "hierarchical", "exclusive", "overlapping", "fuzzy", "complete", "partial", "density" 39 | ) 40 | mlr_reflections$learner_predict_types$clust = list(partition = "partition", prob = c("partition", "prob")) # predict type "prob" comprises both partition and prob 41 | mlr_reflections$measure_properties$clust = mlr_reflections$measure_properties$regr 42 | mlr_reflections$default_measures$clust = "clust.dunn" 43 | 44 | # tasks 45 | mlr_tasks = utils::getFromNamespace("mlr_tasks", ns = "mlr3") 46 | iwalk(as.list(mlr3cluster_tasks), function(task, id) mlr_tasks$add(id, task)) 47 | 48 | # learners 49 | mlr_learners = utils::getFromNamespace("mlr_learners", ns = "mlr3") 50 | iwalk(as.list(mlr3cluster_learners), function(learner, id) mlr_learners$add(id, learner)) 51 | 52 | # measures 53 | mlr_measures = utils::getFromNamespace("mlr_measures", ns = "mlr3") 54 | mlr_measures$add("clust.silhouette", MeasureClustSil, name = "silhouette", label = "Silhouette") 55 | mlr_measures$add("clust.dunn", MeasureClustFPC, name = "dunn", label = "Dunn") 56 | mlr_measures$add("clust.ch", MeasureClustFPC, name = "ch", label = "Calinski Harabasz") 57 | mlr_measures$add("clust.wss", MeasureClustFPC, name = "wss", label = "Within Sum of Squares") 58 | } 59 | # register_mlr3() is not called directly; it is installed as a namespace callback for "mlr3" 60 | .onLoad = function(libname, pkgname) { 61 | backports::import(pkgname) 62 | 63 | register_namespace_callback(pkgname, "mlr3", register_mlr3) 64 | } 65 | # Reverts register_mlr3(): removes the tasks, learners and measures and all "clust" reflection entries. 
66 | .onUnload = function(libpaths) { # nolint 67 | walk(names(mlr3cluster_tasks), function(id) mlr_tasks$remove(id)) 68 | walk(names(mlr3cluster_learners), function(id) mlr_learners$remove(id)) 69 | walk(names(measures), function(id) mlr_measures$remove(paste("clust", id, sep = "."))) # assumes the names in `measures` match the keys added in register_mlr3() - TODO confirm 70 | 71 | mlr_reflections$task_types = mlr_reflections$task_types[!"clust"] 72 | reflections = c( 73 | "measure_properties", 74 | "default_measures", 75 | "learner_properties", 76 | "learner_predict_types", 77 | "task_properties", 78 | "task_col_roles" 79 | ) 80 | walk(reflections, function(x) mlr_reflections[[x]] = 
remove_named(mlr_reflections[[x]], "clust")) 81 | } 82 | 83 | leanify_package() 84 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r, include = FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = TRUE, 10 | comment = "#>", 11 | fig.path = "man/figures/README-", 12 | out.width = "100%" 13 | ) 14 | 15 | library(data.table) 16 | library(mlr3cluster) 17 | library(mlr3misc) 18 | lrn_clust = as.data.table(mlr3::mlr_learners)[task_type == "clust", .(key, label, packages)] 19 | msr_clust = as.data.table(mlr3::mlr_measures)[task_type == "clust", .(key, label, packages)] 20 | ``` 21 | 22 | # mlr3cluster 23 | 24 | Package website: [release](https://mlr3cluster.mlr-org.com/) \| [dev](https://mlr3cluster.mlr-org.com/dev/) 25 | 26 | Cluster analysis for **[mlr3](https://github.com/mlr-org/mlr3/)**. 27 | 28 | 29 | [![r-cmd-check](https://github.com/mlr-org/mlr3cluster/actions/workflows/r-cmd-check.yml/badge.svg)](https://github.com/mlr-org/mlr3cluster/actions/workflows/r-cmd-check.yml) 30 | [![CRAN status](https://www.r-pkg.org/badges/version/mlr3cluster)](https://CRAN.R-project.org/package=mlr3cluster) 31 | [![StackOverflow](https://img.shields.io/badge/stackoverflow-mlr3-orange.svg)](https://stackoverflow.com/questions/tagged/mlr3) 32 | [![Mattermost](https://img.shields.io/badge/chat-mattermost-orange.svg)](https://lmmisld-lmu-stats-slds.srv.mwn.de/mlr_invite/) 33 | 34 | 35 | **mlr3cluster** is an extension package for cluster analysis within the **[mlr3](https://github.com/mlr-org/mlr3)** ecosystem. It is a successor of clustering capabilities of **[mlr2](https://github.com/mlr-org/mlr)**. 
36 | 37 | ## Installation 38 | 39 | Install the last release from CRAN: 40 | 41 | ```{r, eval = FALSE} 42 | install.packages("mlr3cluster") 43 | ``` 44 | 45 | Install the development version from GitHub: 46 | 47 | ```{r, eval = FALSE} 48 | # install.packages("pak") 49 | pak::pak("mlr-org/mlr3cluster") 50 | ``` 51 | 52 | ## Feature Overview 53 | 54 | The current version of **mlr3cluster** contains: 55 | 56 | - A selection of `r nrow(lrn_clust)` clustering learners that represent a wide variety of clusterers: partitional, hierarchical, fuzzy, etc. 57 | - A selection of `r nrow(msr_clust)` performance measures 58 | - Two built-in tasks to get started with clustering 59 | 60 | Also, the package is integrated with **[mlr3viz](https://github.com/mlr-org/mlr3viz)** which enables you to create great visualizations with just one line of code! 61 | 62 | ## Cluster Analysis 63 | 64 | ### Cluster Learners 65 | 66 | ```{r, echo = FALSE} 67 | cran_pkg = function(pkgs) { 68 | pkgs = fifelse( 69 | pkgs %chin% c("stats", "graphics", "datasets"), 70 | pkgs, sprintf("[%1$s](https://cran.r-project.org/package=%1$s)", pkgs) 71 | ) 72 | toString(pkgs) 73 | } 74 | 75 | lrn_clust[, packages := map(packages, function(x) setdiff(x, c("mlr3", "mlr3cluster")))] 76 | lrn_clust[, `:=`( 77 | key = sprintf("[%1$s](https://mlr3cluster.mlr-org.com/reference/mlr_learners_%1$s)", key), 78 | packages = map_chr(packages, cran_pkg) 79 | )] 80 | knitr::kable(lrn_clust, format = "markdown", col.names = tools::toTitleCase(names(lrn_clust))) 81 | ``` 82 | 83 | ### Cluster Measures 84 | 85 | ```{r, echo = FALSE} 86 | msr_clust[, packages := map(packages, function(x) setdiff(x, c("mlr3", "mlr3cluster")))] 87 | msr_clust[, `:=`( 88 | key = sprintf("[%1$s](https://mlr3cluster.mlr-org.com/reference/mlr_measures_%1$s)", key), 89 | packages = map_chr(packages, cran_pkg) 90 | )] 91 | knitr::kable(msr_clust, format = "markdown", col.names = tools::toTitleCase(names(msr_clust))) 92 | ``` 93 | 94 | 95 | ## Example 96 
| 97 | ```{r, eval = FALSE} 98 | library(mlr3) 99 | library(mlr3cluster) 100 | 101 | task = tsk("usarrests") 102 | learner = lrn("clust.kmeans") 103 | learner$train(task) 104 | prediction = learner$predict(task = task) 105 | ``` 106 | 107 | ## More Resources 108 | 109 | Check out the **[blogpost](https://www.r-bloggers.com/2020/10/introducing-mlr3cluster-cluster-analysis-package/)** for a more detailed introduction to the package. 110 | Also, **[mlr3book](https://mlr3book.mlr-org.com/chapters/chapter13/beyond_regression_and_classification.html#sec-cluster)** has a section on clustering. 111 | 112 | ## Future Plans 113 | 114 | - Add more learners and measures 115 | - Integrate the package with **[mlr3pipelines](https://github.com/mlr-org/mlr3pipelines)** (work in progress) 116 | 117 | If you have any questions, feedback or ideas, feel free to open an issue [here](https://github.com/mlr-org/mlr3cluster/issues). 118 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## R CMD check results 2 | 3 | 0 errors | 0 warnings | 0 notes 4 | 5 | ## revdepcheck results 6 | 7 | We checked 3 reverse dependencies, comparing R CMD check results across CRAN and dev versions of this package. 
8 | 9 | * We saw 0 new problems 10 | * We failed to check 0 packages 11 | -------------------------------------------------------------------------------- /man-roxygen/example.R: -------------------------------------------------------------------------------- 1 | <% 2 | lrn = mlr3::lrn(id) 3 | pkgs = setdiff(lrn$packages, c("mlr3", "mlr3cluster")) 4 | if (length(pkgs) == 0L) { pkgs = "mlr3" } 5 | pkgs = paste0(sprintf('requireNamespace("%s")', pkgs), collapse = " && ") 6 | %> 7 | #' @examples 8 | #' if (<%= pkgs %>) { 9 | #' learner = mlr3::lrn("<%= id %>") 10 | #' print(learner) 11 | #' 12 | #' # available parameters: 13 | #' learner$param_set$ids() 14 | #' } 15 | -------------------------------------------------------------------------------- /man-roxygen/learner.R: -------------------------------------------------------------------------------- 1 | #' @section Dictionary: 2 | #' This [mlr3::Learner] can be instantiated via the [dictionary][mlr3misc::Dictionary] [mlr3::mlr_learners] or with the associated sugar function [mlr3::lrn()]: 3 | #' ``` 4 | #' mlr_learners$get("<%= id %>") 5 | #' lrn("<%= id %>") 6 | #' ``` 7 | #' 8 | #' @section Meta Information: 9 | #' `r mlr3misc::rd_info(mlr3::lrn("<%= id %>"))` 10 | #' @md 11 | #' 12 | #' @section Parameters: 13 | #' `r mlr3misc::rd_info(mlr3::lrn("<%= id %>")$param_set)` 14 | #' @md 15 | -------------------------------------------------------------------------------- /man-roxygen/measure_fpc.R: -------------------------------------------------------------------------------- 1 | #' @usage NULL 2 | #' @name mlr_measures_clust.<%= id %> 3 | #' @format [R6::R6Class()] inheriting from [MeasureClust]. 4 | #' 5 | #' @description 6 | #' The score function calls [fpc::cluster.stats()] from package \CRANpkg{fpc}. 7 | #' "<%= measures[[id]]$crit %>" is used to subset the output of the function call. 
8 | #' 9 | #' @section Construction: 10 | #' This measure can be retrieved from the dictionary [mlr3::mlr_measures]: 11 | #' ``` 12 | #' mlr_measures$get("clust.<%= id %>") 13 | #' msr("clust.<%= id %>") 14 | #' ``` 15 | #' 16 | #' @section Meta Information: 17 | #' <% item = measures[[id]] %> 18 | #' * Range: <%= rd_format_range(item$lower, item$upper) %> 19 | #' * Minimize: `<%= item$minimize %>` 20 | #' * Required predict type: `<%= item$predict_type %>` 21 | #' 22 | #' 23 | #' @family cluster measures 24 | #' 25 | #' @seealso 26 | #' [Dictionary][mlr3misc::Dictionary] of [Measures][mlr3::Measure]: [mlr3::mlr_measures] 27 | #' 28 | #' `as.data.table(mlr_measures)` for a complete table of all (also dynamically created) [mlr3::Measure] implementations. 29 | -------------------------------------------------------------------------------- /man-roxygen/measure_sil.R: -------------------------------------------------------------------------------- 1 | #' @usage NULL 2 | #' @name mlr_measures_clust.<%= id %> 3 | #' @format [R6::R6Class()] inheriting from [MeasureClust]. 4 | #' 5 | #' @description 6 | #' The score function calls [cluster::silhouette()] from package \CRANpkg{cluster}. 7 | #' "<%= measures[[id]]$crit %>" is used to subset the output of the function call. 
8 | #' 9 | #' @section Construction: 10 | #' This measure can be retrieved from the dictionary [mlr3::mlr_measures]: 11 | #' ``` 12 | #' mlr_measures$get("clust.<%= id %>") 13 | #' msr("clust.<%= id %>") 14 | #' ``` 15 | #' 16 | #' @section Meta Information: 17 | #' <% item = measures[[id]] %> 18 | #' * Range: <%= rd_format_range(item$lower, item$upper) %> 19 | #' * Minimize: `<%= item$minimize %>` 20 | #' * Required predict type: `<%= item$predict_type %>` 21 | #' 22 | #' 23 | #' @family cluster measures 24 | #' 25 | #' @seealso 26 | #' [Dictionary][mlr3misc::Dictionary] of [Measures][mlr3::Measure]: [mlr3::mlr_measures] 27 | #' 28 | #' `as.data.table(mlr_measures)` for a complete table of all (also dynamically created) [mlr3::Measure] implementations. 29 | -------------------------------------------------------------------------------- /man-roxygen/param_aggregator.R: -------------------------------------------------------------------------------- 1 | #' @param aggregator (`function(x)`)\cr 2 | #' Function to aggregate individual performance scores `x` where `x` is a numeric vector. 3 | #' If `NULL`, defaults to [mean()]. 4 | -------------------------------------------------------------------------------- /man-roxygen/param_average.R: -------------------------------------------------------------------------------- 1 | #' @param average (`character(1)`)\cr 2 | #' How to average multiple [mlr3::Prediction]s from a [ResampleResult]. 3 | #' 4 | #' The default, `"macro"`, calculates the individual performance scores for each [mlr3::Prediction] and then uses the 5 | #' function defined in `$aggregator` to average them to a single number. 6 | #' 7 | #' If set to `"micro"`, the individual [mlr3::Prediction] objects are first combined into a single new [mlr3::Prediction] object which is then used to assess the performance. 8 | #' The function in `$aggregator` is not used in this case. 
9 | -------------------------------------------------------------------------------- /man-roxygen/param_backend.R: -------------------------------------------------------------------------------- 1 | #' @param backend ([mlr3::DataBackend])\cr 2 | #' Either a [mlr3::DataBackend], or any object which is convertible to a [mlr3::DataBackend] with `as_data_backend()`. 3 | #' E.g., a `data.frame()` will be converted to a [mlr3::DataBackendDataTable]. 4 | -------------------------------------------------------------------------------- /man-roxygen/param_data_formats.R: -------------------------------------------------------------------------------- 1 | #' @param data_formats (`character()`)\cr 2 | #' Set of supported data formats which can be processed during `$train()` and `$predict()`, 3 | #' e.g. `"data.table"`. 4 | -------------------------------------------------------------------------------- /man-roxygen/param_feature_types.R: -------------------------------------------------------------------------------- 1 | #' @param feature_types (`character()`)\cr 2 | #' Feature types the learner operates on. Must be a subset of [`mlr_reflections$task_feature_types`][mlr3::mlr_reflections]. 3 | -------------------------------------------------------------------------------- /man-roxygen/param_id.R: -------------------------------------------------------------------------------- 1 | #' @param id (`character(1)`)\cr 2 | #' Identifier for the new instance. 3 | -------------------------------------------------------------------------------- /man-roxygen/param_label.R: -------------------------------------------------------------------------------- 1 | #' @param label (`character(1)`)\cr 2 | #' Label for the new instance. 
3 | -------------------------------------------------------------------------------- /man-roxygen/param_learner_properties.R: -------------------------------------------------------------------------------- 1 | #' @param properties (`character()`)\cr 2 | #' Set of properties of the [mlr3::Learner]. 3 | #' Must be a subset of [`mlr_reflections$learner_properties`][mlr3::mlr_reflections]. 4 | #' The following properties are currently standardized and understood by learners in \CRANpkg{mlr3}: 5 | #' * `"missings"`: The learner can handle missing values in the data. 6 | #' * `"weights"`: The learner supports observation weights. 7 | #' * `"importance"`: The learner supports extraction of importance scores, i.e. comes with an `$importance()` extractor function (see section on optional extractors in [mlr3::Learner]). 8 | #' * `"selected_features"`: The learner supports extraction of the set of selected features, i.e. comes with a `$selected_features()` extractor function (see section on optional extractors in [mlr3::Learner]). 9 | #' * `"oob_error"`: The learner supports extraction of estimated out of bag error, i.e. comes with an `$oob_error()` extractor function (see section on optional extractors in [mlr3::Learner]). 10 | -------------------------------------------------------------------------------- /man-roxygen/param_man.R: -------------------------------------------------------------------------------- 1 | #' @param man (`character(1)`)\cr 2 | #' String in the format `[pkg]::[topic]` pointing to a manual page for this object. 3 | #' The referenced help page can be opened via method `$help()`. 4 | -------------------------------------------------------------------------------- /man-roxygen/param_measure_properties.R: -------------------------------------------------------------------------------- 1 | #' @param properties (`character()`)\cr 2 | #' Properties of the measure. 3 | #' Must be a subset of [mlr_reflections$measure_properties][mlr3::mlr_reflections]. 
4 | #' Supported by `mlr3`: 5 | #' * `"requires_task"` (requires the complete [mlr3::Task]), 6 | #' * `"requires_learner"` (requires the trained [mlr3::Learner]), 7 | #' * `"requires_train_set"` (requires the training indices from the [mlr3::Resampling]), and 8 | #' * `"na_score"` (the measure is expected to occasionally return `NA` or `NaN`). 9 | -------------------------------------------------------------------------------- /man-roxygen/param_measures.R: -------------------------------------------------------------------------------- 1 | #' @param measures ([Measure] | list of [Measure])\cr 2 | #' Measure(s) to calculate. 3 | -------------------------------------------------------------------------------- /man-roxygen/param_minimize.R: -------------------------------------------------------------------------------- 1 | #' @param minimize (`logical(1)`)\cr 2 | #' Set to `TRUE` if good predictions correspond to small values, 3 | #' and to `FALSE` if good predictions correspond to large values. 4 | #' If set to `NA` (default), tuning this measure is not possible. 5 | -------------------------------------------------------------------------------- /man-roxygen/param_packages.R: -------------------------------------------------------------------------------- 1 | #' @param packages (`character()`)\cr 2 | #' Set of required packages. 3 | #' A warning is signaled by the constructor if at least one of the packages is not installed, 4 | #' but loaded (not attached) later on-demand via [requireNamespace()]. 5 | -------------------------------------------------------------------------------- /man-roxygen/param_param_set.R: -------------------------------------------------------------------------------- 1 | #' @param param_set ([paradox::ParamSet])\cr 2 | #' Set of hyperparameters. 
3 | -------------------------------------------------------------------------------- /man-roxygen/param_predict_sets.R: -------------------------------------------------------------------------------- 1 | #' @param predict_sets (`character()`)\cr 2 | #' Prediction sets to operate on, used in `aggregate()` to extract the matching `predict_sets` from the [ResampleResult]. 3 | #' Multiple predict sets are calculated by the respective [mlr3::Learner] during [resample()]/[benchmark()]. 4 | #' Must be a non-empty subset of `{"train", "test"}`. 5 | #' If multiple sets are provided, these are first combined to a single prediction object. 6 | #' Default is `"test"`. 7 | -------------------------------------------------------------------------------- /man-roxygen/param_predict_type.R: -------------------------------------------------------------------------------- 1 | #' @param predict_type (`character(1)`)\cr 2 | #' Required predict type of the [mlr3::Learner]. 3 | #' Possible values are stored in [mlr_reflections$learner_predict_types][mlr3::mlr_reflections]. 4 | -------------------------------------------------------------------------------- /man-roxygen/param_predict_types.R: -------------------------------------------------------------------------------- 1 | #' @param predict_types (`character()`)\cr 2 | #' Supported predict types. Must be a subset of [`mlr_reflections$learner_predict_types`][mlr3::mlr_reflections]. 3 | -------------------------------------------------------------------------------- /man-roxygen/param_primary_key.R: -------------------------------------------------------------------------------- 1 | #' @param primary_key (`character(1)` | `integer()`)\cr 2 | #' Name of the primary key column, or integer vector of row ids. 
3 | -------------------------------------------------------------------------------- /man-roxygen/param_range.R: -------------------------------------------------------------------------------- 1 | #' @param range (`numeric(2)`)\cr 2 | #' Feasible range for this measure as `c(lower_bound, upper_bound)`. 3 | #' Both bounds may be infinite. 4 | -------------------------------------------------------------------------------- /man-roxygen/param_rows.R: -------------------------------------------------------------------------------- 1 | #' @param rows `integer()`\cr 2 | #' Row indices. 3 | -------------------------------------------------------------------------------- /man-roxygen/param_task_properties.R: -------------------------------------------------------------------------------- 1 | #' @param task_properties (`character()`)\cr 2 | #' Required task properties, see [mlr3::Task]. 3 | -------------------------------------------------------------------------------- /man-roxygen/param_task_type.R: -------------------------------------------------------------------------------- 1 | #' @param task_type (`character(1)`)\cr 2 | #' Type of task, e.g. `"regr"` or `"classif"`. 3 | #' Must be an element of [mlr_reflections$task_types$type][mlr3::mlr_reflections]. 4 | -------------------------------------------------------------------------------- /man-roxygen/seealso_learner.R: -------------------------------------------------------------------------------- 1 | #' @seealso 2 | #' 3 | #' * Chapter in the [mlr3book](https://mlr3book.mlr-org.com/): 4 | #' \url{https://mlr3book.mlr-org.com/chapters/chapter2/data_and_basic_modeling.html#sec-learners} 5 | #' * Package [mlr3extralearners](https://github.com/mlr-org/mlr3extralearners) for more learners. 
6 | #' * [Dictionary][mlr3misc::Dictionary] of [Learners][mlr3::Learner]: [mlr3::mlr_learners] 7 | #' * `as.data.table(mlr_learners)` for a table of available [Learners][mlr3::Learner] in the running session (depending on the loaded packages). 8 | #' * \CRANpkg{mlr3pipelines} to combine learners with pre- and postprocessing steps. 9 | #' * Extension packages for additional task types: 10 | #' * \CRANpkg{mlr3proba} for probabilistic supervised regression and survival analysis. 11 | #' * \CRANpkg{mlr3cluster} for unsupervised clustering. 12 | #' * \CRANpkg{mlr3tuning} for tuning of hyperparameters, \CRANpkg{mlr3tuningspaces} 13 | #' for established default tuning spaces. 14 | #' 15 | #' @family Learner 16 | -------------------------------------------------------------------------------- /man-roxygen/seealso_task.R: -------------------------------------------------------------------------------- 1 | #' @seealso 2 | #' 3 | #' * Chapter in the [mlr3book](https://mlr3book.mlr-org.com/): 4 | #' \url{https://mlr3book.mlr-org.com/chapters/chapter2/data_and_basic_modeling.html} 5 | #' * Package \CRANpkg{mlr3data} for more toy tasks. 6 | #' * Package \CRANpkg{mlr3oml} for downloading tasks from \url{https://www.openml.org}. 7 | #' * Package \CRANpkg{mlr3viz} for some generic visualizations. 8 | #' * [Dictionary][mlr3misc::Dictionary] of [Tasks][mlr3::Task]: [mlr3::mlr_tasks] 9 | #' * `as.data.table(mlr_tasks)` for a table of available [Tasks][mlr3::Task] in the running session (depending on the loaded packages). 10 | #' * \CRANpkg{mlr3fselect} and \CRANpkg{mlr3filters} for feature selection and feature filtering. 11 | #' * Extension packages for additional task types: 12 | #' * Unsupervised clustering: \CRANpkg{mlr3cluster} 13 | #' * Probabilistic supervised regression and survival analysis: \url{https://mlr3proba.mlr-org.com/}. 
14 | #' 15 | #' @family Task 16 | -------------------------------------------------------------------------------- /man-roxygen/task.R: -------------------------------------------------------------------------------- 1 | #' @section Dictionary: 2 | #' This [mlr3::Task] can be instantiated via the [dictionary][mlr3misc::Dictionary] [mlr3::mlr_tasks] or with the associated sugar function [mlr3::tsk()]: 3 | #' ``` 4 | #' mlr_tasks$get("<%= id %>") 5 | #' tsk("<%= id %>") 6 | #' ``` 7 | #' 8 | #' @section Meta Information: 9 | #' `r mlr3misc::rd_info(mlr3::tsk("<%= id %>"))` 10 | #' @md 11 | -------------------------------------------------------------------------------- /man/LearnerClust.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/LearnerClust.R 3 | \name{LearnerClust} 4 | \alias{LearnerClust} 5 | \title{Cluster Learner} 6 | \description{ 7 | This Learner specializes \link[mlr3:Learner]{mlr3::Learner} for cluster problems: 8 | \itemize{ 9 | \item \code{task_type} is set to \code{"clust"}. 10 | \item Creates \link[mlr3:Prediction]{mlr3::Prediction}s of class \link{PredictionClust}. 11 | \item Possible values for \code{predict_types} are: 12 | \itemize{ 13 | \item \code{"partition"}: Integer indicating the cluster membership. 14 | \item \code{"prob"}: Probability for belonging to each cluster. 15 | } 16 | } 17 | 18 | Predefined learners can be found in the \link[mlr3misc:Dictionary]{mlr3misc::Dictionary} \link[mlr3:mlr_learners]{mlr3::mlr_learners}. 19 | } 20 | \examples{ 21 | library(mlr3) 22 | library(mlr3cluster) 23 | ids = mlr_learners$keys("^clust") 24 | ids 25 | 26 | # get a specific learner from mlr_learners: 27 | learner = lrn("clust.kmeans") 28 | print(learner) 29 | } 30 | \section{Super class}{ 31 | \code{\link[mlr3:Learner]{mlr3::Learner}} -> \code{LearnerClust} 32 | } 33 | \section{Public fields}{ 34 | \if{html}{\out{
}} 35 | \describe{ 36 | \item{\code{assignments}}{(\code{NULL} | \code{vector()})\cr 37 | Cluster assignments from learned model.} 38 | 39 | \item{\code{save_assignments}}{(\code{logical()})\cr 40 | Should assignments for 'train' data be saved in the learner? 41 | Default is \code{TRUE}.} 42 | } 43 | \if{html}{\out{
}} 44 | } 45 | \section{Methods}{ 46 | \subsection{Public methods}{ 47 | \itemize{ 48 | \item \href{#method-LearnerClust-new}{\code{LearnerClust$new()}} 49 | \item \href{#method-LearnerClust-reset}{\code{LearnerClust$reset()}} 50 | \item \href{#method-LearnerClust-clone}{\code{LearnerClust$clone()}} 51 | } 52 | } 53 | \if{html}{\out{ 54 |
Inherited methods 55 | 65 |
66 | }} 67 | \if{html}{\out{
}} 68 | \if{html}{\out{}} 69 | \if{latex}{\out{\hypertarget{method-LearnerClust-new}{}}} 70 | \subsection{Method \code{new()}}{ 71 | Creates a new instance of this \link[R6:R6Class]{R6} class. 72 | \subsection{Usage}{ 73 | \if{html}{\out{
}}\preformatted{LearnerClust$new( 74 | id, 75 | param_set = ps(), 76 | predict_types = "partition", 77 | feature_types = character(), 78 | properties = character(), 79 | packages = character(), 80 | label = NA_character_, 81 | man = NA_character_ 82 | )}\if{html}{\out{
}} 83 | } 84 | 85 | \subsection{Arguments}{ 86 | \if{html}{\out{
}} 87 | \describe{ 88 | \item{\code{id}}{(\code{character(1)})\cr 89 | Identifier for the new instance.} 90 | 91 | \item{\code{param_set}}{(\link[paradox:ParamSet]{paradox::ParamSet})\cr 92 | Set of hyperparameters.} 93 | 94 | \item{\code{predict_types}}{(\code{character()})\cr 95 | Supported predict types. Must be a subset of \code{\link[mlr3:mlr_reflections]{mlr_reflections$learner_predict_types}}.} 96 | 97 | \item{\code{feature_types}}{(\code{character()})\cr 98 | Feature types the learner operates on. Must be a subset of \code{\link[mlr3:mlr_reflections]{mlr_reflections$task_feature_types}}.} 99 | 100 | \item{\code{properties}}{(\code{character()})\cr 101 | Set of properties of the \link[mlr3:Learner]{mlr3::Learner}. 102 | Must be a subset of \code{\link[mlr3:mlr_reflections]{mlr_reflections$learner_properties}}. 103 | The following properties are currently standardized and understood by learners in \CRANpkg{mlr3}: 104 | \itemize{ 105 | \item \code{"missings"}: The learner can handle missing values in the data. 106 | \item \code{"weights"}: The learner supports observation weights. 107 | \item \code{"importance"}: The learner supports extraction of importance scores, i.e. comes with an \verb{$importance()} extractor function (see section on optional extractors in \link[mlr3:Learner]{mlr3::Learner}). 108 | \item \code{"selected_features"}: The learner supports extraction of the set of selected features, i.e. comes with a \verb{$selected_features()} extractor function (see section on optional extractors in \link[mlr3:Learner]{mlr3::Learner}). 109 | \item \code{"oob_error"}: The learner supports extraction of estimated out of bag error, i.e. comes with a \code{oob_error()} extractor function (see section on optional extractors in \link[mlr3:Learner]{mlr3::Learner}). 110 | }} 111 | 112 | \item{\code{packages}}{(\code{character()})\cr 113 | Set of required packages. 
114 | A warning is signaled by the constructor if at least one of the packages is not installed, 115 | but loaded (not attached) later on-demand via \code{\link[=requireNamespace]{requireNamespace()}}.} 116 | 117 | \item{\code{label}}{(\code{character(1)})\cr 118 | Label for the new instance.} 119 | 120 | \item{\code{man}}{(\code{character(1)})\cr 121 | String in the format \verb{[pkg]::[topic]} pointing to a manual page for this object. 122 | The referenced help page can be opened via method \verb{$help()}.} 123 | } 124 | \if{html}{\out{
}} 125 | } 126 | } 127 | \if{html}{\out{
}} 128 | \if{html}{\out{}} 129 | \if{latex}{\out{\hypertarget{method-LearnerClust-reset}{}}} 130 | \subsection{Method \code{reset()}}{ 131 | Reset \code{assignments} field before calling parent's \code{reset()}. 132 | \subsection{Usage}{ 133 | \if{html}{\out{
}}\preformatted{LearnerClust$reset()}\if{html}{\out{
}} 134 | } 135 | 136 | } 137 | \if{html}{\out{
}} 138 | \if{html}{\out{}} 139 | \if{latex}{\out{\hypertarget{method-LearnerClust-clone}{}}} 140 | \subsection{Method \code{clone()}}{ 141 | The objects of this class are cloneable with this method. 142 | \subsection{Usage}{ 143 | \if{html}{\out{
}}\preformatted{LearnerClust$clone(deep = FALSE)}\if{html}{\out{
}} 144 | } 145 | 146 | \subsection{Arguments}{ 147 | \if{html}{\out{
}} 148 | \describe{ 149 | \item{\code{deep}}{Whether to make a deep clone.} 150 | } 151 | \if{html}{\out{
}} 152 | } 153 | } 154 | } 155 | -------------------------------------------------------------------------------- /man/MeasureClust.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/MeasureClust.R 3 | \name{MeasureClust} 4 | \alias{MeasureClust} 5 | \title{Cluster Measure} 6 | \description{ 7 | This measure specializes \link[mlr3:Measure]{mlr3::Measure} for cluster analysis: 8 | \itemize{ 9 | \item \code{task_type} is set to \code{"clust"}. 10 | \item Possible values for \code{predict_type} are \code{"partition"} and \code{"prob"}. 11 | } 12 | 13 | Predefined measures can be found in the \link[mlr3misc:Dictionary]{mlr3misc::Dictionary} \link[mlr3:mlr_measures]{mlr3::mlr_measures}. 14 | } 15 | \seealso{ 16 | Example cluster measures: \code{\link[=mlr_measures_clust.dunn]{clust.dunn}} 17 | } 18 | \section{Super class}{ 19 | \code{\link[mlr3:Measure]{mlr3::Measure}} -> \code{MeasureClust} 20 | } 21 | \section{Methods}{ 22 | \subsection{Public methods}{ 23 | \itemize{ 24 | \item \href{#method-MeasureClust-new}{\code{MeasureClust$new()}} 25 | } 26 | } 27 | \if{html}{\out{ 28 |
Inherited methods 29 | 36 |
37 | }} 38 | \if{html}{\out{
}} 39 | \if{html}{\out{}} 40 | \if{latex}{\out{\hypertarget{method-MeasureClust-new}{}}} 41 | \subsection{Method \code{new()}}{ 42 | Creates a new instance of this \link[R6:R6Class]{R6} class. 43 | \subsection{Usage}{ 44 | \if{html}{\out{
}}\preformatted{MeasureClust$new( 45 | id, 46 | range, 47 | minimize = NA, 48 | aggregator = NULL, 49 | properties = character(), 50 | predict_type = "partition", 51 | task_properties = character(), 52 | packages = character(), 53 | label = NA_character_, 54 | man = NA_character_ 55 | )}\if{html}{\out{
}} 56 | } 57 | 58 | \subsection{Arguments}{ 59 | \if{html}{\out{
}} 60 | \describe{ 61 | \item{\code{id}}{(\code{character(1)})\cr 62 | Identifier for the new instance.} 63 | 64 | \item{\code{range}}{(\code{numeric(2)})\cr 65 | Feasible range for this measure as \code{c(lower_bound, upper_bound)}. 66 | Both bounds may be infinite.} 67 | 68 | \item{\code{minimize}}{(\code{logical(1)})\cr 69 | Set to \code{TRUE} if good predictions correspond to small values, 70 | and to \code{FALSE} if good predictions correspond to large values. 71 | If set to \code{NA} (default), tuning this measure is not possible.} 72 | 73 | \item{\code{aggregator}}{(\verb{function(x)})\cr 74 | Function to aggregate individual performance scores \code{x} where \code{x} is a numeric vector. 75 | If \code{NULL}, defaults to \code{\link[=mean]{mean()}}.} 76 | 77 | \item{\code{properties}}{(\code{character()})\cr 78 | Properties of the measure. 79 | Must be a subset of \link[mlr3:mlr_reflections]{mlr_reflections$measure_properties}. 80 | Supported by \code{mlr3}: 81 | \itemize{ 82 | \item \code{"requires_task"} (requires the complete \link[mlr3:Task]{mlr3::Task}), 83 | \item \code{"requires_learner"} (requires the trained \link[mlr3:Learner]{mlr3::Learner}), 84 | \item \code{"requires_train_set"} (requires the training indices from the \link[mlr3:Resampling]{mlr3::Resampling}), and 85 | \item \code{"na_score"} (the measure is expected to occasionally return \code{NA} or \code{NaN}). 86 | }} 87 | 88 | \item{\code{predict_type}}{(\code{character(1)})\cr 89 | Required predict type of the \link[mlr3:Learner]{mlr3::Learner}. 90 | Possible values are stored in \link[mlr3:mlr_reflections]{mlr_reflections$learner_predict_types}.} 91 | 92 | \item{\code{task_properties}}{(\code{character()})\cr 93 | Required task properties, see \link[mlr3:Task]{mlr3::Task}.} 94 | 95 | \item{\code{packages}}{(\code{character()})\cr 96 | Set of required packages. 
97 | A warning is signaled by the constructor if at least one of the packages is not installed, 98 | but loaded (not attached) later on-demand via \code{\link[=requireNamespace]{requireNamespace()}}.} 99 | 100 | \item{\code{label}}{(\code{character(1)})\cr 101 | Label for the new instance.} 102 | 103 | \item{\code{man}}{(\code{character(1)})\cr 104 | String in the format \verb{[pkg]::[topic]} pointing to a manual page for this object. 105 | The referenced help page can be opened via method \verb{$help()}.} 106 | } 107 | \if{html}{\out{
}} 108 | } 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /man/PredictionClust.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/PredictionClust.R 3 | \name{PredictionClust} 4 | \alias{PredictionClust} 5 | \title{Prediction Object for Cluster Analysis} 6 | \description{ 7 | This object wraps the predictions returned by a learner of class \link{LearnerClust}, i.e. 8 | the predicted partition and cluster probability. 9 | } 10 | \examples{ 11 | library(mlr3) 12 | library(mlr3cluster) 13 | task = tsk("usarrests") 14 | learner = lrn("clust.kmeans") 15 | p = learner$train(task)$predict(task) 16 | p$predict_types 17 | head(as.data.table(p)) 18 | } 19 | \concept{Prediction} 20 | \section{Super class}{ 21 | \code{\link[mlr3:Prediction]{mlr3::Prediction}} -> \code{PredictionClust} 22 | } 23 | \section{Active bindings}{ 24 | \if{html}{\out{
}} 25 | \describe{ 26 | \item{\code{partition}}{(\code{integer()})\cr 27 | Access the stored partition.} 28 | 29 | \item{\code{prob}}{(\code{matrix()})\cr 30 | Access the stored probabilities.} 31 | } 32 | \if{html}{\out{
}} 33 | } 34 | \section{Methods}{ 35 | \subsection{Public methods}{ 36 | \itemize{ 37 | \item \href{#method-PredictionClust-new}{\code{PredictionClust$new()}} 38 | \item \href{#method-PredictionClust-clone}{\code{PredictionClust$clone()}} 39 | } 40 | } 41 | \if{html}{\out{ 42 |
Inherited methods 43 | 51 |
52 | }} 53 | \if{html}{\out{
}} 54 | \if{html}{\out{}} 55 | \if{latex}{\out{\hypertarget{method-PredictionClust-new}{}}} 56 | \subsection{Method \code{new()}}{ 57 | Creates a new instance of this \link[R6:R6Class]{R6} class. 58 | \subsection{Usage}{ 59 | \if{html}{\out{
}}\preformatted{PredictionClust$new( 60 | task = NULL, 61 | row_ids = task$row_ids, 62 | partition = NULL, 63 | prob = NULL, 64 | check = TRUE 65 | )}\if{html}{\out{
}} 66 | } 67 | 68 | \subsection{Arguments}{ 69 | \if{html}{\out{
}} 70 | \describe{ 71 | \item{\code{task}}{(\link{TaskClust})\cr 72 | Task, used to extract defaults for \code{row_ids}.} 73 | 74 | \item{\code{row_ids}}{(\code{integer()})\cr 75 | Row ids of the predicted observations, i.e. the row ids of the test set.} 76 | 77 | \item{\code{partition}}{(\code{integer()})\cr 78 | Vector of cluster partitions.} 79 | 80 | \item{\code{prob}}{(\code{matrix()})\cr 81 | Numeric matrix of cluster membership probabilities with one column for each cluster 82 | and one row for each observation. 83 | Columns must be named with cluster numbers, row names are automatically removed. 84 | If \code{prob} is provided, but \code{partition} is not, the cluster memberships are calculated from 85 | the probabilities using \code{\link[=max.col]{max.col()}} with \code{ties.method} set to \code{"first"}.} 86 | 87 | \item{\code{check}}{(\code{logical(1)})\cr 88 | If \code{TRUE}, performs some argument checks and predict type conversions.} 89 | } 90 | \if{html}{\out{
}} 91 | } 92 | } 93 | \if{html}{\out{
}} 94 | \if{html}{\out{}} 95 | \if{latex}{\out{\hypertarget{method-PredictionClust-clone}{}}} 96 | \subsection{Method \code{clone()}}{ 97 | The objects of this class are cloneable with this method. 98 | \subsection{Usage}{ 99 | \if{html}{\out{
}}\preformatted{PredictionClust$clone(deep = FALSE)}\if{html}{\out{
}} 100 | } 101 | 102 | \subsection{Arguments}{ 103 | \if{html}{\out{
}} 104 | \describe{ 105 | \item{\code{deep}}{Whether to make a deep clone.} 106 | } 107 | \if{html}{\out{
}} 108 | } 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /man/TaskClust.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/TaskClust.R 3 | \name{TaskClust} 4 | \alias{TaskClust} 5 | \title{Cluster Task} 6 | \description{ 7 | This task specializes \link[mlr3:Task]{mlr3::Task} for cluster problems. 8 | As an unsupervised task, this task has no target column. 9 | The \code{task_type} is set to \code{"clust"}. 10 | 11 | Predefined tasks are stored in the \link[mlr3misc:Dictionary]{dictionary} \link[mlr3:mlr_tasks]{mlr3::mlr_tasks}. 12 | } 13 | \examples{ 14 | library(mlr3) 15 | library(mlr3cluster) 16 | task = TaskClust$new("usarrests", backend = USArrests) 17 | task$task_type 18 | 19 | # possible properties: 20 | mlr_reflections$task_properties$clust 21 | } 22 | \seealso{ 23 | Other Task: 24 | \code{\link{mlr_tasks_ruspini}}, 25 | \code{\link{mlr_tasks_usarrests}} 26 | } 27 | \concept{Task} 28 | \section{Super classes}{ 29 | \code{\link[mlr3:Task]{mlr3::Task}} -> \code{\link[mlr3:TaskUnsupervised]{mlr3::TaskUnsupervised}} -> \code{TaskClust} 30 | } 31 | \section{Methods}{ 32 | \subsection{Public methods}{ 33 | \itemize{ 34 | \item \href{#method-TaskClust-new}{\code{TaskClust$new()}} 35 | \item \href{#method-TaskClust-clone}{\code{TaskClust$clone()}} 36 | } 37 | } 38 | \if{html}{\out{ 39 |
Inherited methods 40 | 61 |
62 | }} 63 | \if{html}{\out{
}} 64 | \if{html}{\out{}} 65 | \if{latex}{\out{\hypertarget{method-TaskClust-new}{}}} 66 | \subsection{Method \code{new()}}{ 67 | Creates a new instance of this \link[R6:R6Class]{R6} class. 68 | \subsection{Usage}{ 69 | \if{html}{\out{
}}\preformatted{TaskClust$new(id, backend, label = NA_character_)}\if{html}{\out{
}} 70 | } 71 | 72 | \subsection{Arguments}{ 73 | \if{html}{\out{
}} 74 | \describe{ 75 | \item{\code{id}}{(\code{character(1)})\cr 76 | Identifier for the new instance.} 77 | 78 | \item{\code{backend}}{(\link[mlr3:DataBackend]{mlr3::DataBackend})\cr 79 | Either a \link[mlr3:DataBackend]{mlr3::DataBackend}, or any object which is convertible to a \link[mlr3:DataBackend]{mlr3::DataBackend} with \code{as_data_backend()}. 80 | E.g., a \code{data.frame()} will be converted to a \link[mlr3:DataBackendDataTable]{mlr3::DataBackendDataTable}.} 81 | 82 | \item{\code{label}}{(\code{character(1)})\cr 83 | Label for the new instance.} 84 | } 85 | \if{html}{\out{
}} 86 | } 87 | } 88 | \if{html}{\out{
}} 89 | \if{html}{\out{}} 90 | \if{latex}{\out{\hypertarget{method-TaskClust-clone}{}}} 91 | \subsection{Method \code{clone()}}{ 92 | The objects of this class are cloneable with this method. 93 | \subsection{Usage}{ 94 | \if{html}{\out{
}}\preformatted{TaskClust$clone(deep = FALSE)}\if{html}{\out{
}} 95 | } 96 | 97 | \subsection{Arguments}{ 98 | \if{html}{\out{
}} 99 | \describe{ 100 | \item{\code{deep}}{Whether to make a deep clone.} 101 | } 102 | \if{html}{\out{
}} 103 | } 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /man/as_prediction_clust.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/as_prediction_clust.R 3 | \name{as_prediction_clust} 4 | \alias{as_prediction_clust} 5 | \alias{as_prediction_clust.PredictionClust} 6 | \alias{as_prediction_clust.data.frame} 7 | \title{Convert to a Cluster Prediction} 8 | \usage{ 9 | as_prediction_clust(x, ...) 10 | 11 | \method{as_prediction_clust}{PredictionClust}(x, ...) 12 | 13 | \method{as_prediction_clust}{data.frame}(x, ...) 14 | } 15 | \arguments{ 16 | \item{x}{(any)\cr 17 | Object to convert.} 18 | 19 | \item{...}{(any)\cr 20 | Additional arguments.} 21 | } 22 | \value{ 23 | \link{PredictionClust}. 24 | } 25 | \description{ 26 | Convert object to a \link{PredictionClust}. 27 | } 28 | \examples{ 29 | if (requireNamespace("e1071")) { 30 | # create a prediction object 31 | task = tsk("usarrests") 32 | learner = lrn("clust.kmeans") 33 | learner = lrn("clust.cmeans", predict_type = "prob") 34 | learner$train(task) 35 | p = learner$predict(task) 36 | 37 | # convert to a data.table 38 | tab = as.data.table(p) 39 | 40 | # convert back to a Prediction 41 | as_prediction_clust(tab) 42 | 43 | # split data.table into 3 data.tables based on UrbanPop 44 | f = cut(task$data(rows = tab$row_ids)$UrbanPop, 3) 45 | tabs = split(tab, f) 46 | 47 | # convert back to list of predictions 48 | preds = lapply(tabs, as_prediction_clust) 49 | 50 | # calculate performance in each group 51 | sapply(preds, function(p) p$score(task = task)) 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /man/as_task_clust.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in
R/as_task_clust.R 3 | \name{as_task_clust} 4 | \alias{as_task_clust} 5 | \alias{as_task_clust.TaskClust} 6 | \alias{as_task_clust.data.frame} 7 | \alias{as_task_clust.DataBackend} 8 | \alias{as_task_clust.formula} 9 | \title{Convert to a Cluster Task} 10 | \usage{ 11 | as_task_clust(x, ...) 12 | 13 | \method{as_task_clust}{TaskClust}(x, clone = FALSE, ...) 14 | 15 | \method{as_task_clust}{data.frame}(x, id = deparse1(substitute(x)), ...) 16 | 17 | \method{as_task_clust}{DataBackend}(x, id = deparse1(substitute(x)), ...) 18 | 19 | \method{as_task_clust}{formula}(x, data, id = deparse1(substitute(data)), ...) 20 | } 21 | \arguments{ 22 | \item{x}{(any)\cr 23 | Object to convert.} 24 | 25 | \item{...}{(any)\cr 26 | Additional arguments.} 27 | 28 | \item{clone}{(\code{logical(1)})\cr 29 | If \code{TRUE}, ensures that the returned object is not the same as the input \code{x}.} 30 | 31 | \item{id}{(\code{character(1)})\cr 32 | Id for the new task. 33 | Defaults to the (deparsed and substituted) name of the data argument.} 34 | 35 | \item{data}{(\code{data.frame()})\cr 36 | Data frame containing all columns specified in formula \code{x}.} 37 | } 38 | \value{ 39 | \link{TaskClust}. 40 | } 41 | \description{ 42 | Convert object to a \link{TaskClust}. 43 | This is an S3 generic, specialized for at least the following objects: 44 | \enumerate{ 45 | \item \link{TaskClust}: ensures the identity. 46 | \item \code{\link[=data.frame]{data.frame()}} and \link[mlr3:DataBackend]{mlr3::DataBackend}: provides an alternative to calling the constructor of \link{TaskClust}.
47 | } 48 | } 49 | \examples{ 50 | as_task_clust(datasets::USArrests) 51 | } 52 | -------------------------------------------------------------------------------- /man/mlr3cluster-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/zzz.R 3 | \docType{package} 4 | \name{mlr3cluster-package} 5 | \alias{mlr3cluster} 6 | \alias{mlr3cluster-package} 7 | \title{mlr3cluster: Cluster Extension for 'mlr3'} 8 | \description{ 9 | Extends the 'mlr3' package with cluster analysis. 10 | } 11 | \seealso{ 12 | Useful links: 13 | \itemize{ 14 | \item \url{https://mlr3cluster.mlr-org.com} 15 | \item \url{https://github.com/mlr-org/mlr3cluster} 16 | \item Report bugs at \url{https://github.com/mlr-org/mlr3cluster/issues} 17 | } 18 | 19 | } 20 | \author{ 21 | \strong{Maintainer}: Maximilian Mücke \email{muecke.maximilian@gmail.com} (\href{https://orcid.org/0009-0000-9432-9795}{ORCID}) 22 | 23 | Authors: 24 | \itemize{ 25 | \item Damir Pulatov \email{damirpolat@protonmail.com} 26 | \item Michel Lang \email{michellang@gmail.com} (\href{https://orcid.org/0000-0001-9754-0393}{ORCID}) 27 | } 28 | 29 | Other contributors: 30 | \itemize{ 31 | \item Marc Becker \email{marcbecker@posteo.de} (\href{https://orcid.org/0000-0002-8115-0400}{ORCID}) [contributor] 32 | } 33 | 34 | } 35 | -------------------------------------------------------------------------------- /man/mlr_learners_clust.featureless.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/LearnerClustFeatureless.R 3 | \name{mlr_learners_clust.featureless} 4 | \alias{mlr_learners_clust.featureless} 5 | \alias{LearnerClustFeatureless} 6 | \title{Featureless Clustering Learner} 7 | \description{ 8 | A simple \link{LearnerClust} which randomly (but evenly) assigns observations to 9 | 
\code{num_clusters} partitions (default: 1 partition). 10 | } 11 | \section{Dictionary}{ 12 | 13 | This \link[mlr3:Learner]{mlr3::Learner} can be instantiated via the \link[mlr3misc:Dictionary]{dictionary} \link[mlr3:mlr_learners]{mlr3::mlr_learners} or with the associated sugar function \code{\link[mlr3:mlr_sugar]{mlr3::lrn()}}: 14 | 15 | \if{html}{\out{
}}\preformatted{mlr_learners$get("clust.featureless") 16 | lrn("clust.featureless") 17 | }\if{html}{\out{
}} 18 | } 19 | 20 | \section{Meta Information}{ 21 | 22 | \itemize{ 23 | \item Task type: \dQuote{clust} 24 | \item Predict Types: \dQuote{partition}, \dQuote{prob} 25 | \item Feature Types: \dQuote{logical}, \dQuote{integer}, \dQuote{numeric} 26 | \item Required Packages: \CRANpkg{mlr3}, \CRANpkg{mlr3cluster} 27 | } 28 | } 29 | 30 | \section{Parameters}{ 31 | \tabular{llll}{ 32 | Id \tab Type \tab Default \tab Range \cr 33 | num_clusters \tab integer \tab - \tab \eqn{[1, \infty)}{[1, Inf)} \cr 34 | } 35 | } 36 | 37 | \examples{ 38 | if (requireNamespace("mlr3")) { 39 | learner = mlr3::lrn("clust.featureless") 40 | print(learner) 41 | 42 | # available parameters: 43 | learner$param_set$ids() 44 | } 45 | } 46 | \seealso{ 47 | \itemize{ 48 | \item Chapter in the \href{https://mlr3book.mlr-org.com/}{mlr3book}: 49 | \url{https://mlr3book.mlr-org.com/chapters/chapter2/data_and_basic_modeling.html#sec-learners} 50 | \item Package \href{https://github.com/mlr-org/mlr3extralearners}{mlr3extralearners} for more learners. 51 | \item \link[mlr3misc:Dictionary]{Dictionary} of \link[mlr3:Learner]{Learners}: \link[mlr3:mlr_learners]{mlr3::mlr_learners} 52 | \item \code{as.data.table(mlr_learners)} for a table of available \link[mlr3:Learner]{Learners} in the running session (depending on the loaded packages). 53 | \item \CRANpkg{mlr3pipelines} to combine learners with pre- and postprocessing steps. 54 | \item Extension packages for additional task types: 55 | \itemize{ 56 | \item \CRANpkg{mlr3proba} for probabilistic supervised regression and survival analysis. 57 | \item \CRANpkg{mlr3cluster} for unsupervised clustering. 58 | } 59 | \item \CRANpkg{mlr3tuning} for tuning of hyperparameters, \CRANpkg{mlr3tuningspaces} 60 | for established default tuning spaces. 
61 | } 62 | 63 | Other Learner: 64 | \code{\link{mlr_learners_clust.MBatchKMeans}}, 65 | \code{\link{mlr_learners_clust.SimpleKMeans}}, 66 | \code{\link{mlr_learners_clust.agnes}}, 67 | \code{\link{mlr_learners_clust.ap}}, 68 | \code{\link{mlr_learners_clust.bico}}, 69 | \code{\link{mlr_learners_clust.birch}}, 70 | \code{\link{mlr_learners_clust.cmeans}}, 71 | \code{\link{mlr_learners_clust.cobweb}}, 72 | \code{\link{mlr_learners_clust.dbscan}}, 73 | \code{\link{mlr_learners_clust.dbscan_fpc}}, 74 | \code{\link{mlr_learners_clust.diana}}, 75 | \code{\link{mlr_learners_clust.em}}, 76 | \code{\link{mlr_learners_clust.fanny}}, 77 | \code{\link{mlr_learners_clust.ff}}, 78 | \code{\link{mlr_learners_clust.hclust}}, 79 | \code{\link{mlr_learners_clust.hdbscan}}, 80 | \code{\link{mlr_learners_clust.kkmeans}}, 81 | \code{\link{mlr_learners_clust.kmeans}}, 82 | \code{\link{mlr_learners_clust.mclust}}, 83 | \code{\link{mlr_learners_clust.meanshift}}, 84 | \code{\link{mlr_learners_clust.optics}}, 85 | \code{\link{mlr_learners_clust.pam}}, 86 | \code{\link{mlr_learners_clust.xmeans}} 87 | } 88 | \concept{Learner} 89 | \section{Super classes}{ 90 | \code{\link[mlr3:Learner]{mlr3::Learner}} -> \code{\link[mlr3cluster:LearnerClust]{mlr3cluster::LearnerClust}} -> \code{LearnerClustFeatureless} 91 | } 92 | \section{Methods}{ 93 | \subsection{Public methods}{ 94 | \itemize{ 95 | \item \href{#method-LearnerClustFeatureless-new}{\code{LearnerClustFeatureless$new()}} 96 | \item \href{#method-LearnerClustFeatureless-clone}{\code{LearnerClustFeatureless$clone()}} 97 | } 98 | } 99 | \if{html}{\out{ 100 |
Inherited methods 101 | 112 |
113 | }} 114 | \if{html}{\out{
}} 115 | \if{html}{\out{}} 116 | \if{latex}{\out{\hypertarget{method-LearnerClustFeatureless-new}{}}} 117 | \subsection{Method \code{new()}}{ 118 | Creates a new instance of this \link[R6:R6Class]{R6} class. 119 | \subsection{Usage}{ 120 | \if{html}{\out{
}}\preformatted{LearnerClustFeatureless$new()}\if{html}{\out{
}} 121 | } 122 | 123 | } 124 | \if{html}{\out{
}} 125 | \if{html}{\out{}} 126 | \if{latex}{\out{\hypertarget{method-LearnerClustFeatureless-clone}{}}} 127 | \subsection{Method \code{clone()}}{ 128 | The objects of this class are cloneable with this method. 129 | \subsection{Usage}{ 130 | \if{html}{\out{
}}\preformatted{LearnerClustFeatureless$clone(deep = FALSE)}\if{html}{\out{
}} 131 | } 132 | 133 | \subsection{Arguments}{ 134 | \if{html}{\out{
}} 135 | \describe{ 136 | \item{\code{deep}}{Whether to make a deep clone.} 137 | } 138 | \if{html}{\out{
}} 139 | } 140 | } 141 | } 142 | -------------------------------------------------------------------------------- /man/mlr_measures_clust.ch.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/MeasureClustInternal.R 3 | \name{mlr_measures_clust.ch} 4 | \alias{mlr_measures_clust.ch} 5 | \title{Calinski Harabasz Pseudo F-Statistic} 6 | \format{ 7 | \code{\link[R6:R6Class]{R6::R6Class()}} inheriting from \link{MeasureClust}. 8 | } 9 | \description{ 10 | The score function calls \code{\link[fpc:cluster.stats]{fpc::cluster.stats()}} from package \CRANpkg{fpc}. 11 | "ch" is used to subset the output of the function call. 12 | } 13 | \section{Construction}{ 14 | 15 | This measure can be retrieved from the dictionary \link[mlr3:mlr_measures]{mlr3::mlr_measures}: 16 | 17 | \if{html}{\out{
}}\preformatted{mlr_measures$get("clust.ch") 18 | msr("clust.ch") 19 | }\if{html}{\out{
}} 20 | } 21 | 22 | \section{Meta Information}{ 23 | 24 | \itemize{ 25 | \item Range: \eqn{[0, \infty)}{[0, Inf)} 26 | \item Minimize: \code{FALSE} 27 | \item Required predict type: \code{partition} 28 | } 29 | } 30 | 31 | \seealso{ 32 | \link[mlr3misc:Dictionary]{Dictionary} of \link[mlr3:Measure]{Measures}: \link[mlr3:mlr_measures]{mlr3::mlr_measures} 33 | 34 | \code{as.data.table(mlr_measures)} for a complete table of all (also dynamically created) \link[mlr3:Measure]{mlr3::Measure} implementations. 35 | 36 | Other cluster measures: 37 | \code{\link{mlr_measures_clust.dunn}}, 38 | \code{\link{mlr_measures_clust.silhouette}}, 39 | \code{\link{mlr_measures_clust.wss}} 40 | } 41 | \concept{cluster measures} 42 | -------------------------------------------------------------------------------- /man/mlr_measures_clust.dunn.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/MeasureClustInternal.R 3 | \name{mlr_measures_clust.dunn} 4 | \alias{mlr_measures_clust.dunn} 5 | \title{Dunn Index} 6 | \format{ 7 | \code{\link[R6:R6Class]{R6::R6Class()}} inheriting from \link{MeasureClust}. 8 | } 9 | \description{ 10 | The score function calls \code{\link[fpc:cluster.stats]{fpc::cluster.stats()}} from package \CRANpkg{fpc}. 11 | "dunn" is used to subset the output of the function call. 12 | } 13 | \section{Construction}{ 14 | 15 | This measure can be retrieved from the dictionary \link[mlr3:mlr_measures]{mlr3::mlr_measures}: 16 | 17 | \if{html}{\out{
}}\preformatted{mlr_measures$get("clust.dunn") 18 | msr("clust.dunn") 19 | }\if{html}{\out{
}} 20 | } 21 | 22 | \section{Meta Information}{ 23 | 24 | \itemize{ 25 | \item Range: \eqn{[0, \infty)}{[0, Inf)} 26 | \item Minimize: \code{FALSE} 27 | \item Required predict type: \code{partition} 28 | } 29 | } 30 | 31 | \seealso{ 32 | \link[mlr3misc:Dictionary]{Dictionary} of \link[mlr3:Measure]{Measures}: \link[mlr3:mlr_measures]{mlr3::mlr_measures} 33 | 34 | \code{as.data.table(mlr_measures)} for a complete table of all (also dynamically created) \link[mlr3:Measure]{mlr3::Measure} implementations. 35 | 36 | Other cluster measures: 37 | \code{\link{mlr_measures_clust.ch}}, 38 | \code{\link{mlr_measures_clust.silhouette}}, 39 | \code{\link{mlr_measures_clust.wss}} 40 | } 41 | \concept{cluster measures} 42 | -------------------------------------------------------------------------------- /man/mlr_measures_clust.silhouette.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/MeasureClustInternal.R 3 | \name{mlr_measures_clust.silhouette} 4 | \alias{mlr_measures_clust.silhouette} 5 | \title{Rousseeuw's Silhouette Quality Index} 6 | \format{ 7 | \code{\link[R6:R6Class]{R6::R6Class()}} inheriting from \link{MeasureClust}. 8 | } 9 | \description{ 10 | The score function calls \code{\link[cluster:silhouette]{cluster::silhouette()}} from package \CRANpkg{cluster}. 11 | The "sil_width" element is extracted from the output of the function call. 12 | } 13 | \section{Construction}{ 14 | 15 | This measure can be retrieved from the dictionary \link[mlr3:mlr_measures]{mlr3::mlr_measures}: 16 | 17 | \if{html}{\out{
}}\preformatted{mlr_measures$get("clust.silhouette") 18 | msr("clust.silhouette") 19 | }\if{html}{\out{
}} 20 | } 21 | 22 | \section{Meta Information}{ 23 | 24 | \itemize{ 25 | \item Range: \eqn{[0, \infty)}{[0, Inf)} 26 | \item Minimize: \code{FALSE} 27 | \item Required predict type: \code{partition} 28 | } 29 | } 30 | 31 | \seealso{ 32 | \link[mlr3misc:Dictionary]{Dictionary} of \link[mlr3:Measure]{Measures}: \link[mlr3:mlr_measures]{mlr3::mlr_measures} 33 | 34 | \code{as.data.table(mlr_measures)} for a complete table of all (also dynamically created) \link[mlr3:Measure]{mlr3::Measure} implementations. 35 | 36 | Other cluster measures: 37 | \code{\link{mlr_measures_clust.ch}}, 38 | \code{\link{mlr_measures_clust.dunn}}, 39 | \code{\link{mlr_measures_clust.wss}} 40 | } 41 | \concept{cluster measures} 42 | -------------------------------------------------------------------------------- /man/mlr_measures_clust.wss.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/MeasureClustInternal.R 3 | \name{mlr_measures_clust.wss} 4 | \alias{mlr_measures_clust.wss} 5 | \title{Within Sum of Squares} 6 | \format{ 7 | \code{\link[R6:R6Class]{R6::R6Class()}} inheriting from \link{MeasureClust}. 8 | } 9 | \description{ 10 | The score function calls \code{\link[fpc:cluster.stats]{fpc::cluster.stats()}} from package \CRANpkg{fpc}. 11 | The "within.cluster.ss" element is extracted from the output of the function call. 12 | } 13 | \section{Construction}{ 14 | 15 | This measure can be retrieved from the dictionary \link[mlr3:mlr_measures]{mlr3::mlr_measures}: 16 | 17 | \if{html}{\out{
}}\preformatted{mlr_measures$get("clust.wss") 18 | msr("clust.wss") 19 | }\if{html}{\out{
}} 20 | } 21 | 22 | \section{Meta Information}{ 23 | 24 | \itemize{ 25 | \item Range: \eqn{[0, \infty)}{[0, Inf)} 26 | \item Minimize: \code{TRUE} 27 | \item Required predict type: \code{partition} 28 | } 29 | } 30 | 31 | \seealso{ 32 | \link[mlr3misc:Dictionary]{Dictionary} of \link[mlr3:Measure]{Measures}: \link[mlr3:mlr_measures]{mlr3::mlr_measures} 33 | 34 | \code{as.data.table(mlr_measures)} for a complete table of all (also dynamically created) \link[mlr3:Measure]{mlr3::Measure} implementations. 35 | 36 | Other cluster measures: 37 | \code{\link{mlr_measures_clust.ch}}, 38 | \code{\link{mlr_measures_clust.dunn}}, 39 | \code{\link{mlr_measures_clust.silhouette}} 40 | } 41 | \concept{cluster measures} 42 | -------------------------------------------------------------------------------- /man/mlr_tasks_ruspini.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/TaskClust_ruspini.R 3 | \name{mlr_tasks_ruspini} 4 | \alias{mlr_tasks_ruspini} 5 | \title{Ruspini Cluster Task} 6 | \format{ 7 | \link[R6:R6Class]{R6::R6Class} inheriting from \link{TaskClust}. 8 | } 9 | \description{ 10 | A cluster task for the \link[cluster:ruspini]{cluster::ruspini} data set. 11 | } 12 | \section{Dictionary}{ 13 | 14 | This \link[mlr3:Task]{mlr3::Task} can be instantiated via the \link[mlr3misc:Dictionary]{dictionary} \link[mlr3:mlr_tasks]{mlr3::mlr_tasks} or with the associated sugar function \code{\link[mlr3:mlr_sugar]{mlr3::tsk()}}: 15 | 16 | \if{html}{\out{
}}\preformatted{mlr_tasks$get("ruspini") 17 | tsk("ruspini") 18 | }\if{html}{\out{
}} 19 | } 20 | 21 | \section{Meta Information}{ 22 | 23 | \itemize{ 24 | \item Task type: \dQuote{clust} 25 | \item Dimensions: 75x2 26 | \item Properties: - 27 | \item Has Missings: \code{FALSE} 28 | \item Target: - 29 | \item Features: \dQuote{x}, \dQuote{y} 30 | } 31 | } 32 | 33 | \references{ 34 | Ruspini EH (1970). 35 | \dQuote{Numerical methods for fuzzy clustering.} 36 | \emph{Information Sciences}, \bold{2}(3), 319-350. 37 | \doi{10.1016/S0020-0255(70)80056-1}. 38 | } 39 | \seealso{ 40 | \itemize{ 41 | \item Chapter in the \href{https://mlr3book.mlr-org.com/}{mlr3book}: 42 | \url{https://mlr3book.mlr-org.com/chapters/chapter2/data_and_basic_modeling.html} 43 | \item Package \CRANpkg{mlr3data} for more toy tasks. 44 | \item Package \CRANpkg{mlr3oml} for downloading tasks from \url{https://www.openml.org}. 45 | \item Package \CRANpkg{mlr3viz} for some generic visualizations. 46 | \item \link[mlr3misc:Dictionary]{Dictionary} of \link[mlr3:Task]{Tasks}: \link[mlr3:mlr_tasks]{mlr3::mlr_tasks} 47 | \item \code{as.data.table(mlr_tasks)} for a table of available \link[mlr3:Task]{Tasks} in the running session (depending on the loaded packages). 48 | \item \CRANpkg{mlr3fselect} and \CRANpkg{mlr3filters} for feature selection and feature filtering. 49 | \item Extension packages for additional task types: 50 | \itemize{ 51 | \item Unsupervised clustering: \CRANpkg{mlr3cluster} 52 | \item Probabilistic supervised regression and survival analysis: \url{https://mlr3proba.mlr-org.com/}. 
53 | } 54 | } 55 | 56 | Other Task: 57 | \code{\link{TaskClust}}, 58 | \code{\link{mlr_tasks_usarrests}} 59 | } 60 | \concept{Task} 61 | -------------------------------------------------------------------------------- /man/mlr_tasks_usarrests.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/TaskClust_usarrest.R 3 | \name{mlr_tasks_usarrests} 4 | \alias{mlr_tasks_usarrests} 5 | \title{US Arrests Cluster Task} 6 | \format{ 7 | \link[R6:R6Class]{R6::R6Class} inheriting from \link{TaskClust}. 8 | } 9 | \description{ 10 | A cluster task for the \link[datasets:USArrests]{datasets::USArrests} data set. 11 | Rownames are stored as variable \code{"states"} with column role \code{"name"}. 12 | } 13 | \section{Dictionary}{ 14 | 15 | This \link[mlr3:Task]{mlr3::Task} can be instantiated via the \link[mlr3misc:Dictionary]{dictionary} \link[mlr3:mlr_tasks]{mlr3::mlr_tasks} or with the associated sugar function \code{\link[mlr3:mlr_sugar]{mlr3::tsk()}}: 16 | 17 | \if{html}{\out{
}}\preformatted{mlr_tasks$get("usarrests") 18 | tsk("usarrests") 19 | }\if{html}{\out{
}} 20 | } 21 | 22 | \section{Meta Information}{ 23 | 24 | \itemize{ 25 | \item Task type: \dQuote{clust} 26 | \item Dimensions: 50x4 27 | \item Properties: - 28 | \item Has Missings: \code{FALSE} 29 | \item Target: - 30 | \item Features: \dQuote{Assault}, \dQuote{Murder}, \dQuote{Rape}, \dQuote{UrbanPop} 31 | } 32 | } 33 | 34 | \references{ 35 | Berry, Brian J (1979). 36 | \dQuote{Interactive Data Analysis: A Practical Primer.} 37 | \emph{Journal of the Royal Statistical Society: Series C (Applied Statistics)}, \bold{28}, 181. 38 | } 39 | \seealso{ 40 | \itemize{ 41 | \item Chapter in the \href{https://mlr3book.mlr-org.com/}{mlr3book}: 42 | \url{https://mlr3book.mlr-org.com/chapters/chapter2/data_and_basic_modeling.html} 43 | \item Package \CRANpkg{mlr3data} for more toy tasks. 44 | \item Package \CRANpkg{mlr3oml} for downloading tasks from \url{https://www.openml.org}. 45 | \item Package \CRANpkg{mlr3viz} for some generic visualizations. 46 | \item \link[mlr3misc:Dictionary]{Dictionary} of \link[mlr3:Task]{Tasks}: \link[mlr3:mlr_tasks]{mlr3::mlr_tasks} 47 | \item \code{as.data.table(mlr_tasks)} for a table of available \link[mlr3:Task]{Tasks} in the running session (depending on the loaded packages). 48 | \item \CRANpkg{mlr3fselect} and \CRANpkg{mlr3filters} for feature selection and feature filtering. 49 | \item Extension packages for additional task types: 50 | \itemize{ 51 | \item Unsupervised clustering: \CRANpkg{mlr3cluster} 52 | \item Probabilistic supervised regression and survival analysis: \url{https://mlr3proba.mlr-org.com/}. 
53 | } 54 | } 55 | 56 | Other Task: 57 | \code{\link{TaskClust}}, 58 | \code{\link{mlr_tasks_ruspini}} 59 | } 60 | \concept{Task} 61 | -------------------------------------------------------------------------------- /mlr3cluster.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | -------------------------------------------------------------------------------- /pkgdown/_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://mlr3cluster.mlr-org.com 2 | 3 | template: 4 | bootstrap: 5 5 | light-switch: true 6 | math-rendering: mathjax 7 | package: mlr3pkgdowntemplate 8 | 9 | development: 10 | mode: auto 11 | version_label: default 12 | version_tooltip: "Version" 13 | 14 | toc: 15 | depth: 3 16 | 17 | navbar: 18 | structure: 19 | left: [reference, news, book] 20 | right: [search, github, mattermost, stackoverflow, rss, lightswitch] 21 | components: 22 | home: ~ 23 | reference: 24 | icon: fa fa-file-alt 25 | text: Reference 26 | href: reference/index.html 27 | mattermost: 28 | icon: fa fa-comments 29 | href: https://lmmisld-lmu-stats-slds.srv.mwn.de/mlr_invite/ 30 | book: 31 | text: mlr3book 32 | icon: fa fa-link 33 | href: https://mlr3book.mlr-org.com 34 | stackoverflow: 35 | icon: fab fa-stack-overflow 36 | href: https://stackoverflow.com/questions/tagged/mlr3 37 | rss: 38 | icon: fa-rss 39 | href: https://mlr-org.com/ 40 | -------------------------------------------------------------------------------- 
/pkgdown/favicon/apple-touch-icon-120x120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3cluster/f3615be21c13af3d23989a219e43c4c1a4ab6ca5/pkgdown/favicon/apple-touch-icon-120x120.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-152x152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3cluster/f3615be21c13af3d23989a219e43c4c1a4ab6ca5/pkgdown/favicon/apple-touch-icon-152x152.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-180x180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3cluster/f3615be21c13af3d23989a219e43c4c1a4ab6ca5/pkgdown/favicon/apple-touch-icon-180x180.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-60x60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3cluster/f3615be21c13af3d23989a219e43c4c1a4ab6ca5/pkgdown/favicon/apple-touch-icon-60x60.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-76x76.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3cluster/f3615be21c13af3d23989a219e43c4c1a4ab6ca5/pkgdown/favicon/apple-touch-icon-76x76.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3cluster/f3615be21c13af3d23989a219e43c4c1a4ab6ca5/pkgdown/favicon/apple-touch-icon.png 
-------------------------------------------------------------------------------- /pkgdown/favicon/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3cluster/f3615be21c13af3d23989a219e43c4c1a4ab6ca5/pkgdown/favicon/favicon-16x16.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3cluster/f3615be21c13af3d23989a219e43c4c1a4ab6ca5/pkgdown/favicon/favicon-32x32.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3cluster/f3615be21c13af3d23989a219e43c4c1a4ab6ca5/pkgdown/favicon/favicon.ico -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | if (requireNamespace("testthat", quietly = TRUE)) { 2 | library("checkmate") 3 | library("testthat") 4 | library("mlr3") 5 | library("mlr3cluster") 6 | test_check("mlr3cluster") 7 | } 8 | -------------------------------------------------------------------------------- /tests/testthat/helper.R: -------------------------------------------------------------------------------- 1 | library(checkmate) 2 | library(mlr3) 3 | library(mlr3cluster) 4 | library(testthat) 5 | -------------------------------------------------------------------------------- /tests/testthat/helper_expectations.R: -------------------------------------------------------------------------------- 1 | expect_prediction_clust = function(p) { 2 | expect_prediction(p) 3 | expect_r6(p, "PredictionClust", public = c("row_ids", "truth", "predict_types", "prob", "partition")) 4 
| expect_numeric(p$truth, any.missing = TRUE, len = length(p$row_ids), null.ok = TRUE) 5 | expect_numeric(p$partition, any.missing = FALSE, len = length(p$row_ids), null.ok = TRUE) 6 | if ("prob" %chin% p$predict_types) { 7 | expect_matrix(p$prob, "numeric", any.missing = FALSE, nrows = length(p$row_ids)) 8 | } 9 | } 10 | 11 | expect_task_clust = function(task) expect_r6(task, "TaskClust") 12 | 13 | expect_prediction_complete = function(p, predict_type) { 14 | expect_false(anyMissing(p[[predict_type]])) 15 | } 16 | 17 | expect_prediction_exclusive = function(p, predict_type) { 18 | expect_atomic(p[[predict_type]]) 19 | expect_integer(p[[predict_type]]) 20 | } 21 | 22 | expect_prediction_fuzzy = function(p, predict_type) { 23 | expect_numeric(p$prob, lower = 0L, upper = 1L) 24 | expect_numeric(round(rowSums(p$prob), 2), lower = 1L, upper = 1L) 25 | 26 | partition = max.col(p$prob, ties.method = "first") 27 | partition = as.numeric(colnames(p$prob)[partition]) 28 | expect_true(all(partition == p$partition)) # every row's first-max prob column must match the reported partition 29 | } 30 | -------------------------------------------------------------------------------- /tests/testthat/helper_mlr3.R: -------------------------------------------------------------------------------- 1 | lapply(list.files(system.file("testthat", package = "mlr3"), pattern = "^helper.*\\.[rR]", full.names = TRUE), source) 2 | 3 | generate_tasks.LearnerClust = function(learner, N = 20L) { # nolint 4 | set.seed(1L) 5 | data = mlbench::mlbench.2dnormals(N, cl = 2L, r = 2, sd = 0.1) 6 | task = TaskClust$new("sanity", mlr3::as_data_backend(as.data.frame(data$x))) 7 | list(task) 8 | } 9 | registerS3method("generate_tasks", "LearnerClust", generate_tasks.LearnerClust, envir = parent.frame()) 10 | 11 | sanity_check.PredictionClust = function(prediction, task, ...) 
{ # nolint 12 | prediction$score(measures = msr("clust.silhouette"), task = task) > -1L 13 | } 14 | registerS3method("sanity_check", "PredictionClust", sanity_check.PredictionClust, envir = parent.frame()) 15 | -------------------------------------------------------------------------------- /tests/testthat/test_LearnerClust.R: -------------------------------------------------------------------------------- 1 | test_that("predict on newdata works / clust", { 2 | task = tsk("usarrests")$filter(1:40) 3 | learner = lrn("clust.featureless", num_clusters = 1L) 4 | expect_error(learner$predict(task), "trained") 5 | learner$train(task) 6 | expect_task(learner$state$train_task) 7 | newdata = tsk("usarrests")$filter(41:50)$data() 8 | 9 | # passing the task 10 | p = learner$predict_newdata(newdata = newdata, task = task) 11 | expect_data_table(as.data.table(p), nrows = 10) 12 | expect_set_equal(as.data.table(p)$row_ids, 1:10) 13 | expect_null(p$truth) 14 | 15 | # rely on internally stored task representation 16 | p = learner$predict_newdata(newdata = newdata, task = NULL) 17 | expect_data_table(as.data.table(p), nrows = 10L) 18 | expect_set_equal(as.data.table(p)$row_ids, 1:10) 19 | expect_null(p$truth) 20 | }) 21 | 22 | test_that("reset()", { 23 | task = tsk("usarrests") 24 | learner = lrn("clust.featureless", num_clusters = 2L) 25 | 26 | learner$train(task) 27 | expect_list(learner$state, names = "unique") 28 | expect_learner(learner$reset()) 29 | expect_null(learner$state) 30 | }) 31 | 32 | test_that("empty predict set (#421)", { 33 | task = tsk("usarrests") 34 | learner = lrn("clust.featureless", num_clusters = 1L) 35 | resampling = rsmp("holdout", ratio = 1) 36 | hout = resampling$instantiate(task) 37 | model = learner$train(task, hout$train_set(1)) 38 | pred = learner$predict(task, hout$test_set(1)) 39 | expect_match(learner$log$msg, "No data to predict on", fixed = TRUE, all = FALSE) 40 | }) 41 | 42 | test_that("assignment saving works", { 43 | task = tsk("usarrests") 
44 | learner = lrn("clust.featureless") 45 | 46 | expect_true(learner$save_assignments) 47 | learner$train(task) 48 | expect_vector(learner$assignments) 49 | expect_length(learner$assignments, task$nrow) 50 | 51 | learner$reset() 52 | learner$save_assignments = FALSE 53 | expect_false(learner$save_assignments) 54 | learner$train(task) 55 | expect_null(learner$assignments) 56 | }) 57 | -------------------------------------------------------------------------------- /tests/testthat/test_MeasureClust.R: -------------------------------------------------------------------------------- 1 | test_that("Cluster measures", { 2 | keys = mlr_measures$keys("clust") 3 | task = tsk("usarrests") 4 | learner = lrn("clust.kmeans", centers = 2L) 5 | p = learner$train(task)$predict(task) 6 | 7 | for (key in keys) { 8 | m = mlr_measures$get(key) 9 | if (m$task_type == "clust") { 10 | perf = m$score(prediction = p, task = task, learner = learner) 11 | expect_number(perf, na.ok = FALSE, lower = m$range[1], upper = m$range[2]) 12 | } 13 | } 14 | }) 15 | -------------------------------------------------------------------------------- /tests/testthat/test_PredictionClust.R: -------------------------------------------------------------------------------- 1 | test_that("Construction", { 2 | task = tsk("usarrests") 3 | p = PredictionClust$new(row_ids = task$row_ids, partition = rep.int(1L, nrow(task$data()))) 4 | expect_prediction(p) 5 | expect_prediction_clust(p) 6 | 7 | expect_prediction(c(p, p)) 8 | }) 9 | 10 | test_that("Internally constructed Prediction", { 11 | task = tsk("usarrests") 12 | learner = lrn("clust.featureless", num_clusters = 1L) 13 | p = learner$train(task)$predict(task) 14 | expect_prediction(p) 15 | expect_prediction_clust(p) 16 | }) 17 | 18 | test_that("filter works", { 19 | task = tsk("usarrests") 20 | learner = lrn("clust.featureless", num_clusters = 1L) 21 | p = learner$train(task)$predict(task) 22 | pdata = p$data 23 | 24 | pdata = filter_prediction_data(pdata, 
row_ids = 1:3) 25 | expect_set_equal(pdata$row_ids, 1:3) 26 | expect_integer(pdata$partition, len = 3) 27 | }) 28 | 29 | test_that("construction of empty PredictionDataClust", { 30 | task = tsk("usarrests") 31 | 32 | learner = lrn("clust.featureless", predict_type = "partition") 33 | learner$train(task) 34 | pred = learner$predict(task, row_ids = integer()) 35 | expect_prediction(pred) 36 | expect_set_equal(pred$predict_types, "partition") 37 | expect_integer(pred$row_ids, len = 0L) 38 | expect_numeric(pred$partition, len = 0L) 39 | expect_null(pred$prob) 40 | expect_data_table(as.data.table(pred), nrows = 0L, ncols = 2L) 41 | 42 | learner = lrn("clust.featureless", predict_type = "prob") 43 | learner$train(task) 44 | pred = learner$predict(task, row_ids = integer()) 45 | expect_prediction(pred) 46 | expect_set_equal(pred$predict_types, c("partition", "prob")) 47 | expect_integer(pred$row_ids, len = 0L) 48 | expect_numeric(pred$partition, len = 0L) 49 | expect_numeric(pred$prob, len = 0L) 50 | expect_data_table(as.data.table(pred), nrows = 0L, ncols = 3L) 51 | }) 52 | -------------------------------------------------------------------------------- /tests/testthat/test_TaskClust.R: -------------------------------------------------------------------------------- 1 | test_that("Basic ops on usarrests task", { 2 | task = tsk("usarrests") 3 | expect_task(task) 4 | expect_task_clust(task) 5 | expect_identical(task$target_names, character()) 6 | }) 7 | 8 | test_that("Basic ops on ruspini task", { 9 | task = tsk("ruspini") 10 | expect_task(task) 11 | expect_task_clust(task) 12 | expect_identical(task$target_names, character()) 13 | }) 14 | 15 | test_that("0 feature task", { 16 | b = as_data_backend(data.table(ids = 1:30)) 17 | task = TaskClust$new(id = "zero_feat_task", b) 18 | expect_output(print(task)) 19 | b = task$backend 20 | expect_backend(b) 21 | expect_task(task) 22 | expect_task_clust(task) 23 | expect_data_table(task$data(), ncols = 1L) 24 | 25 | learner = 
lrn("clust.featureless", num_clusters = 3L) 26 | p = learner$train(task)$predict(task) 27 | expect_prediction(p) 28 | }) 29 | -------------------------------------------------------------------------------- /tests/testthat/test_mlr_learners_clust_agnes.R: -------------------------------------------------------------------------------- 1 | skip_if_not_installed("clue") 2 | 3 | test_that("autotest", { 4 | learner = lrn("clust.agnes") 5 | expect_learner(learner) 6 | task = generate_tasks(learner) 7 | learner$train(task[[1]]) 8 | expect_class(learner$model, "agnes") 9 | expect_warning(learner$predict(task[[1]]), "doesn't predict on new data") 10 | }) 11 | 12 | test_that("Learner properties are respected", { 13 | task = tsk("usarrests") 14 | learner = lrn("clust.agnes") 15 | expect_learner(learner, task) 16 | 17 | # test on multiple paramsets 18 | parset_list = list( 19 | list(k = 2L), 20 | list(k = 5L), 21 | list(k = 2L, metric = "manhattan", method = "single"), 22 | list(k = 2L, stand = TRUE) 23 | ) 24 | 25 | for (i in seq_along(parset_list)) { 26 | parset = parset_list[[i]] 27 | learner$param_set$values = parset 28 | 29 | p = suppressWarnings(learner$train(task)$predict(task)) 30 | expect_prediction_clust(p) 31 | 32 | if ("complete" %chin% learner$properties) { 33 | expect_prediction_complete(p, learner$predict_type) 34 | } 35 | if ("exclusive" %chin% learner$properties) { 36 | expect_prediction_exclusive(p, learner$predict_type) 37 | } 38 | } 39 | }) 40 | -------------------------------------------------------------------------------- /tests/testthat/test_mlr_learners_clust_ap.R: -------------------------------------------------------------------------------- 1 | skip_if_not_installed("apcluster") 2 | 3 | test_that("autotest", { 4 | learner = lrn("clust.ap", s = apcluster::negDistMat(r = 2L)) 5 | expect_learner(learner) 6 | result = run_autotest(learner) 7 | expect_true(result, info = result$error) 8 | }) 9 | 10 | test_that("Learner properties are respected", { 11 | 
task = tsk("usarrests") 12 | learner = lrn("clust.ap") 13 | expect_learner(learner, task) 14 | 15 | # test on multiple paramsets 16 | parset_list = list( 17 | list(s = apcluster::negDistMat(r = 2L)), 18 | list(s = apcluster::linSimMat, details = TRUE, q = 0.5), 19 | list(s = apcluster::expSimMat, lam = 0.5, nonoise = TRUE, includeSim = TRUE), 20 | list(s = apcluster::corSimMat, convits = 50L, maxits = 500L) 21 | ) 22 | 23 | for (i in seq_along(parset_list)) { 24 | parset = parset_list[[i]] 25 | learner$param_set$values = parset 26 | 27 | p = suppressWarnings(learner$train(task)$predict(task)) 28 | expect_prediction_clust(p) 29 | 30 | if ("complete" %chin% learner$properties) { 31 | expect_prediction_complete(p, learner$predict_type) 32 | } 33 | if ("exclusive" %chin% learner$properties) { 34 | expect_prediction_exclusive(p, learner$predict_type) 35 | } 36 | } 37 | }) 38 | -------------------------------------------------------------------------------- /tests/testthat/test_mlr_learners_clust_bico.R: -------------------------------------------------------------------------------- 1 | skip_if_not_installed("stream") 2 | 3 | test_that("autotest", { 4 | learner = lrn("clust.bico") 5 | expect_learner(learner) 6 | result = run_autotest(learner) 7 | expect_true(result, info = result$error) 8 | }) 9 | 10 | test_that("Learner properties are respected", { 11 | task = tsk("usarrests") 12 | learner = lrn("clust.bico") 13 | expect_learner(learner, task) 14 | 15 | # test on multiple paramsets 16 | parset_list = list( 17 | list(k = 5), 18 | list(k = 5, space = 5L), 19 | list(k = 5, space = 5L, p = 5L), 20 | list(k = 5, space = 5L, p = 5L, iterations = 5L) 21 | ) 22 | 23 | for (i in seq_along(parset_list)) { 24 | parset = parset_list[[i]] 25 | learner$param_set$values = parset 26 | 27 | p = learner$train(task)$predict(task) 28 | expect_prediction_clust(p) 29 | 30 | if ("complete" %chin% learner$properties) { 31 | expect_prediction_complete(p, learner$predict_type) 32 | } 33 | if 
("exclusive" %chin% learner$properties) { 34 | expect_prediction_exclusive(p, learner$predict_type) 35 | } 36 | } 37 | }) 38 | -------------------------------------------------------------------------------- /tests/testthat/test_mlr_learners_clust_birch.R: -------------------------------------------------------------------------------- 1 | skip_if_not_installed("stream") 2 | 3 | test_that("autotest", { 4 | learner = lrn("clust.birch", threshold = 0.1, branching = 8L, maxLeaf = 20L) 5 | expect_learner(learner) 6 | result = run_autotest(learner) 7 | expect_true(result, info = result$error) 8 | }) 9 | 10 | test_that("Learner properties are respected", { 11 | task = tsk("usarrests") 12 | learner = lrn("clust.birch", threshold = 0.1, branching = 8L, maxLeaf = 20L) 13 | expect_learner(learner, task) 14 | 15 | # test on multiple paramsets 16 | parset_list = list( 17 | list(threshold = 0.1, branching = 8L, maxLeaf = 20L), 18 | list(threshold = 0.2, branching = 4L, maxLeaf = 10, maxMem = 2L), 19 | list(threshold = 0.3, branching = 12L, maxLeaf = 5L, outlierThreshold = 0.3) 20 | ) 21 | 22 | for (i in seq_along(parset_list)) { 23 | parset = parset_list[[i]] 24 | learner$param_set$values = parset 25 | 26 | p = learner$train(task)$predict(task) 27 | expect_prediction_clust(p) 28 | 29 | if ("complete" %chin% learner$properties) { 30 | expect_prediction_complete(p, learner$predict_type) 31 | } 32 | if ("exclusive" %chin% learner$properties) { 33 | expect_prediction_exclusive(p, learner$predict_type) 34 | } 35 | } 36 | }) 37 | -------------------------------------------------------------------------------- /tests/testthat/test_mlr_learners_clust_cmeans.R: -------------------------------------------------------------------------------- 1 | skip_if_not_installed("e1071") 2 | 3 | test_that("autotest", { 4 | learner = lrn("clust.cmeans") 5 | expect_learner(learner) 6 | result = run_autotest(learner) 7 | expect_true(result, info = result$error) 8 | }) 9 | 10 | test_that("Learner 
properties are respected", { 11 | task = tsk("usarrests") 12 | learner = lrn("clust.cmeans") 13 | expect_learner(learner, task) 14 | 15 | # test on multiple paramsets 16 | centers = data.frame(matrix(ncol = length(colnames(task$data())), nrow = 4L)) 17 | colnames(centers) = colnames(task$data()) 18 | centers$Assault = c(100, 200, 150, 300) 19 | centers$Murder = c(11, 3, 10, 5) 20 | centers$Rape = c(20, 18, 10, 26) 21 | centers$UrbanPop = c(60, 54, 53, 69) 22 | 23 | parset_list = list( 24 | list(centers = 2L), 25 | list(centers = centers), 26 | list(centers = 2L, dist = "manhattan", m = 3) 27 | ) 28 | 29 | for (i in seq_along(parset_list)) { 30 | parset = parset_list[[i]] 31 | learner$param_set$values = parset 32 | 33 | p = learner$train(task)$predict(task) 34 | expect_prediction_clust(p) 35 | 36 | if ("complete" %chin% learner$properties) { 37 | expect_prediction_complete(p, learner$predict_type) 38 | } 39 | if ("exclusive" %chin% learner$properties) { 40 | expect_prediction_exclusive(p, learner$predict_type) 41 | } 42 | if ("fuzzy" %chin% learner$properties) { 43 | expect_prediction_fuzzy(p) 44 | } 45 | } 46 | }) 47 | -------------------------------------------------------------------------------- /tests/testthat/test_mlr_learners_clust_cobweb.R: -------------------------------------------------------------------------------- 1 | skip_if_not_installed("RWeka") 2 | skip_on_cran() 3 | 4 | test_that("autotest", { 5 | learner = lrn("clust.cobweb") 6 | expect_learner(learner) 7 | result = run_autotest(learner) 8 | expect_true(result, info = result$error) 9 | }) 10 | 11 | test_that("Learner properties are respected", { 12 | task = tsk("usarrests") 13 | learner = lrn("clust.cobweb") 14 | expect_learner(learner, task) 15 | 16 | # test on multiple paramsets 17 | parset_list = list( 18 | list(A = 0.5), 19 | list(C = 1L), 20 | list(S = 100L) 21 | ) 22 | 23 | for (i in seq_along(parset_list)) { 24 | parset = parset_list[[i]] 25 | learner$param_set$values = parset 26 | 27 | p 
= learner$train(task)$predict(task) 28 | expect_prediction_clust(p) 29 | 30 | if ("complete" %chin% learner$properties) { 31 | expect_prediction_complete(p, learner$predict_type) 32 | } 33 | if ("exclusive" %chin% learner$properties) { 34 | expect_prediction_exclusive(p, learner$predict_type) 35 | } 36 | } 37 | }) 38 | -------------------------------------------------------------------------------- /tests/testthat/test_mlr_learners_clust_dbscan.R: -------------------------------------------------------------------------------- 1 | skip_if_not_installed("dbscan") 2 | 3 | test_that("autotest", { 4 | learner = lrn("clust.dbscan", eps = 25) 5 | expect_learner(learner) 6 | result = run_autotest(learner) 7 | expect_true(result, info = result$error) 8 | }) 9 | 10 | test_that("Learner properties are respected", { 11 | task = tsk("usarrests") 12 | learner = lrn("clust.dbscan", eps = 25) 13 | expect_learner(learner, task) 14 | 15 | # test on multiple paramsets 16 | parset_list = list( 17 | list(eps = 25), 18 | list(eps = 25, minPts = 10L), 19 | list(eps = 25, search = "linear") 20 | ) 21 | 22 | for (i in seq_along(parset_list)) { 23 | parset = parset_list[[i]] 24 | learner$param_set$values = parset 25 | 26 | p = learner$train(task)$predict(task) 27 | expect_prediction_clust(p) 28 | 29 | if ("complete" %chin% learner$properties) { 30 | expect_prediction_complete(p, learner$predict_type) 31 | } 32 | if ("exclusive" %chin% learner$properties) { 33 | expect_prediction_exclusive(p, learner$predict_type) 34 | } 35 | } 36 | }) 37 | -------------------------------------------------------------------------------- /tests/testthat/test_mlr_learners_clust_dbscan_fpc.R: -------------------------------------------------------------------------------- 1 | skip_if_not_installed("fpc") 2 | 3 | test_that("autotest", { 4 | learner = lrn("clust.dbscan_fpc", eps = 25) 5 | expect_learner(learner) 6 | result = run_autotest(learner) 7 | expect_true(result, info = result$error) 8 | }) 9 | 10 | 
test_that("Learner properties are respected", {
  task = tsk("usarrests")
  learner = lrn("clust.dbscan_fpc", eps = 25)
  expect_learner(learner, task)

  # hyperparameter configurations to train and predict with
  param_grid = list(
    list(eps = 25),
    list(eps = 25, MinPts = 10),
    list(eps = 25, method = "hybrid")
  )

  for (param_values in param_grid) {
    learner$param_set$values = param_values

    pred = learner$train(task)$predict(task)
    expect_prediction_clust(pred)

    # only check the guarantees the learner advertises
    props = learner$properties
    if ("complete" %chin% props) {
      expect_prediction_complete(pred, learner$predict_type)
    }
    if ("exclusive" %chin% props) {
      expect_prediction_exclusive(pred, learner$predict_type)
    }
  }
})

--------------------------------------------------------------------------------
/tests/testthat/test_mlr_learners_clust_diana.R:
--------------------------------------------------------------------------------
skip_if_not_installed("clue")

test_that("autotest", {
  learner = lrn("clust.diana")
  expect_learner(learner)

  # train on the first generated task, then predict on the same task
  first_task = generate_tasks(learner)[[1]]
  learner$train(first_task)
  expect_class(learner$model, "diana")
  expect_warning(learner$predict(first_task), "doesn't predict on new data")
})

test_that("Learner properties are respected", {
  task = tsk("usarrests")
  learner = lrn("clust.diana")
  expect_learner(learner, task)

  # hyperparameter configurations to train and predict with
  param_grid = list(
    list(k = 2L),
    list(k = 5L),
    list(k = 2L, metric = "manhattan"),
    list(k = 2L, stand = TRUE)
  )

  for (param_values in param_grid) {
    learner$param_set$values = param_values

    # warnings raised during train/predict are deliberately silenced here
    pred = suppressWarnings(learner$train(task)$predict(task))
    expect_prediction_clust(pred)

    # only check the guarantees the learner advertises
    props = learner$properties
    if ("complete" %chin% props) {
      expect_prediction_complete(pred, learner$predict_type)
    }
    if ("exclusive" %chin% props) {
      expect_prediction_exclusive(pred, learner$predict_type)
    }
  }
})

--------------------------------------------------------------------------------
/tests/testthat/test_mlr_learners_clust_em.R:
--------------------------------------------------------------------------------
skip_if_not_installed("RWeka")
skip_on_cran()

test_that("autotest", {
  learner = lrn("clust.em")
  expect_learner(learner)

  autotest_outcome = run_autotest(learner)
  expect_true(autotest_outcome, info = autotest_outcome$error)
})

test_that("Learner properties are respected", {
  task = tsk("usarrests")
  learner = lrn("clust.em")
  expect_learner(learner, task)

  # hyperparameter configurations to train and predict with
  param_grid = list(
    list(I = 200L, num_slots = 5L),
    list(output_debug_info = TRUE, K = 5L),
    list(M = 1e-3, ll_iter = 1L, ll_cv = 1L)
  )

  for (param_values in param_grid) {
    learner$param_set$values = param_values

    pred = learner$train(task)$predict(task)
    expect_prediction_clust(pred)

    # only check the guarantees the learner advertises
    props = learner$properties
    if ("complete" %chin% props) {
      expect_prediction_complete(pred, learner$predict_type)
    }
    if ("exclusive" %chin% props) {
      expect_prediction_exclusive(pred, learner$predict_type)
    }
  }
})

--------------------------------------------------------------------------------
/tests/testthat/test_mlr_learners_clust_fanny.R:
--------------------------------------------------------------------------------
skip_if_not_installed("clue")

test_that("autotest", {
  learner = lrn("clust.fanny")
  expect_learner(learner)

  # train on the first generated task, then predict on the same task
  first_task = generate_tasks(learner)[[1]]
  learner$train(first_task)
  expect_class(learner$model, "fanny")
  expect_warning(learner$predict(first_task), "doesn't predict on new data")
})

test_that("Learner properties are respected", {
  task = tsk("usarrests")
  learner = lrn("clust.fanny")
  expect_learner(learner, task)

  # hyperparameter configurations to train and predict with
  param_grid = list(
    list(k = 2L),
    list(k = 5L),
    list(k = 2L, metric = "SqEuclidean")
  )

  for (param_values in param_grid) {
    learner$param_set$values = param_values

    # warnings raised during train/predict are deliberately silenced here
    pred = suppressWarnings(learner$train(task)$predict(task))
    expect_prediction_clust(pred)

    # only check the guarantees the learner advertises
    props = learner$properties
    if ("complete" %chin% props) {
      expect_prediction_complete(pred, learner$predict_type)
    }
    if ("exclusive" %chin% props) {
      expect_prediction_exclusive(pred, learner$predict_type)
    }
    if ("fuzzy" %chin% props) {
      expect_prediction_fuzzy(pred)
    }
  }
})

--------------------------------------------------------------------------------
/tests/testthat/test_mlr_learners_clust_featureless.R:
--------------------------------------------------------------------------------
test_that("autotest", {
  learner = lrn("clust.featureless")
  expect_learner(learner)

  autotest_outcome = run_autotest(learner)
  expect_true(autotest_outcome, info = autotest_outcome$error)
})

test_that("Learner properties are respected", {
  task = tsk("usarrests")
  learner = lrn("clust.featureless")
  expect_learner(learner, task)

  # hyperparameter configurations to train and predict with
  param_grid = list(
    list(num_clusters = 1L),
    list(num_clusters = 2L),
    list(num_clusters = 3L)
  )

  for (param_values in param_grid) {
    learner$param_set$values = param_values

    pred = learner$train(task)$predict(task)
    expect_prediction_clust(pred)

    # only check the guarantees the learner advertises
    props = learner$properties
    if ("complete" %chin% props) {
      expect_prediction_complete(pred, learner$predict_type)
    }
    if ("exclusive" %chin% props) {
      expect_prediction_exclusive(pred, learner$predict_type)
    }
  }
})

--------------------------------------------------------------------------------
/tests/testthat/test_mlr_learners_clust_ff.R:
--------------------------------------------------------------------------------
skip_if_not_installed("RWeka")
skip_on_cran()

test_that("autotest", {
  learner = lrn("clust.ff")
  expect_learner(learner)

  autotest_outcome = run_autotest(learner)
  expect_true(autotest_outcome, info = autotest_outcome$error)
})

test_that("Learner properties are respected", {
  task = tsk("usarrests")
  learner = lrn("clust.ff")
  expect_learner(learner, task)

  # hyperparameter configurations to train and predict with
  param_grid = list(
    list(N = 1L),
    list(N = 3L, S = 3L),
    list(S = 100L)
  )

  for (param_values in param_grid) {
    learner$param_set$values = param_values

    pred = learner$train(task)$predict(task)
    expect_prediction_clust(pred)

    # only check the guarantees the learner advertises
    props = learner$properties
    if ("complete" %chin% props) {
      expect_prediction_complete(pred, learner$predict_type)
    }
    if ("exclusive" %chin% props) {
      expect_prediction_exclusive(pred, learner$predict_type)
    }
  }
})

--------------------------------------------------------------------------------
/tests/testthat/test_mlr_learners_clust_hclust.R:
--------------------------------------------------------------------------------
test_that("autotest", {
  learner = lrn("clust.hclust")
  expect_learner(learner)

  # train on the first generated task, then predict on the same task
  first_task = generate_tasks(learner)[[1]]
  learner$train(first_task)
  expect_class(learner$model, "hclust")
  expect_warning(learner$predict(first_task), "doesn't predict on new data")
})

test_that("Learner properties are respected", {
  task = tsk("usarrests")
  learner = lrn("clust.hclust")
  expect_learner(learner, task)

  # hyperparameter configurations to train and predict with
  param_grid = list(
    list(k = 3L),
    list(k = 5L),
    list(k = 3L, method = "centroid")
  )

  for (param_values in param_grid) {
    learner$param_set$values = param_values

    # warnings raised during train/predict are deliberately silenced here
    pred = suppressWarnings(learner$train(task)$predict(task))
    expect_prediction_clust(pred)

    # only check the guarantees the learner advertises
    props = learner$properties
    if ("complete" %chin% props) {
      expect_prediction_complete(pred, learner$predict_type)
    }
    if ("exclusive" %chin% props) {
      expect_prediction_exclusive(pred, learner$predict_type)
    }
    if ("fuzzy" %chin% props) {
      expect_prediction_fuzzy(pred)
    }
  }
})

--------------------------------------------------------------------------------
/tests/testthat/test_mlr_learners_clust_hdbscan.R:
--------------------------------------------------------------------------------
skip_if_not_installed("dbscan")

test_that("autotest", {
  learner = lrn("clust.hdbscan", minPts = 5L)
  expect_learner(learner)

  autotest_outcome = run_autotest(learner)
  expect_true(autotest_outcome, info = autotest_outcome$error)
})

test_that("Learner properties are respected", {
  task = tsk("usarrests")
  learner = lrn("clust.hdbscan", minPts = 5L)
  expect_learner(learner, task)

  # hyperparameter configurations to train and predict with
  param_grid = list(
    list(minPts = 5L),
    list(minPts = 5L, gen_hdbscan_tree = TRUE),
    list(minPts = 5L, gen_simplified_tree = TRUE),
    list(minPts = 5L, gen_hdbscan_tree = TRUE, gen_simplified_tree = TRUE)
  )

  for (param_values in param_grid) {
    learner$param_set$values = param_values

    pred = learner$train(task)$predict(task)
    expect_prediction_clust(pred)

    # only check the guarantees the learner advertises
    props = learner$properties
    if ("complete" %chin% props) {
      expect_prediction_complete(pred, learner$predict_type)
    }
    if ("exclusive" %chin% props) {
      expect_prediction_exclusive(pred, learner$predict_type)
    }
  }
})

--------------------------------------------------------------------------------
/tests/testthat/test_mlr_learners_clust_kkmeans.R:
--------------------------------------------------------------------------------
skip_if_not_installed("kernlab")

test_that("autotest", {
  learner = lrn("clust.kkmeans")
  expect_learner(learner)

  autotest_outcome = run_autotest(learner)
  expect_true(autotest_outcome, info = autotest_outcome$error)
})

test_that("Learner properties are respected", {
  task = tsk("usarrests")
  learner = lrn("clust.kkmeans")
  expect_learner(learner, task)

  # explicit starting centers: one row per cluster, one column per task column
  feature_names = colnames(task$data())
  centers = data.frame(matrix(ncol = length(feature_names), nrow = 4L))
  colnames(centers) = feature_names
  centers[["Assault"]] = c(100, 200, 150, 300)
  centers[["Murder"]] = c(11, 3, 10, 5)
  centers[["Rape"]] = c(20, 18, 10, 26)
  centers[["UrbanPop"]] = c(60, 54, 53, 69)

  # hyperparameter configurations to train and predict with
  param_grid = list(
    list(centers = 2L, kernel = "polydot", degree = 2L),
    list(centers = centers, kernel = "laplacedot", sigma = 2L),
    list(centers = 3L, kernel = "anovadot")
  )

  for (param_values in param_grid) {
    learner$param_set$values = param_values

    pred = learner$train(task)$predict(task)
    expect_prediction_clust(pred)

    # only check the guarantees the learner advertises
    props = learner$properties
    if ("complete" %chin% props) {
      expect_prediction_complete(pred, learner$predict_type)
    }
    if ("exclusive" %chin% props) {
      expect_prediction_exclusive(pred, learner$predict_type)
    }

    # reset the learner before the next configuration is trained
    learner$reset()
  }
})

--------------------------------------------------------------------------------
/tests/testthat/test_mlr_learners_clust_kmeans.R:
--------------------------------------------------------------------------------
skip_if_not_installed("clue")

test_that("autotest", {
  learner = lrn("clust.kmeans")
  expect_learner(learner)

  autotest_outcome = run_autotest(learner)
  expect_true(autotest_outcome, info = autotest_outcome$error)
})

test_that("Learner properties are respected", {
  task = tsk("usarrests")
  learner = lrn("clust.kmeans")
  expect_learner(learner, task)

  # explicit starting centers: one row per cluster, one column per task column
  feature_names = colnames(task$data())
  centers = data.frame(matrix(ncol = length(feature_names), nrow = 4L))
  colnames(centers) = feature_names
  centers[["Assault"]] = c(100, 200, 150, 300)
  centers[["Murder"]] = c(11, 3, 10, 5)
  centers[["Rape"]] = c(20, 18, 10, 26)
  centers[["UrbanPop"]] = c(60, 54, 53, 69)

  # hyperparameter configurations to train and predict with
  param_grid = list(
    list(centers = 2L),
    list(centers = centers),
    list(centers = 2L, algorithm = "MacQueen")
  )

  for (param_values in param_grid) {
    learner$param_set$values = param_values

    pred = learner$train(task)$predict(task)
    expect_prediction_clust(pred)

    # only check the guarantees the learner advertises
    props = learner$properties
    if ("complete" %chin% props) {
      expect_prediction_complete(pred, learner$predict_type)
    }
    if ("exclusive" %chin% props) {
      expect_prediction_exclusive(pred, learner$predict_type)
    }

    # reset the learner before the next configuration is trained
    learner$reset()
  }
})

--------------------------------------------------------------------------------
/tests/testthat/test_mlr_learners_clust_mbatchkmeans.R:
--------------------------------------------------------------------------------
skip_if_not_installed("ClusterR")

test_that("autotest", {
  learner = lrn("clust.MBatchKMeans")
  expect_learner(learner)

  # only training is exercised here; training warnings are silenced
  first_task = generate_tasks(learner)[[1]]
  suppressWarnings(learner$train(first_task))
  expect_class(learner$model, "MBatchKMeans")
})

test_that("Learner properties are respected", {
  task = tsk("usarrests")
  learner = lrn("clust.MBatchKMeans")
  expect_learner(learner, task)

  # explicit starting centroids: built as a data.frame keyed by task column,
  # then stripped of column names and converted to a plain matrix
  feature_names = colnames(task$data())
  centers = data.frame(matrix(ncol = length(feature_names), nrow = 4L))
  colnames(centers) = feature_names
  centers[["Assault"]] = c(100, 200, 150, 300)
  centers[["Murder"]] = c(11, 3, 10, 5)
  centers[["Rape"]] = c(20, 18, 10, 26)
  centers[["UrbanPop"]] = c(60, 54, 53, 69)
  colnames(centers) = NULL
  centers = as.matrix(centers)

  # hyperparameter configurations to train and predict with
  param_grid = list(
    list(clusters = 2L),
    list(clusters = 4L, CENTROIDS = centers, initializer = "random"),
    list(clusters = 2L, early_stop_iter = 20L, batch_size = 15L, tol = 1e-03)
  )

  # every configuration is run once per supported predict type
  for (type in c("partition", "prob")) {
    learner$predict_type = type

    for (param_values in param_grid) {
      learner$param_set$values = param_values

      pred = learner$train(task)$predict(task)
      expect_prediction_clust(pred)

      props = learner$properties
      if ("complete" %chin% props) {
        expect_prediction_complete(pred, learner$predict_type)
      }
      if ("exclusive" %chin% props) {
        # exclusivity is always checked against the partition
        expect_prediction_exclusive(pred, "partition")
      }
      if (learner$predict_type == "prob") {
        expect_prediction_fuzzy(pred)
      }

      # reset the learner before the next configuration is trained
      learner$reset()
    }
  }
})

--------------------------------------------------------------------------------
/tests/testthat/test_mlr_learners_clust_mclust.R:
--------------------------------------------------------------------------------
skip_if_not_installed("mclust")

test_that("autotest", {
  learner = lrn("clust.mclust")
  expect_learner(learner)

  autotest_outcome = run_autotest(learner)
  expect_true(autotest_outcome, info = autotest_outcome$error)
})

test_that("Learner properties are respected", {
  task = tsk("usarrests")
  learner = lrn("clust.mclust")
  expect_learner(learner, task)

  # hyperparameter configurations to train and predict with
  param_grid = list(
    list(G = 1:4, modelNames = "EII"),
    list(initialization = list(noise = 1)),
    list(G = 3)
  )

  for (param_values in param_grid) {
    learner$param_set$values = param_values

    # warnings raised during train/predict are deliberately silenced here
    pred = suppressWarnings(learner$train(task)$predict(task))
    expect_prediction_clust(pred)

    # only check the guarantees the learner advertises
    props = learner$properties
    if ("complete" %chin% props) {
      expect_prediction_complete(pred, learner$predict_type)
    }
    if ("exclusive" %chin% props) {
      expect_prediction_exclusive(pred, learner$predict_type)
    }
    if ("fuzzy" %chin% props) {
      expect_prediction_fuzzy(pred)
    }
  }
})

--------------------------------------------------------------------------------
/tests/testthat/test_mlr_learners_clust_meanshift.R:
--------------------------------------------------------------------------------
skip_if_not_installed("LPCM")

test_that("autotest", {
  learner = lrn("clust.meanshift")
  expect_learner(learner)

  # train on the first generated task, then predict on the same task
  first_task = generate_tasks(learner)[[1]]
  learner$train(first_task)
  expect_class(learner$model, "ms")
  expect_warning(learner$predict(first_task), "doesn't predict on new data")
})

test_that("Learner properties are respected", {
  task = tsk("usarrests")
  learner = lrn("clust.meanshift")
  expect_learner(learner, task)

  # hyperparameter configurations to train and predict with
  param_grid = list(
    list(h = 2L),
    list(subset = 1:3, scaled = 2L),
    list(thr = 0.1, iter = 100L)
  )

  for (param_values in param_grid) {
    learner$param_set$values = param_values

    # warnings raised during train/predict are deliberately silenced here
    pred = suppressWarnings(learner$train(task)$predict(task))
    expect_prediction_clust(pred)

    # only check the guarantees the learner advertises
    props = learner$properties
    if ("complete" %chin% props) {
      expect_prediction_complete(pred, learner$predict_type)
    }
    if ("exclusive" %chin% props) {
      expect_prediction_exclusive(pred, learner$predict_type)
    }
  }
})

--------------------------------------------------------------------------------
/tests/testthat/test_mlr_learners_clust_optics.R:
--------------------------------------------------------------------------------
skip_if_not_installed("dbscan")

test_that("autotest", {
  learner = lrn("clust.optics", eps = 25, eps_cl = 20)
  expect_learner(learner)

  autotest_outcome = run_autotest(learner)
  expect_true(autotest_outcome, info = autotest_outcome$error)
})

test_that("Learner properties are respected", {
  task = tsk("usarrests")
  learner = lrn("clust.optics")
  expect_learner(learner, task)

  # hyperparameter configurations to train and predict with
  param_grid = list(
    list(eps_cl = 25),
    list(eps = 25, eps_cl = 20),
    list(eps_cl = 25, search = "linear")
  )

  for (param_values in param_grid) {
    learner$param_set$values = param_values

    pred = learner$train(task)$predict(task)
    expect_prediction_clust(pred)

    # only check the guarantees the learner advertises
    props = learner$properties
    if ("complete" %chin% props) {
      expect_prediction_complete(pred, learner$predict_type)
    }
    if ("exclusive" %chin% props) {
      expect_prediction_exclusive(pred, learner$predict_type)
    }
  }
})

--------------------------------------------------------------------------------
/tests/testthat/test_mlr_learners_clust_pam.R:
--------------------------------------------------------------------------------
skip_if_not_installed("clue")

test_that("autotest", {
  learner = lrn("clust.pam")
  expect_learner(learner)

  autotest_outcome = run_autotest(learner)
  expect_true(autotest_outcome, info = autotest_outcome$error)
})

test_that("Learner properties are respected", {
  task = tsk("usarrests")
  learner = lrn("clust.pam")
  expect_learner(learner, task)

  # hyperparameter configurations to train and predict with
  param_grid = list(
    list(k = 2L),
    list(k = 5L),
    list(k = 2L, metric = "manhattan")
  )

  for (param_values in param_grid) {
    learner$param_set$values = param_values

    pred = learner$train(task)$predict(task)
    expect_prediction_clust(pred)

    # only check the guarantees the learner advertises
    props = learner$properties
    if ("complete" %chin% props) {
      expect_prediction_complete(pred, learner$predict_type)
    }
    if ("exclusive" %chin% props) {
      expect_prediction_exclusive(pred, learner$predict_type)
    }
  }
})

--------------------------------------------------------------------------------
/tests/testthat/test_mlr_learners_clust_simplekmeans.R:
--------------------------------------------------------------------------------
skip_if_not_installed("RWeka")
skip_on_cran()

test_that("autotest", {
  learner = lrn("clust.SimpleKMeans")
  expect_learner(learner)

  autotest_outcome = run_autotest(learner)
  expect_true(autotest_outcome, info = autotest_outcome$error)
})

test_that("Learner properties are respected", {
  task = tsk("usarrests")
  learner = lrn("clust.SimpleKMeans")
  expect_learner(learner, task)

  # hyperparameter configurations to train and predict with
  param_grid = list(
    list(N = 3, init = 2L, periodic_pruning = 1L),
    list(V = TRUE, M = TRUE, O = TRUE),
    list(num_slots = 2L, init = 2L, min_density = 1L)
  )

  for (param_values in param_grid) {
    learner$param_set$values = param_values

    pred = learner$train(task)$predict(task)
    expect_prediction_clust(pred)

    # only check the guarantees the learner advertises
    props = learner$properties
    if ("complete" %chin% props) {
      expect_prediction_complete(pred, learner$predict_type)
    }
    if ("exclusive" %chin% props) {
      expect_prediction_exclusive(pred, learner$predict_type)
    }
  }
})

--------------------------------------------------------------------------------
/tests/testthat/test_mlr_learners_clust_xmeans.R:
--------------------------------------------------------------------------------
skip_on_cran()
skip_if_not_installed("RWeka")

test_that("autotest", {
  learner = lrn("clust.xmeans")
  expect_learner(learner)

  autotest_outcome = run_autotest(learner)
  expect_true(autotest_outcome, info = autotest_outcome$error)
})

test_that("Learner properties are respected", {
  task = tsk("usarrests")
  learner = lrn("clust.xmeans")
  expect_learner(learner, task)

  # hyperparameter configurations to train and predict with
  param_grid = list(
    list(use_kdtree = TRUE),
    list(L = 2L),
    list(I = 5L)
  )

  for (param_values in param_grid) {
    learner$param_set$values = param_values

    pred = learner$train(task)$predict(task)
    expect_prediction_clust(pred)

    # only check the guarantees the learner advertises
    props = learner$properties
    if ("complete" %chin% props) {
      expect_prediction_complete(pred, learner$predict_type)
    }
    if ("exclusive" %chin% props) {
      expect_prediction_exclusive(pred, learner$predict_type)
    }
  }
})

--------------------------------------------------------------------------------