├── .Rbuildignore ├── .editorconfig ├── .github └── workflows │ ├── dev-cmd-check.yml │ ├── pkgdown.yml │ └── r-cmd-check.yml ├── .gitignore ├── .ignore ├── .lintr ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── NEWS.md ├── R ├── Filter.R ├── FilterAUC.R ├── FilterAnova.R ├── FilterBoruta.R ├── FilterCMIM.R ├── FilterCarScore.R ├── FilterCarSurvScore.R ├── FilterCorrelation.R ├── FilterDISR.R ├── FilterFindCorrelation.R ├── FilterImportance.R ├── FilterInformationGain.R ├── FilterJMI.R ├── FilterJMIM.R ├── FilterKruskalTest.R ├── FilterLearner.R ├── FilterMIM.R ├── FilterMRMR.R ├── FilterNJMIM.R ├── FilterPerformance.R ├── FilterPermutation.R ├── FilterRelief.R ├── FilterSelectedFeatures.R ├── FilterUnivariateCox.R ├── FilterVariance.R ├── bibentries.R ├── flt.R ├── helper.R ├── mlr_filters.R ├── reexports.R └── zzz.R ├── README.Rmd ├── README.md ├── man-roxygen ├── details_praznik.R └── seealso_filter.R ├── man ├── Filter.Rd ├── figures │ ├── logo.png │ └── logo_navbar.png ├── flt.Rd ├── mlr3filters-package.Rd ├── mlr_filters.Rd ├── mlr_filters_anova.Rd ├── mlr_filters_auc.Rd ├── mlr_filters_boruta.Rd ├── mlr_filters_carscore.Rd ├── mlr_filters_carsurvscore.Rd ├── mlr_filters_cmim.Rd ├── mlr_filters_correlation.Rd ├── mlr_filters_disr.Rd ├── mlr_filters_find_correlation.Rd ├── mlr_filters_importance.Rd ├── mlr_filters_information_gain.Rd ├── mlr_filters_jmi.Rd ├── mlr_filters_jmim.Rd ├── mlr_filters_kruskal_test.Rd ├── mlr_filters_mim.Rd ├── mlr_filters_mrmr.Rd ├── mlr_filters_njmim.Rd ├── mlr_filters_performance.Rd ├── mlr_filters_permutation.Rd ├── mlr_filters_relief.Rd ├── mlr_filters_selected_features.Rd ├── mlr_filters_univariate_cox.Rd ├── mlr_filters_variance.Rd └── reexports.Rd ├── mlr3filters.Rproj ├── pkgdown ├── _pkgdown.yml └── favicon │ ├── apple-touch-icon-120x120.png │ ├── apple-touch-icon-152x152.png │ ├── apple-touch-icon-180x180.png │ ├── apple-touch-icon-60x60.png │ ├── apple-touch-icon-76x76.png │ ├── apple-touch-icon.png │ ├── favicon-16x16.png 
│ ├── favicon-32x32.png │ └── favicon.ico └── tests ├── testthat.R └── testthat ├── helper.R ├── setup.R ├── teardown.R ├── test_FilterCorrelation.R ├── test_FilterFindCorrelation.R ├── test_FilterImportance.R ├── test_FilterInformationGain.R ├── test_FilterKruskalTest.R ├── test_FilterPerformance.R ├── test_FilterPermutation.R ├── test_FilterRelief.R ├── test_FilterSelectedFeatures.R ├── test_FilterUnivariateCox.R ├── test_filter.R ├── test_filter_boruta.R ├── test_filter_classif.R ├── test_filter_generic.R ├── test_filter_regr.R ├── test_filter_surv.R ├── test_mlr3spatiotempcv.R ├── test_mlr_filters.R └── test_partial_scoring.R /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^README\.Rmd$ 2 | ^LICENSE$ 3 | ^\.github$ 4 | ^.*\.Rproj$ 5 | ^\.Rproj\.user$ 6 | ^\.editorconfig$ 7 | ^\.ignore$ 8 | ^docs$ 9 | ^pkgdown$ 10 | ^man-roxygen$ 11 | ^cran-comments.md 12 | ^\.ccache$ 13 | ^codemeta\.json$ 14 | ^revdep$ 15 | ^\.vscode$ 16 | ^\.lintr$ 17 | ^cran-comments\.md$ 18 | ^CRAN-SUBMISSION$ 19 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # See http://editorconfig.org 2 | root = true 3 | 4 | [*] 5 | charset = utf-8 6 | end_of_line = lf 7 | insert_final_newline = true 8 | indent_style = space 9 | trim_trailing_whitespace = true 10 | 11 | [*.{r,R,md,Rmd}] 12 | indent_size = 2 13 | 14 | [*.{c,h}] 15 | indent_size = 4 16 | 17 | [*.{cpp,hpp}] 18 | indent_size = 4 19 | 20 | [{NEWS.md,DESCRIPTION,LICENSE}] 21 | max_line_length = 80 22 | -------------------------------------------------------------------------------- /.github/workflows/dev-cmd-check.yml: -------------------------------------------------------------------------------- 1 | # dev cmd check workflow of the mlr3 ecosystem v0.1.0 2 | # https://github.com/mlr-org/actions 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - 
main 8 | pull_request: 9 | branches: 10 | - main 11 | 12 | name: dev-check 13 | 14 | jobs: 15 | check-package: 16 | runs-on: ${{ matrix.config.os }} 17 | 18 | name: ${{ matrix.config.dev-package }} 19 | 20 | env: 21 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 22 | 23 | strategy: 24 | fail-fast: false 25 | matrix: 26 | config: 27 | - {os: ubuntu-latest, r: 'release', dev-package: "mlr-org/paradox', 'mlr-org/mlr3learners', 'mlr-org/mlr3pipelines"} 28 | 29 | steps: 30 | - uses: actions/checkout@v3 31 | 32 | - uses: r-lib/actions/setup-r@v2 33 | with: 34 | r-version: ${{ matrix.config.r }} 35 | 36 | - uses: r-lib/actions/setup-r-dependencies@v2 37 | with: 38 | extra-packages: any::rcmdcheck 39 | needs: check 40 | 41 | - name: Install dev versions 42 | run: pak::pkg_install(c('${{ matrix.config.dev-package }}')) 43 | shell: Rscript {0} 44 | 45 | - uses: r-lib/actions/check-r-package@v2 46 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yml: -------------------------------------------------------------------------------- 1 | # pkgdown workflow of the mlr3 ecosystem v0.1.0 2 | # https://github.com/mlr-org/actions 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | release: 11 | types: 12 | - published 13 | workflow_dispatch: 14 | 15 | name: pkgdown 16 | 17 | jobs: 18 | pkgdown: 19 | runs-on: ubuntu-latest 20 | 21 | concurrency: 22 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} 23 | env: 24 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 25 | steps: 26 | - uses: actions/checkout@v3 27 | 28 | - uses: r-lib/actions/setup-pandoc@v2 29 | 30 | - uses: r-lib/actions/setup-r@v2 31 | 32 | - uses: r-lib/actions/setup-r-dependencies@v2 33 | with: 34 | extra-packages: any::pkgdown, local::. 
35 | needs: website 36 | 37 | - name: Install template 38 | run: pak::pkg_install("mlr-org/mlr3pkgdowntemplate") 39 | shell: Rscript {0} 40 | 41 | - name: Build site 42 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) 43 | shell: Rscript {0} 44 | 45 | - name: Deploy 46 | if: github.event_name != 'pull_request' 47 | uses: JamesIves/github-pages-deploy-action@v4.4.1 48 | with: 49 | clean: false 50 | branch: gh-pages 51 | folder: docs 52 | -------------------------------------------------------------------------------- /.github/workflows/r-cmd-check.yml: -------------------------------------------------------------------------------- 1 | # r cmd check workflow of the mlr3 ecosystem v0.1.0 2 | # https://github.com/mlr-org/actions 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | 12 | name: r-cmd-check 13 | 14 | jobs: 15 | r-cmd-check: 16 | runs-on: ${{ matrix.config.os }} 17 | 18 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 19 | 20 | env: 21 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 22 | 23 | strategy: 24 | fail-fast: false 25 | matrix: 26 | config: 27 | - {os: ubuntu-latest, r: 'devel'} 28 | - {os: ubuntu-latest, r: 'release'} 29 | 30 | steps: 31 | - uses: actions/checkout@v3 32 | 33 | - uses: r-lib/actions/setup-r@v2 34 | with: 35 | r-version: ${{ matrix.config.r }} 36 | 37 | - uses: r-lib/actions/setup-r-dependencies@v2 38 | with: 39 | extra-packages: any::rcmdcheck 40 | needs: check 41 | 42 | - uses: r-lib/actions/check-r-package@v2 43 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # File created using '.gitignore Generator' for Visual Studio Code: https://bit.ly/vscode-gig 2 | # Created by https://www.toptal.com/developers/gitignore/api/windows,visualstudiocode,r,macos,linux 3 | # Edit at 
https://www.toptal.com/developers/gitignore?templates=windows,visualstudiocode,r,macos,linux 4 | 5 | ### Linux ### 6 | *~ 7 | 8 | # temporary files which can be created if a process still has a handle open of a deleted file 9 | .fuse_hidden* 10 | 11 | # KDE directory preferences 12 | .directory 13 | 14 | # Linux trash folder which might appear on any partition or disk 15 | .Trash-* 16 | 17 | # .nfs files are created when an open file is removed but is still being accessed 18 | .nfs* 19 | 20 | ### macOS ### 21 | # General 22 | .DS_Store 23 | .AppleDouble 24 | .LSOverride 25 | 26 | # Icon must end with two \r 27 | Icon 28 | 29 | 30 | # Thumbnails 31 | ._* 32 | 33 | # Files that might appear in the root of a volume 34 | .DocumentRevisions-V100 35 | .fseventsd 36 | .Spotlight-V100 37 | .TemporaryItems 38 | .Trashes 39 | .VolumeIcon.icns 40 | .com.apple.timemachine.donotpresent 41 | 42 | # Directories potentially created on remote AFP share 43 | .AppleDB 44 | .AppleDesktop 45 | Network Trash Folder 46 | Temporary Items 47 | .apdisk 48 | 49 | ### macOS Patch ### 50 | # iCloud generated files 51 | *.icloud 52 | 53 | ### R ### 54 | # History files 55 | .Rhistory 56 | .Rapp.history 57 | 58 | # Session Data files 59 | .RData 60 | .RDataTmp 61 | 62 | # User-specific files 63 | .Ruserdata 64 | 65 | # Example code in package build process 66 | *-Ex.R 67 | 68 | # Output files from R CMD build 69 | /*.tar.gz 70 | 71 | # Output files from R CMD check 72 | /*.Rcheck/ 73 | 74 | # RStudio files 75 | .Rproj.user/ 76 | 77 | # produced vignettes 78 | vignettes/*.html 79 | vignettes/*.pdf 80 | 81 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 82 | .httr-oauth 83 | 84 | # knitr and R markdown default cache directories 85 | *_cache/ 86 | /cache/ 87 | 88 | # Temporary files created by R markdown 89 | *.utf8.md 90 | *.knit.md 91 | 92 | # R Environment Variables 93 | .Renviron 94 | 95 | # pkgdown site 96 | docs/ 97 | 98 | # translation temp files 99 | po/*~ 100 | 101 | 
# RStudio Connect folder 102 | rsconnect/ 103 | 104 | ### R.Bookdown Stack ### 105 | # R package: bookdown caching files 106 | /*_files/ 107 | 108 | ### VisualStudioCode ### 109 | .vscode/* 110 | !.vscode/settings.json 111 | !.vscode/tasks.json 112 | !.vscode/launch.json 113 | !.vscode/extensions.json 114 | !.vscode/*.code-snippets 115 | 116 | # Local History for Visual Studio Code 117 | .history/ 118 | 119 | # Built Visual Studio Code Extensions 120 | *.vsix 121 | 122 | ### VisualStudioCode Patch ### 123 | # Ignore all local history of files 124 | .history 125 | .ionide 126 | 127 | ### Windows ### 128 | # Windows thumbnail cache files 129 | Thumbs.db 130 | Thumbs.db:encryptable 131 | ehthumbs.db 132 | ehthumbs_vista.db 133 | 134 | # Dump file 135 | *.stackdump 136 | 137 | # Folder config file 138 | [Dd]esktop.ini 139 | 140 | # Recycle Bin used on file shares 141 | $RECYCLE.BIN/ 142 | 143 | # Windows Installer files 144 | *.cab 145 | *.msi 146 | *.msix 147 | *.msm 148 | *.msp 149 | 150 | # Windows shortcuts 151 | *.lnk 152 | 153 | # End of https://www.toptal.com/developers/gitignore/api/windows,visualstudiocode,r,macos,linux 154 | 155 | # Custom rules (everything added below won't be overriden by 'Generate .gitignore File' if you use 'Update' option) 156 | 157 | # R 158 | .Rprofile 159 | README.html 160 | src/*.o 161 | src/*.so 162 | src/*.dll 163 | 164 | # CRAN 165 | cran-comments.md 166 | CRAN-RELEASE 167 | CRAN-SUBMISSION 168 | 169 | # pkgdown 170 | docs/ 171 | 172 | # renv 173 | renv/ 174 | renv.lock 175 | 176 | # vscode 177 | .vscode 178 | 179 | # revdep 180 | revdep/ 181 | 182 | # misc 183 | Meta/ 184 | attic/ 185 | inst/docd 186 | -------------------------------------------------------------------------------- /.ignore: -------------------------------------------------------------------------------- 1 | man/ 2 | docs/ 3 | attic/ 4 | pkgdown/ 5 | -------------------------------------------------------------------------------- /.lintr: 
-------------------------------------------------------------------------------- 1 | linters: linters_with_defaults( 2 | # lintr defaults: https://github.com/jimhester/lintr#available-linters 3 | # the following setup changes/removes certain linters 4 | assignment_linter = NULL, # do not force using <- for assignments 5 | object_name_linter = object_name_linter(c("snake_case", "CamelCase")), # only allow snake case and camel case object names 6 | cyclocomp_linter = NULL, # do not check function complexity 7 | commented_code_linter = NULL, # allow code in comments 8 | line_length_linter = line_length_linter(120) 9 | ) 10 | 11 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: mlr3filters 2 | Title: Filter Based Feature Selection for 'mlr3' 3 | Version: 0.8.1.9000 4 | Authors@R: c( 5 | person("Marc", "Becker", , "marcbecker@posteo.de", role = c("cre", "aut"), 6 | comment = c(ORCID = "0000-0002-8115-0400")), 7 | person("Patrick", "Schratz", , "patrick.schratz@gmail.com", role = "aut", 8 | comment = c(ORCID = "0000-0003-0748-6624")), 9 | person("Michel", "Lang", , "michellang@gmail.com", role = "aut", 10 | comment = c(ORCID = "0000-0001-9754-0393")), 11 | person("Bernd", "Bischl", , "bernd_bischl@gmx.net", role = "aut", 12 | comment = c(ORCID = "0000-0001-6002-6980")), 13 | person("Martin", "Binder", , "mlr.developer@mb706.com", role = "aut"), 14 | person("John", "Zobolas", , "bblodfon@gmail.com", role = "aut", 15 | comment = c(ORCID = "0000-0002-3609-8674")) 16 | ) 17 | Description: Extends 'mlr3' with filter methods for feature selection. 18 | Besides standalone filter methods built-in methods of any 19 | machine-learning algorithm are supported. Partial scoring of 20 | multivariate filter methods is supported. 
21 | License: LGPL-3 22 | URL: https://mlr3filters.mlr-org.com, 23 | https://github.com/mlr-org/mlr3filters 24 | BugReports: https://github.com/mlr-org/mlr3filters/issues 25 | Depends: 26 | R (>= 3.1.0) 27 | Imports: 28 | backports, 29 | checkmate, 30 | data.table, 31 | mlr3 (>= 0.12.0), 32 | mlr3misc, 33 | paradox, 34 | R6 35 | Suggests: 36 | Boruta, 37 | care, 38 | caret, 39 | carSurv, 40 | FSelectorRcpp, 41 | knitr, 42 | lgr, 43 | mlr3learners, 44 | mlr3measures, 45 | mlr3pipelines, 46 | praznik, 47 | rpart, 48 | survival, 49 | testthat (>= 3.0.0), 50 | withr 51 | Config/testthat/edition: 3 52 | Encoding: UTF-8 53 | NeedsCompilation: no 54 | Roxygen: list(markdown = TRUE, r6 = TRUE) 55 | RoxygenNote: 7.3.2 56 | Collate: 57 | 'Filter.R' 58 | 'mlr_filters.R' 59 | 'FilterAUC.R' 60 | 'FilterAnova.R' 61 | 'FilterBoruta.R' 62 | 'FilterCMIM.R' 63 | 'FilterCarScore.R' 64 | 'FilterCarSurvScore.R' 65 | 'FilterCorrelation.R' 66 | 'FilterDISR.R' 67 | 'FilterFindCorrelation.R' 68 | 'FilterLearner.R' 69 | 'FilterImportance.R' 70 | 'FilterInformationGain.R' 71 | 'FilterJMI.R' 72 | 'FilterJMIM.R' 73 | 'FilterKruskalTest.R' 74 | 'FilterMIM.R' 75 | 'FilterMRMR.R' 76 | 'FilterNJMIM.R' 77 | 'FilterPerformance.R' 78 | 'FilterPermutation.R' 79 | 'FilterRelief.R' 80 | 'FilterSelectedFeatures.R' 81 | 'FilterUnivariateCox.R' 82 | 'FilterVariance.R' 83 | 'bibentries.R' 84 | 'flt.R' 85 | 'helper.R' 86 | 'reexports.R' 87 | 'zzz.R' 88 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(as.data.table,DictionaryFilter) 4 | S3method(as.data.table,Filter) 5 | export(Filter) 6 | export(FilterAUC) 7 | export(FilterAnova) 8 | export(FilterBoruta) 9 | export(FilterCMIM) 10 | export(FilterCarScore) 11 | export(FilterCarSurvScore) 12 | export(FilterCorrelation) 13 | export(FilterDISR) 14 | 
export(FilterFindCorrelation) 15 | export(FilterImportance) 16 | export(FilterInformationGain) 17 | export(FilterJMI) 18 | export(FilterJMIM) 19 | export(FilterKruskalTest) 20 | export(FilterMIM) 21 | export(FilterMRMR) 22 | export(FilterNJMIM) 23 | export(FilterPerformance) 24 | export(FilterPermutation) 25 | export(FilterRelief) 26 | export(FilterSelectedFeatures) 27 | export(FilterUnivariateCox) 28 | export(FilterVariance) 29 | export(as.data.table) 30 | export(flt) 31 | export(flts) 32 | export(mlr_filters) 33 | import(checkmate) 34 | import(data.table) 35 | import(mlr3) 36 | import(mlr3misc) 37 | import(paradox) 38 | importFrom(R6,R6Class) 39 | importFrom(data.table,as.data.table) 40 | importFrom(stats,aov) 41 | importFrom(stats,kruskal.test) 42 | importFrom(stats,runif) 43 | importFrom(stats,var) 44 | importFrom(utils,bibentry) 45 | importFrom(utils,head) 46 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # mlr3filters (development version) 2 | 3 | # mlr3filters 0.8.1 4 | 5 | * compatibility: mlr3 0.22.0 6 | 7 | # mlr3filters 0.8.0 8 | 9 | * Added `FilterBoruta` 10 | * Fixed issue with `FilterPerformance` where the arg `measure` wasn't passed on 11 | * Added `FilterUnivariateCox` (thanks to @bblodfon) 12 | * Parameter value `na.rm` is properly initialized to `TRUE` (thanks to @bblodfon) 13 | * Bugfix: property `missings` is now set correctly for `FilterFindCorrelation` 14 | * Bugfix: `$hash` now works for `Filter`s 15 | 16 | # mlr3filters 0.7.1 17 | 18 | * Tagged multiple filters as being able to gracefully handle missing values. 19 | * Added more supported feature types to FilterCarScore. 20 | * Improved documentation. 
21 | 22 | # mlr3filters 0.7.0 23 | 24 | * Features are now checked for missing values to improve error messages (#140) 25 | * Removed deprecated functions 26 | * Use featureless learner in defaults (#124) 27 | * Field `task_type` of class `Filter` has been renamed to `task_types`. 28 | 29 | # mlr3filters 0.6.0 30 | 31 | * Add `FilterCarSurvScore` (#120, @mllg) 32 | * Use featureless learner instead of rpart as default learner for `FilterImportance` and `FilterPerformance` (#124) 33 | * Add documentation for PipeOpFilter 34 | * Add mlr3pipelines examples to help pages (#135, @sebffischer) 35 | * Add `label` arg to `Filter` class (#121, @mllg) 36 | 37 | # mlr3filters 0.5.0 38 | 39 | * Add references to benchmark paper and praznik paper (#104) 40 | * New filter `FilterSelectedFeatures` which makes use of embedded feature selection methods of learners. 41 | See the help page for more details (#102) 42 | * Allow `NA` as task type. 43 | This makes it possible to use other tasks than `"regr"` or `"classif"` for certain filters, e.g. `FilterVariance` (#106) 44 | 45 | 46 | # mlr3filters 0.4.2 47 | 48 | * Fixes an issue where argument `nfeat` was not passed down to {praznik} filters (#97) 49 | 50 | 51 | # mlr3filters 0.4.1 52 | 53 | * Disable threading in praznik filters by default (5f24742e9b92f6a5f828c4f755be3fb53427afdb, @mllg) 54 | Enable by setting hyperparameter `threads` >= 2 or to `0` for auto-detection of available cores (#93, @mllg) 55 | * Document return type of private `.calculate()` (#92, @mllg) 56 | * Allow `NA` in returned vectors. 57 | Features with missing values as well as features with no calculated score are automatically ranked last, in a random order. (#92, @mllg) 58 | * praznik filters now also support `regr` Tasks (#90, @bommert) 59 | 60 | 61 | # mlr3filters 0.4.0 62 | 63 | * Add ReliefF filter (#86) 64 | * Fix praznik scores calculation: praznik filters are not monotone in the selected features due to their iterative fashion. 
E.g., the first selected feature can have a score of 5, the second selected feature a score of 10. This version replaces the praznik scores by a simple sequence (#87, @mllg) 65 | 66 | 67 | # mlr3filters 0.3.0 68 | 69 | * Add Permutation (#70) 70 | * Add `flts()` (#77) 71 | * Github Actions: set cron job to 4am to avoid potential download issues with R-devel on macOS 72 | * Filters now have a help method `$help()` which opens the respective help page (#68) 73 | 74 | 75 | # mlr3filters 0.2.0 76 | 77 | ## Internal 78 | 79 | * Use `private$.calculate` instead of public "calculate" method for Filters 80 | * switch from Travis to GitHub Actions 81 | * Use Roxygen R6 notation for docs 82 | 83 | ## Enhancements 84 | 85 | * new filter `FilterFindCorrelation` (#62, @mb706) 86 | 87 | 88 | # mlr3filters 0.1.1 89 | 90 | * Replace dependency `Metrics` with `mlr3measures`. 91 | 92 | 93 | # mlr3filters 0.1.0 94 | 95 | * Initial CRAN release. 96 | -------------------------------------------------------------------------------- /R/FilterAUC.R: -------------------------------------------------------------------------------- 1 | #' @title AUC Filter 2 | #' 3 | #' @name mlr_filters_auc 4 | #' 5 | #' @description 6 | #' Area under the (ROC) Curve filter, analogously to [mlr3measures::auc()] from 7 | #' \CRANpkg{mlr3measures}. Missing values of the features are removed before 8 | #' calculating the AUC. If the AUC is undefined for the input, it is set to 0.5 9 | #' (random classifier). The absolute value of the difference between the AUC and 10 | #' 0.5 is used as final filter value. 
#'
#' @references
#' For a benchmark of filter methods:
#'
#' `r format_bib("bommert_2020")`
#'
#' @family Filter
#' @include Filter.R
#' @template seealso_filter
#' @export
#' @examples
#' task = mlr3::tsk("sonar")
#' filter = flt("auc")
#' filter$calculate(task)
#' head(as.data.table(filter), 3)
#'
#' if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart"), quietly = TRUE)) {
#'   library("mlr3pipelines")
#'   task = mlr3::tsk("spam")
#'
#'   # Note: `filter.frac` is selected randomly and should be tuned.
#'
#'   graph = po("filter", filter = flt("auc"), filter.frac = 0.5) %>>%
#'     po("learner", mlr3::lrn("classif.rpart"))
#'
#'   graph$train(task)
#' }
FilterAUC = R6Class("FilterAUC",
  inherit = Filter,

  public = list(

    #' @description Create a FilterAUC object.
    initialize = function() {
      super$initialize(
        id = "auc",
        task_types = "classif",
        task_properties = "twoclass",
        feature_types = c("integer", "numeric"),
        packages = "mlr3measures",
        label = "Area Under the ROC Curve Score",
        man = "mlr3filters::mlr_filters_auc"
      )
    }
  ),

  private = list(
    # Scores every feature by |0.5 - AUC|, using the raw feature values as
    # "probabilities" for the positive class.
    # `nfeat` is part of the `.calculate()` signature but is not used here.
    .calculate = function(task, nfeat) {
      # logical vector: TRUE where the observation belongs to the positive class
      y = task$truth() == task$positive
      x = task$data(cols = task$feature_names)
      score = map_dbl(x, function(x) {
        # drop observations with a missing feature value before computing the AUC
        keep = !is.na(x)
        auc(y[keep], x[keep])
      })
      abs(0.5 - score)
    }
  )
)

#' @include mlr_filters.R
mlr_filters$add("auc", FilterAUC)


# Rank-based AUC of the scores `prob` with respect to the logical vector `truth`
# (TRUE = positive observation).
#
# Returns 0.5 if `truth` contains only positives or only negatives, i.e. when
# the AUC is undefined.
auc = function(truth, prob) {
  # Count in doubles: `sum()` of a logical vector and `length()` both return
  # integers, so `n_pos * (n_pos + 1L)` and `n_pos * n_neg` would overflow to
  # NA (with a warning) once the products exceed `.Machine$integer.max`.
  n_pos = as.numeric(sum(truth))
  n_neg = length(truth) - n_pos
  if (n_pos == 0 || n_neg == 0) {
    return(0.5) # nocov
  }
  # average ranks so that tied scores contribute equally
  r = rank(prob, ties.method = "average")
  (sum(r[truth]) - n_pos * (n_pos + 1) / 2) / (n_pos * n_neg)
}
# ---- /R/FilterAnova.R ----
#' @title ANOVA F-Test Filter
#'
#' @name mlr_filters_anova
#'
#' @description ANOVA F-Test filter calling [stats::aov()]. Note that this is
#' equivalent to a \eqn{t}-test for binary classification.
#'
#' The filter value is `-log10(p)` where `p` is the \eqn{p}-value. This
#' transformation is necessary to ensure numerical stability for very small
#' \eqn{p}-values.
#'
#' @references
#' For a benchmark of filter methods:
#'
#' `r format_bib("bommert_2020")`
#'
#' @family Filter
#' @include Filter.R
#' @importFrom stats aov
#' @template seealso_filter
#' @export
#' @examples
#' task = mlr3::tsk("iris")
#' filter = flt("anova")
#' filter$calculate(task)
#' head(as.data.table(filter), 3)
#'
#' # transform to p-value
#' 10^(-filter$scores)
#'
#' if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart"), quietly = TRUE)) {
#'   library("mlr3pipelines")
#'   task = mlr3::tsk("spam")
#'
#'   # Note: `filter.frac` is selected randomly and should be tuned.
#'
#'   graph = po("filter", filter = flt("anova"), filter.frac = 0.5) %>>%
#'     po("learner", mlr3::lrn("classif.rpart"))
#'
#'   graph$train(task)
#' }
FilterAnova = R6Class("FilterAnova",
  inherit = Filter,

  public = list(

    #' @description Create a FilterAnova object.
    initialize = function() {
      super$initialize(
        id = "anova",
        packages = "stats",
        feature_types = c("integer", "numeric"),
        task_types = "classif",
        label = "ANOVA F-Test",
        man = "mlr3filters::mlr_filters_anova"
      )
    }
  ),

  private = list(
    # Fits one `aov()` model per feature and returns the named vector of
    # `-log10(p)` values. `nfeat` is part of the signature but is not used here.
    .calculate = function(task, nfeat) {
      dt = task$data()
      target_name = task$target_names
      feature_names = task$feature_names

      # p-value taken from the first row of the ANOVA table of a single feature;
      # the formula is built from the feature name and the target name
      p_value = function(feature) {
        fit = aov(formulate(feature, target_name), data = dt)
        summary(fit)[[1L]][1L, "Pr(>F)"]
      }

      set_names(-log10(map_dbl(feature_names, p_value)), feature_names)
    }
  )
)

#' @include mlr_filters.R
mlr_filters$add("anova", FilterAnova)

# ---- /R/FilterBoruta.R ----
#' @title Boruta Filter
#'
#' @name mlr_filters_boruta
#'
#' @description
#' Filter using the Boruta algorithm for feature selection.
#' If `keep = "tentative"`, confirmed and tentative features are returned.
#' Note that there is no ordering in the selected features.
#' Selected features get a score of 1, deselected features get a score of 0.
#' The order of selected features is random.
#' In combination with \CRANpkg{mlr3pipelines}, only the filter criterion `cutoff` makes sense.
#'
#' @section Initial parameter values:
#' - `num.threads`:
#'   - Actual default: `NULL`, triggering auto-detection of the number of CPUs.
#'   - Adjusted value: 1.
#'   - Reason for change: Conflicting with parallelization via \CRANpkg{future}.
#'
#' @references
#' `r format_bib("kursa_2010")`
#'
#' @family Filter
#' @include Filter.R
#' @template seealso_filter
#' @export
#' @examples
#' \donttest{
#' if (requireNamespace("Boruta")) {
#'   task = mlr3::tsk("sonar")
#'   filter = flt("boruta")
#'   filter$calculate(task)
#'   as.data.table(filter)
#' }
#' }
FilterBoruta = R6Class("FilterBoruta",
  inherit = Filter,

  public = list(

    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {

      param_set = ps(
        pValue = p_dbl(default = 0.01),
        mcAdj = p_lgl(default = TRUE),
        maxRuns = p_int(lower = 1, default = 100),
        doTrace = p_int(lower = 0, upper = 4, default = 0),
        holdHistory = p_lgl(default = TRUE),
        getImp = p_uty(),
        keep = p_fct(levels = c("confirmed", "tentative"), default = "confirmed"),
        num.threads = p_int(lower = 1, default = 1)
      )

      # initial values: keep only confirmed features, run single-threaded
      param_set$set_values(keep = "confirmed", num.threads = 1)

      super$initialize(
        id = "boruta",
        task_types = c("regr", "classif"),
        param_set = param_set,
        packages = "Boruta",
        feature_types = c("integer", "numeric"),
        label = "Boruta",
        man = "mlr3filters::mlr_filters_boruta"
      )
    }
  ),

  private = list(
    # Runs `Boruta::Boruta()` on the task and returns a named 0/1 vector:
    # 1 for selected features, 0 for all others.
    # `nfeat` is part of the signature but is not used here.
    .calculate = function(task, nfeat) {
      pv = self$param_set$values
      data = task$data()
      target = task$target_names
      features = task$feature_names
      formula = formulate(target, features)

      # `keep` is consumed here and must not be forwarded to `Boruta::Boruta()`.
      # If the user replaced the parameter values and thereby dropped `keep`,
      # `identical()` yields FALSE, i.e. the documented default "confirmed".
      with_tentative = identical(pv$keep, "tentative")
      pv$keep = NULL

      res = invoke(Boruta::Boruta, formula = formula, data = data, .args = pv)

      selected_features = Boruta::getSelectedAttributes(res, withTentative = with_tentative)

      set_names(as.numeric(features %in% selected_features), features)
    }
  )
)


#' @include mlr_filters.R
mlr_filters$add("boruta", FilterBoruta)

# ---- /R/FilterCMIM.R ----
#' @title Minimal Conditional Mutual Information Maximization Filter
#'
#' @name mlr_filters_cmim
#'
#' @description Minimal conditional mutual information maximization filter
#' calling [praznik::CMIM()] from package \CRANpkg{praznik}.
#'
#' This filter supports partial scoring (see [Filter]).
#'
#' @references
#' `r format_bib("kursa_2021")`
#'
#' For a benchmark of filter methods:
#'
#' `r format_bib("bommert_2020")`
#'
#' @template details_praznik
#' @family Filter
#' @include Filter.R
#' @template seealso_filter
#' @export
#' @examples
#' if (requireNamespace("praznik")) {
#'   task = mlr3::tsk("iris")
#'   filter = flt("cmim")
#'   filter$calculate(task, nfeat = 2)
#'   as.data.table(filter)
#' }
#'
#' if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart", "praznik"), quietly = TRUE)) {
#'   library("mlr3pipelines")
#'   task = mlr3::tsk("spam")
#'
#'   # Note: `filter.frac` is selected randomly and should be tuned.
#'
#'   graph = po("filter", filter = flt("cmim"), filter.frac = 0.5) %>>%
#'     po("learner", mlr3::lrn("classif.rpart"))
#'
#'   graph$train(task)
#' }
FilterCMIM = R6Class("FilterCMIM",
  inherit = Filter,

  public = list(

    #' @description Create a FilterCMIM object.
    initialize = function() {
      param_set = ps(
        threads = p_int(lower = 0L, default = 0L, tags = "threads")
      )
      # initial value differs from the declared default: one thread
      param_set$values = list(threads = 1L)

      super$initialize(
        id = "cmim",
        task_types = c("classif", "regr"),
        param_set = param_set,
        feature_types = c("integer", "numeric", "factor", "ordered"),
        packages = "praznik",
        label = "Minimal Conditional Mutual Information Maximization",
        man = "mlr3filters::mlr_filters_cmim"
      )
    }
  ),

  private = list(
    # Delegates the scoring to the shared helper `call_praznik()` (defined
    # outside this file), passing `praznik::CMIM` as the scoring function.
    .calculate = function(task, nfeat) {
      call_praznik(self, task, praznik::CMIM, nfeat)
    }
  )
)

#' @include mlr_filters.R
mlr_filters$add("cmim", FilterCMIM)

# ---- /R/FilterCarScore.R ----
#' @title Correlation-Adjusted Marginal Correlation Score Filter
#'
#' @name mlr_filters_carscore
#'
#' @description Calculates the Correlation-Adjusted (marginal) coRrelation scores
#' (short CAR scores) implemented in [care::carscore()] in package
#' \CRANpkg{care}. The CAR scores for a set of features are defined as the
#' correlations between the target and the decorrelated features. The filter
#' returns the absolute value of the calculated scores.
#'
#' Argument `verbose` defaults to `FALSE`.
12 | #' 13 | #' @family Filter 14 | #' @include Filter.R 15 | #' @template seealso_filter 16 | #' @export 17 | #' @examples 18 | #' if (requireNamespace("care")) { 19 | #' task = mlr3::tsk("mtcars") 20 | #' filter = flt("carscore") 21 | #' filter$calculate(task) 22 | #' head(as.data.table(filter), 3) 23 | #' 24 | #' ## changing the filter settings 25 | #' filter = flt("carscore") 26 | #' filter$param_set$values = list("diagonal" = TRUE) 27 | #' filter$calculate(task) 28 | #' head(as.data.table(filter), 3) 29 | #' } 30 | #' 31 | #' if (mlr3misc::require_namespaces(c("mlr3pipelines", "care", "rpart"), quietly = TRUE)) { 32 | #' library("mlr3pipelines") 33 | #' task = mlr3::tsk("mtcars") 34 | #' 35 | #' # Note: `filter.frac` is selected randomly and should be tuned. 36 | #' 37 | #' graph = po("filter", filter = flt("carscore"), filter.frac = 0.5) %>>% 38 | #' po("learner", mlr3::lrn("regr.rpart")) 39 | #' 40 | #' graph$train(task) 41 | #' } 42 | FilterCarScore = R6Class("FilterCarScore", 43 | inherit = Filter, 44 | 45 | public = list( 46 | #' @description Create a FilterCarScore object. 
47 | initialize = function() { 48 | param_set = ps( 49 | lambda = p_dbl(lower = 0, upper = 1, default = NO_DEF), 50 | diagonal = p_lgl(default = FALSE), 51 | verbose = p_lgl(default = TRUE) 52 | ) 53 | param_set$values = list(verbose = FALSE) 54 | 55 | super$initialize( 56 | id = "carscore", 57 | task_types = "regr", 58 | param_set = param_set, 59 | feature_types = c("logical", "integer", "numeric"), 60 | packages = "care", 61 | label = "Correlation-Adjusted coRrelation Score", 62 | man = "mlr3filters::mlr_filters_carscore" 63 | ) 64 | } 65 | ), 66 | 67 | private = list( 68 | .calculate = function(task, nfeat) { 69 | target = task$truth() 70 | features = as_numeric_matrix(task$data(cols = task$feature_names)) 71 | 72 | pv = self$param_set$values 73 | scores = invoke(care::carscore, 74 | Xtrain = features, Ytrain = target, 75 | .args = pv) 76 | set_names(abs(scores), names(scores)) 77 | } 78 | ) 79 | ) 80 | 81 | #' @include mlr_filters.R 82 | mlr_filters$add("carscore", FilterCarScore) 83 | -------------------------------------------------------------------------------- /R/FilterCarSurvScore.R: -------------------------------------------------------------------------------- 1 | #' @title Correlation-Adjusted Survival Score Filter 2 | #' 3 | #' @name mlr_filters_carsurvscore 4 | #' 5 | #' @description Calculates CARS scores for right-censored survival tasks. 6 | #' Calls the implementation in [carSurv::carSurvScore()] in package 7 | #' \CRANpkg{carSurv}. 8 | #' 9 | #' @references 10 | #' `r format_bib("bommert_2021")` 11 | #' 12 | #' @family Filter 13 | #' @include Filter.R 14 | #' @template seealso_filter 15 | #' @export 16 | FilterCarSurvScore = R6Class("FilterCarSurvScore", 17 | inherit = Filter, 18 | 19 | public = list( 20 | #' @description Create a FilterCarSurvScore object. 
21 | initialize = function() { 22 | param_set = ps( # named `param_set` like the sibling filters, so the local does not mask the `ps()` constructor 23 | maxIPCweight = p_int(lower = 0, default = 10), 24 | denom = p_fct(c("1/n", "sum_w"), default = "1/n") 25 | ) 26 | super$initialize( 27 | id = "surv.carsurvscore", 28 | packages = c("carSurv", "mlr3proba"), 29 | param_set = param_set, 30 | feature_types = c("integer", "numeric"), 31 | task_types = "surv", 32 | label = "Correlation-Adjusted coRrelation Survival Score", 33 | man = "mlr3filters::mlr_filters_carsurvscore" 34 | ) 35 | } 36 | ), 37 | 38 | private = list( 39 | .calculate = function(task, nfeat) { 40 | pv = self$param_set$values 41 | 42 | surv = task$truth() 43 | X = as.matrix(task$data(cols = task$feature_names)) 44 | scores = invoke(carSurv::carSurvScore, 45 | obsTime = surv[, 1L], 46 | obsEvent = surv[, 2L], 47 | X = X, 48 | .args = pv 49 | ) 50 | 51 | set_names(abs(scores), colnames(X)) 52 | } 53 | ) 54 | ) 55 | 56 | #' @include mlr_filters.R 57 | mlr_filters$add("carsurvscore", FilterCarSurvScore) 58 | -------------------------------------------------------------------------------- /R/FilterCorrelation.R: -------------------------------------------------------------------------------- 1 | #' @title Correlation Filter 2 | #' 3 | #' @name mlr_filters_correlation 4 | #' 5 | #' @description 6 | #' Simple correlation filter calling [stats::cor()]. 7 | #' The filter score is the absolute value of the correlation. 8 | #' 9 | #' @note 10 | #' This filter, in its default settings, can handle missing values in the features. 11 | #' However, the resulting filter scores may be misleading or at least difficult to compare 12 | #' if some features have a large proportion of missing values. 13 | #' 14 | #' If a feature has no non-missing value, the resulting score will be `NA`. 15 | #' Missing scores appear in a random, non-deterministic order at the end of the vector of scores.
16 | #' 17 | #' @references 18 | #' For a benchmark of filter methods: 19 | #' 20 | #' `r format_bib("bommert_2020")` 21 | #' 22 | #' @family Filter 23 | #' @include Filter.R 24 | #' @template seealso_filter 25 | #' @export 26 | #' @examples 27 | #' ## Pearson (default) 28 | #' task = mlr3::tsk("mtcars") 29 | #' filter = flt("correlation") 30 | #' filter$calculate(task) 31 | #' as.data.table(filter) 32 | #' 33 | #' ## Spearman 34 | #' filter = FilterCorrelation$new() 35 | #' filter$param_set$values = list("method" = "spearman") 36 | #' filter$calculate(task) 37 | #' as.data.table(filter) 38 | #' if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart"), quietly = TRUE)) { 39 | #' library("mlr3pipelines") 40 | #' task = mlr3::tsk("mtcars") 41 | #' 42 | #' # Note: `filter.frac` is selected randomly and should be tuned. 43 | #' 44 | #' graph = po("filter", filter = flt("correlation"), filter.frac = 0.5) %>>% 45 | #' po("learner", mlr3::lrn("regr.rpart")) 46 | #' 47 | #' graph$train(task) 48 | #' } 49 | FilterCorrelation = R6Class("FilterCorrelation", 50 | inherit = Filter, 51 | 52 | public = list( 53 | 54 | #' @description Create a FilterCorrelation object. 
55 | initialize = function() { 56 | param_set = ps( 57 | use = p_fct(c("everything", "all.obs", "complete.obs", "na.or.complete", "pairwise.complete.obs"), 58 | default = "everything"), 59 | method = p_fct(c("pearson", "kendall", "spearman"), default = "pearson") 60 | ) 61 | 62 | super$initialize( 63 | id = "correlation", 64 | task_types = "regr", 65 | param_set = param_set, 66 | feature_types = c("integer", "numeric"), 67 | packages = "stats", 68 | label = "Correlation", 69 | man = "mlr3filters::mlr_filters_correlation" 70 | ) 71 | } 72 | ), 73 | 74 | private = list( 75 | .calculate = function(task, nfeat) { 76 | fn = task$feature_names 77 | pv = self$param_set$values 78 | score = invoke(stats::cor, 79 | x = as.matrix(task$data(cols = fn)), 80 | y = as.matrix(task$truth()), 81 | .args = pv)[, 1L] 82 | set_names(abs(score), fn) 83 | }, 84 | 85 | .get_properties = function() { 86 | "missings" 87 | } 88 | 89 | ) 90 | ) 91 | 92 | #' @include mlr_filters.R 93 | mlr_filters$add("correlation", FilterCorrelation) 94 | -------------------------------------------------------------------------------- /R/FilterDISR.R: -------------------------------------------------------------------------------- 1 | #' @title Double Input Symmetrical Relevance Filter 2 | #' 3 | #' @name mlr_filters_disr 4 | #' 5 | #' @description Double input symmetrical relevance filter calling 6 | #' [praznik::DISR()] from package \CRANpkg{praznik}. 7 | #' 8 | #' This filter supports partial scoring (see [Filter]). 
9 | #' 10 | #' @references 11 | #' `r format_bib("kursa_2021")` 12 | #' 13 | #' For a benchmark of filter methods: 14 | #' 15 | #' `r format_bib("bommert_2020")` 16 | #' 17 | #' @template details_praznik 18 | #' @family Filter 19 | #' @include Filter.R 20 | #' @template seealso_filter 21 | #' @export 22 | #' @examples 23 | #' if (requireNamespace("praznik")) { 24 | #' task = mlr3::tsk("iris") 25 | #' filter = flt("disr") 26 | #' filter$calculate(task) 27 | #' as.data.table(filter) 28 | #' } 29 | #' 30 | #' if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart", "praznik"), quietly = TRUE)) { 31 | #' library("mlr3pipelines") 32 | #' task = mlr3::tsk("spam") 33 | #' 34 | #' # Note: `filter.frac` is selected randomly and should be tuned. 35 | #' 36 | #' graph = po("filter", filter = flt("disr"), filter.frac = 0.5) %>>% 37 | #' po("learner", mlr3::lrn("classif.rpart")) 38 | #' 39 | #' graph$train(task) 40 | #' } 41 | FilterDISR = R6Class("FilterDISR", 42 | inherit = Filter, 43 | 44 | public = list( 45 | 46 | #' @description Create a FilterDISR object. 
47 | initialize = function() { 48 | param_set = ps( 49 | threads = p_int(lower = 0L, default = 0L, tags = "threads") 50 | ) 51 | param_set$values = list(threads = 1L) 52 | 53 | super$initialize( 54 | id = "disr", 55 | task_types = c("classif", "regr"), 56 | param_set = param_set, 57 | feature_types = c("integer", "numeric", "factor", "ordered"), 58 | packages = "praznik", 59 | label = "Double Input Symmetrical Relevance", 60 | man = "mlr3filters::mlr_filters_disr" 61 | ) 62 | } 63 | ), 64 | 65 | private = list( 66 | .calculate = function(task, nfeat) { 67 | call_praznik(self, task, praznik::DISR, nfeat) 68 | } 69 | ) 70 | ) 71 | 72 | #' @include mlr_filters.R 73 | mlr_filters$add("disr", FilterDISR) 74 | -------------------------------------------------------------------------------- /R/FilterFindCorrelation.R: -------------------------------------------------------------------------------- 1 | #' @title Correlation Filter 2 | #' 3 | #' @name mlr_filters_find_correlation 4 | #' 5 | #' @description 6 | #' Simple filter emulating `caret::findCorrelation(exact = FALSE)`. 7 | #' 8 | #' This gives each feature a score between 0 and 1 that is *one minus* the 9 | #' cutoff value for which it is excluded when using [caret::findCorrelation()]. 10 | #' The negative is used because [caret::findCorrelation()] excludes everything 11 | #' *above* a cutoff, while filters exclude everything below a cutoff. 12 | #' Here the filter scores are shifted by +1 to get positive values that align 13 | #' with the way other filters work. 14 | #' 15 | #' Subsequently `caret::findCorrelation(cutoff = 0.9)` lists the same features 16 | #' that are excluded with `FilterFindCorrelation` at score 0.1 (= 1 - 0.9).
17 | #' 18 | #' @family Filter 19 | #' @include Filter.R 20 | #' @template seealso_filter 21 | #' @export 22 | #' @examples 23 | #' # Pearson (default) 24 | #' task = mlr3::tsk("mtcars") 25 | #' filter = flt("find_correlation") 26 | #' filter$calculate(task) 27 | #' as.data.table(filter) 28 | #' 29 | #' ## Spearman 30 | #' filter = flt("find_correlation", method = "spearman") 31 | #' filter$calculate(task) 32 | #' as.data.table(filter) 33 | #' 34 | #' if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart"), quietly = TRUE)) { 35 | #' library("mlr3pipelines") 36 | #' task = mlr3::tsk("spam") 37 | #' 38 | #' # Note: `filter.frac` is selected randomly and should be tuned. 39 | #' 40 | #' graph = po("filter", filter = flt("find_correlation"), filter.frac = 0.5) %>>% 41 | #' po("learner", mlr3::lrn("classif.rpart")) 42 | #' 43 | #' graph$train(task) 44 | #' } 45 | FilterFindCorrelation = R6Class("FilterFindCorrelation", 46 | inherit = Filter, 47 | 48 | public = list( 49 | 50 | #' @description Create a FilterFindCorrelation object. 
51 | initialize = function() { 52 | param_set = ps( 53 | use = p_fct(c("everything", "all.obs", "complete.obs", "na.or.complete", "pairwise.complete.obs"), default = "everything"), 54 | method = p_fct(levels = c("pearson", "kendall", "spearman"), default = "pearson") 55 | ) 56 | 57 | super$initialize( 58 | id = "find_correlation", 59 | task_types = NA_character_, 60 | param_set = param_set, 61 | feature_types = c("integer", "numeric"), 62 | packages = "stats", 63 | label = "Correlation-based Score", 64 | man = "mlr3filters::mlr_filters_find_correlation" 65 | ) 66 | } 67 | ), 68 | 69 | private = list( 70 | .calculate = function(task, nfeat) { 71 | 72 | fn = task$feature_names 73 | pv = self$param_set$values 74 | cm = invoke(stats::cor, 75 | x = task$data(cols = fn), 76 | .args = pv) 77 | cm = abs(cm) 78 | # a feature is removed as soon as it is in the higher average correlation 79 | # col in a pair (note: tie broken by removing /later/ feature first) 80 | avg_cor = colMeans(cm) 81 | # decreasing = TRUE to emulate tie breaking 82 | avg_cor_order = order(avg_cor, decreasing = TRUE) 83 | cm = cm[avg_cor_order, avg_cor_order, drop = FALSE] 84 | # Rows / Columns of cm are now ordered by correlation mean, highest first. 85 | # A feature i is excluded as soon as a lower-average-correlation feature 86 | # has correlation with i > cutoff. This means the cutoff at which i is 87 | # excluded is the max of the correlation with all lower-avg-cor features. 88 | # Therefore we look for the highest feature correlation col-wise in the 89 | # lower triangle of the ordered cm. 90 | 91 | # the lowest avg col feature is never removed by caret, so its cutoff is 92 | # 0. 93 | cm[upper.tri(cm, diag = TRUE)] = 0 94 | # The following has the correct names and values, BUT we need scores in 95 | # reverse order. Shift by 1 to get positive values. 
96 | 1 - apply(cm, 2, max) 97 | }, 98 | .get_properties = function() { 99 | use = self$param_set$values$use %??% "everything" 100 | if (use %in% c("complete.obs", "pairwise.complete.obs")) { 101 | "missings" 102 | } else { 103 | character(0) 104 | } 105 | } 106 | ) 107 | ) 108 | 109 | #' @include mlr_filters.R 110 | mlr_filters$add("find_correlation", FilterFindCorrelation) 111 | -------------------------------------------------------------------------------- /R/FilterImportance.R: -------------------------------------------------------------------------------- 1 | #' @title Filter for Embedded Feature Selection via Variable Importance 2 | #' 3 | #' @name mlr_filters_importance 4 | #' 5 | #' @description Variable Importance filter using embedded feature selection of 6 | #' machine learning algorithms. Takes a [mlr3::Learner] which is capable of 7 | #' extracting the variable importance (property "importance"), fits the model 8 | #' and extracts the importance values to use as filter scores. 9 | #' 10 | #' @family Filter 11 | #' @include FilterLearner.R 12 | #' @template seealso_filter 13 | #' @export 14 | #' @examples 15 | #' if (requireNamespace("rpart")) { 16 | #' task = mlr3::tsk("iris") 17 | #' learner = mlr3::lrn("classif.rpart") 18 | #' filter = flt("importance", learner = learner) 19 | #' filter$calculate(task) 20 | #' as.data.table(filter) 21 | #' } 22 | #' 23 | #' if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart", "mlr3learners"), quietly = TRUE)) { 24 | #' library("mlr3learners") 25 | #' library("mlr3pipelines") 26 | #' task = mlr3::tsk("sonar") 27 | #' 28 | #' learner = mlr3::lrn("classif.rpart") 29 | #' 30 | #' # Note: `filter.frac` is selected randomly and should be tuned. 
31 | #' 32 | #' graph = po("filter", filter = flt("importance", learner = learner), filter.frac = 0.5) %>>% 33 | #' po("learner", mlr3::lrn("classif.log_reg")) 34 | #' 35 | #' graph$train(task) 36 | #' } 37 | FilterImportance = R6Class("FilterImportance", 38 | inherit = FilterLearner, 39 | 40 | public = list( 41 | 42 | #' @field learner ([mlr3::Learner])\cr 43 | #' Learner to extract the importance values from. 44 | learner = NULL, 45 | 46 | #' @description Create a FilterImportance object. 47 | #' @param learner ([mlr3::Learner])\cr 48 | #' Learner to extract the importance values from. 49 | initialize = function(learner = mlr3::lrn("classif.featureless")) { 50 | self$learner = learner = assert_learner(as_learner(learner, clone = TRUE), 51 | properties = "importance") 52 | 53 | super$initialize( 54 | id = "importance", 55 | task_types = learner$task_type, 56 | feature_types = learner$feature_types, 57 | packages = learner$packages, 58 | param_set = learner$param_set, 59 | label = "Importance Score", 60 | man = "mlr3filters::mlr_filters_importance" 61 | ) 62 | } 63 | ), 64 | 65 | 66 | private = list( 67 | .calculate = function(task, nfeat) { 68 | learner = self$learner$clone(deep = TRUE) 69 | learner = learner$train(task = task) 70 | learner$base_learner()$importance() 71 | }, 72 | 73 | .get_properties = function() { 74 | intersect("missings", self$learner$properties) 75 | } 76 | ) 77 | ) 78 | 79 | #' @include mlr_filters.R 80 | mlr_filters$add("importance", FilterImportance) 81 | -------------------------------------------------------------------------------- /R/FilterInformationGain.R: -------------------------------------------------------------------------------- 1 | 2 | #' @title Information Gain Filter 3 | #' 4 | #' @name mlr_filters_information_gain 5 | #' 6 | #' @description Information gain filter calling 7 | #' [FSelectorRcpp::information_gain()] in package \CRANpkg{FSelectorRcpp}. 
Set 8 | #' parameter `"type"` to `"gainratio"` to calculate the gain ratio, or set to 9 | #' `"symuncert"` to calculate the symmetrical uncertainty (see 10 | #' [FSelectorRcpp::information_gain()]). Default is `"infogain"`. 11 | #' 12 | #' Argument `equal` defaults to `FALSE` for classification tasks, and to 13 | #' `TRUE` for regression tasks. 14 | #' 15 | #' @family Filter 16 | #' @include Filter.R 17 | #' @template seealso_filter 18 | #' @export 19 | #' @examples 20 | #' if (requireNamespace("FSelectorRcpp")) { 21 | #' ## InfoGain (default) 22 | #' task = mlr3::tsk("sonar") 23 | #' filter = flt("information_gain") 24 | #' filter$calculate(task) 25 | #' head(filter$scores, 3) 26 | #' as.data.table(filter) 27 | #' 28 | #' ## GainRatio 29 | #' 30 | #' filterGR = flt("information_gain") 31 | #' filterGR$param_set$values = list("type" = "gainratio") 32 | #' filterGR$calculate(task) 33 | #' head(as.data.table(filterGR), 3) 34 | #' 35 | #' } 36 | #' 37 | #' if (mlr3misc::require_namespaces(c("mlr3pipelines", "FSelectorRcpp", "rpart"), quietly = TRUE)) { 38 | #' library("mlr3pipelines") 39 | #' task = mlr3::tsk("spam") 40 | #' 41 | #' # Note: `filter.frac` is selected randomly and should be tuned. 42 | #' 43 | #' graph = po("filter", filter = flt("information_gain"), filter.frac = 0.5) %>>% 44 | #' po("learner", mlr3::lrn("classif.rpart")) 45 | #' 46 | #' graph$train(task) 47 | #' 48 | #' } 49 | FilterInformationGain = R6Class("FilterInformationGain", 50 | inherit = Filter, 51 | 52 | public = list( 53 | 54 | #' @description Create a FilterInformationGain object. 
55 | initialize = function() { 56 | param_set = ps( 57 | type = p_fct(c("infogain", "gainratio", "symuncert"), default = "infogain"), 58 | equal = p_lgl(default = FALSE), 59 | discIntegers = p_lgl(default = TRUE), 60 | threads = p_int(lower = 0L, default = 0L, tags = "threads") 61 | ) 62 | 63 | super$initialize( 64 | id = "information_gain", 65 | task_types = c("classif", "regr"), 66 | param_set = param_set, 67 | feature_types = c("integer", "numeric", "factor", "ordered"), 68 | packages = "FSelectorRcpp", 69 | label = "Information Gain", 70 | man = "mlr3filters::mlr_filters_information_gain" 71 | ) 72 | } 73 | ), 74 | 75 | private = list( 76 | .calculate = function(task, nfeat) { 77 | pv = self$param_set$values 78 | pv$type = pv$type %??% "infogain" 79 | pv$equal = pv$equal %??% (task$task_type == "regr") # parentheses required: `%??%` binds tighter than `==`, so without them a user-set `equal = TRUE` was compared to "regr" and became FALSE 80 | 81 | x = setDF(task$data(cols = task$feature_names)) 82 | y = task$truth() 83 | scores = invoke(FSelectorRcpp::information_gain, x = x, y = y, .args = pv) 84 | set_names(scores$importance, scores$attributes) 85 | }, 86 | 87 | .get_properties = function() { 88 | "missings" 89 | } 90 | ) 91 | ) 92 | 93 | #' @include mlr_filters.R 94 | mlr_filters$add("information_gain", FilterInformationGain) 95 | -------------------------------------------------------------------------------- /R/FilterJMI.R: -------------------------------------------------------------------------------- 1 | 2 | #' @title Joint Mutual Information Filter 3 | #' 4 | #' @name mlr_filters_jmi 5 | #' 6 | #' @description Joint mutual information filter calling [praznik::JMI()] in 7 | #' package \CRANpkg{praznik}. 8 | #' 9 | #' This filter supports partial scoring (see [Filter]).
10 | #' 11 | #' @references 12 | #' `r format_bib("kursa_2021")` 13 | #' 14 | #' For a benchmark of filter methods: 15 | #' 16 | #' `r format_bib("bommert_2020")` 17 | #' 18 | #' @template details_praznik 19 | #' @family Filter 20 | #' @include Filter.R 21 | #' @template seealso_filter 22 | #' @export 23 | #' @examples 24 | #' if (requireNamespace("praznik")) { 25 | #' task = mlr3::tsk("iris") 26 | #' filter = flt("jmi") 27 | #' filter$calculate(task, nfeat = 2) 28 | #' as.data.table(filter) 29 | #' } 30 | #' 31 | #' if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart", "praznik"), quietly = TRUE)) { 32 | #' library("mlr3pipelines") 33 | #' task = mlr3::tsk("spam") 34 | #' 35 | #' # Note: `filter.frac` is selected randomly and should be tuned. 36 | #' 37 | #' graph = po("filter", filter = flt("jmi"), filter.frac = 0.5) %>>% 38 | #' po("learner", mlr3::lrn("classif.rpart")) 39 | #' 40 | #' graph$train(task) 41 | #' } 42 | FilterJMI = R6Class("FilterJMI", 43 | inherit = Filter, 44 | 45 | public = list( 46 | 47 | #' @description Create a FilterJMI object. 
48 | initialize = function() { 49 | param_set = ps( 50 | threads = p_int(lower = 0L, default = 0L, tags = "threads") 51 | ) 52 | param_set$values = list(threads = 1L) 53 | 54 | super$initialize( 55 | id = "jmi", 56 | task_types = c("classif", "regr"), 57 | param_set = param_set, 58 | packages = "praznik", 59 | feature_types = c("integer", "numeric", "factor", "ordered"), 60 | label = "Joint Mutual Information", 61 | man = "mlr3filters::mlr_filters_jmi" 62 | ) 63 | } 64 | ), 65 | 66 | private = list( 67 | .calculate = function(task, nfeat) { 68 | call_praznik(self, task, praznik::JMI, nfeat) 69 | } 70 | ) 71 | ) 72 | 73 | #' @include mlr_filters.R 74 | mlr_filters$add("jmi", FilterJMI) 75 | -------------------------------------------------------------------------------- /R/FilterJMIM.R: -------------------------------------------------------------------------------- 1 | #' @title Minimal Joint Mutual Information Maximization Filter 2 | #' 3 | #' @name mlr_filters_jmim 4 | #' 5 | #' @description Minimal joint mutual information maximization filter calling 6 | #' [praznik::JMIM()] in package \CRANpkg{praznik}. 7 | #' 8 | #' This filter supports partial scoring (see [Filter]). 9 | #' 10 | #' @references 11 | #' `r format_bib("kursa_2021")` 12 | #' 13 | #' For a benchmark of filter methods: 14 | #' 15 | #' `r format_bib("bommert_2020")` 16 | #' 17 | #' @template details_praznik 18 | #' @family Filter 19 | #' @include Filter.R 20 | #' @template seealso_filter 21 | #' @export 22 | #' @examples 23 | #' if (requireNamespace("praznik")) { 24 | #' task = mlr3::tsk("iris") 25 | #' filter = flt("jmim") 26 | #' filter$calculate(task, nfeat = 2) 27 | #' as.data.table(filter) 28 | #' } 29 | #' 30 | #' if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart", "praznik"), quietly = TRUE)) { 31 | #' library("mlr3pipelines") 32 | #' task = mlr3::tsk("spam") 33 | #' 34 | #' # Note: `filter.frac` is selected randomly and should be tuned. 
35 | #' 36 | #' graph = po("filter", filter = flt("jmim"), filter.frac = 0.5) %>>% 37 | #' po("learner", mlr3::lrn("classif.rpart")) 38 | #' 39 | #' graph$train(task) 40 | #' } 41 | FilterJMIM = R6Class("FilterJMIM", 42 | inherit = Filter, 43 | 44 | public = list( 45 | 46 | #' @description Create a FilterJMIM object. 47 | initialize = function() { 48 | param_set = ps( 49 | threads = p_int(lower = 0L, default = 0L, tags = "threads") 50 | ) 51 | param_set$values = list(threads = 1L) 52 | super$initialize( 53 | id = "jmim", 54 | task_types = c("classif", "regr"), 55 | param_set = param_set, 56 | packages = "praznik", 57 | feature_types = c("integer", "numeric", "factor", "ordered"), 58 | label = "Minimal Joint Mutual Information Maximization", 59 | man = "mlr3filters::mlr_filters_jmim" 60 | ) 61 | } 62 | ), 63 | 64 | private = list( 65 | .calculate = function(task, nfeat) { 66 | call_praznik(self, task, praznik::JMIM, nfeat) 67 | } 68 | ) 69 | ) 70 | 71 | #' @include mlr_filters.R 72 | mlr_filters$add("jmim", FilterJMIM) 73 | -------------------------------------------------------------------------------- /R/FilterKruskalTest.R: -------------------------------------------------------------------------------- 1 | #' @title Kruskal-Wallis Test Filter 2 | #' 3 | #' @name mlr_filters_kruskal_test 4 | #' 5 | #' @description Kruskal-Wallis rank sum test filter calling [stats::kruskal.test()]. 6 | #' 7 | #' The filter value is `-log10(p)` where `p` is the \eqn{p}-value. This 8 | #' transformation is necessary to ensure numerical stability for very small 9 | #' \eqn{p}-values. 10 | 11 | #' @note 12 | #' This filter, in its default settings, can handle missing values in the features. 13 | #' However, the resulting filter scores may be misleading or at least difficult to compare 14 | #' if some features have a large proportion of missing values. 15 | #' 16 | #' If a feature has not at least one non-missing observation per label, the resulting score will be NA. 
17 | #' Missing scores appear in a random, non-deterministic order at the end of the vector of scores. 18 | #' 19 | #' 20 | #' @references 21 | #' For a benchmark of filter methods: 22 | #' 23 | #' `r format_bib("bommert_2020")` 24 | #' 25 | #' @family Filter 26 | #' @include Filter.R 27 | #' @importFrom stats kruskal.test 28 | #' @template seealso_filter 29 | #' @export 30 | #' @examples 31 | #' task = mlr3::tsk("iris") 32 | #' filter = flt("kruskal_test") 33 | #' filter$calculate(task) 34 | #' as.data.table(filter) 35 | #' 36 | #' # transform to p-value 37 | #' 10^(-filter$scores) 38 | #' 39 | #' if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart"), quietly = TRUE)) { 40 | #' library("mlr3pipelines") 41 | #' task = mlr3::tsk("spam") 42 | #' 43 | #' # Note: `filter.frac` is selected randomly and should be tuned. 44 | #' 45 | #' graph = po("filter", filter = flt("kruskal_test"), filter.frac = 0.5) %>>% 46 | #' po("learner", mlr3::lrn("classif.rpart")) 47 | #' 48 | #' graph$train(task) 49 | #' } 50 | FilterKruskalTest = R6Class("FilterKruskalTest", 51 | inherit = Filter, 52 | 53 | public = list( 54 | 55 | #' @description Create a FilterKruskalTest object. 
56 | initialize = function() { 57 | param_set = ps( 58 | na.action = p_fct(c("na.omit", "na.fail", "na.exclude"), default = "na.omit") 59 | ) 60 | 61 | super$initialize( 62 | id = "kruskal_test", 63 | task_types = "classif", 64 | param_set = param_set, 65 | packages = "stats", 66 | feature_types = c("integer", "numeric"), 67 | label = "Kruskal-Wallis Test", 68 | man = "mlr3filters::mlr_filters_kruskal_test" 69 | ) 70 | } 71 | ), 72 | 73 | private = list( 74 | .calculate = function(task, nfeat) { 75 | na_action = self$param_set$values$na.action %??% "na.omit" 76 | 77 | data = task$data(cols = task$feature_names) 78 | g = task$truth() 79 | 80 | -log10(map_dbl(data, function(x) { 81 | tab = table(g[!is.na(x)]) 82 | 83 | if (any(tab == 0L)) { 84 | NA_real_ 85 | } else { 86 | kruskal.test(x = x, g = g, na.action = na_action)$p.value 87 | } 88 | })) 89 | }, 90 | 91 | .get_properties = function() { 92 | ok = c("na.omit", "na.exclude") 93 | if ((self$param_set$values$na.action %??% "na.omit") %in% ok) "missings" else character() 94 | } 95 | ) 96 | ) 97 | 98 | #' @include mlr_filters.R 99 | mlr_filters$add("kruskal_test", FilterKruskalTest) 100 | -------------------------------------------------------------------------------- /R/FilterLearner.R: -------------------------------------------------------------------------------- 1 | #' @include Filter.R 2 | FilterLearner = R6Class("FilterLearner", inherit = Filter, 3 | active = list( 4 | #' @field hash (`character(1)`)\cr 5 | #' Hash (unique identifier) for this object. 6 | hash = function(rhs) { 7 | assert_ro_binding(rhs) 8 | calculate_hash(class(self), self$id, self$param_set$values, self$learner$hash) 9 | }, 10 | 11 | #' @field phash (`character(1)`)\cr 12 | #' Hash (unique identifier) for this partial object, excluding some components 13 | #' which are varied systematically during tuning (parameter values) or feature 14 | #' selection (feature names). 
15 | phash = function(rhs) { 16 | assert_ro_binding(rhs) 17 | calculate_hash(class(self), self$id, self$learner$hash) 18 | } 19 | ) 20 | ) 21 | -------------------------------------------------------------------------------- /R/FilterMIM.R: -------------------------------------------------------------------------------- 1 | #' @title Mutual Information Maximization Filter 2 | #' 3 | #' @name mlr_filters_mim 4 | #' 5 | #' @description Mutual information maximization filter calling 6 | #' [praznik::MIM()] in package \CRANpkg{praznik}. 7 | #' 8 | #' This filter supports partial scoring (see [Filter]). 9 | #' 10 | #' @references 11 | #' `r format_bib("kursa_2021")` 12 | #' 13 | #' For a benchmark of filter methods: 14 | #' 15 | #' `r format_bib("bommert_2020")` 16 | #' 17 | #' @template details_praznik 18 | #' @family Filter 19 | #' @include Filter.R 20 | #' @template seealso_filter 21 | #' @export 22 | #' @examples 23 | #' if (requireNamespace("praznik")) { 24 | #' task = mlr3::tsk("iris") 25 | #' filter = flt("mim") 26 | #' filter$calculate(task, nfeat = 2) 27 | #' as.data.table(filter) 28 | #' } 29 | #' 30 | #' if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart", "praznik"), quietly = TRUE)) { 31 | #' library("mlr3pipelines") 32 | #' task = mlr3::tsk("spam") 33 | #' 34 | #' # Note: `filter.frac` is selected randomly and should be tuned. 35 | #' 36 | #' graph = po("filter", filter = flt("mim"), filter.frac = 0.5) %>>% 37 | #' po("learner", mlr3::lrn("classif.rpart")) 38 | #' 39 | #' graph$train(task) 40 | #' } 41 | FilterMIM = R6Class("FilterMIM", 42 | inherit = Filter, 43 | 44 | public = list( 45 | 46 | #' @description Create a FilterMIM object.
47 | initialize = function() { 48 | param_set = ps( 49 | threads = p_int(lower = 0L, default = 0L, tags = "threads") 50 | ) 51 | param_set$values = list(threads = 1L) 52 | 53 | super$initialize( 54 | id = "mim", 55 | task_types = c("classif", "regr"), 56 | param_set = param_set, 57 | packages = "praznik", 58 | feature_types = c("integer", "numeric", "factor", "ordered"), 59 | label = "Mutual Information Maximization", 60 | man = "mlr3filters::mlr_filters_mim" 61 | ) 62 | } 63 | ), 64 | 65 | private = list( 66 | .calculate = function(task, nfeat) { 67 | call_praznik(self, task, praznik::MIM, nfeat) 68 | } 69 | ) 70 | ) 71 | 72 | #' @include mlr_filters.R 73 | mlr_filters$add("mim", FilterMIM) 74 | -------------------------------------------------------------------------------- /R/FilterMRMR.R: -------------------------------------------------------------------------------- 1 | #' @title Minimum Redundancy Maximal Relevancy Filter 2 | #' 3 | #' @name mlr_filters_mrmr 4 | #' 5 | #' @description Minimum redundancy maximal relevancy filter calling 6 | #' [praznik::MRMR()] in package \CRANpkg{praznik}. 7 | #' 8 | #' This filter supports partial scoring (see [Filter]). 9 | #' 10 | #' @references 11 | #' `r format_bib("kursa_2021")` 12 | #' 13 | #' For a benchmark of filter methods: 14 | #' 15 | #' `r format_bib("bommert_2020")` 16 | #' 17 | #' @template details_praznik 18 | #' @family Filter 19 | #' @include Filter.R 20 | #' @template seealso_filter 21 | #' @export 22 | #' @examples 23 | #' if (requireNamespace("praznik")) { 24 | #' task = mlr3::tsk("iris") 25 | #' filter = flt("mrmr") 26 | #' filter$calculate(task, nfeat = 2) 27 | #' as.data.table(filter) 28 | #' } 29 | #' 30 | #' if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart", "praznik"), quietly = TRUE)) { 31 | #' library("mlr3pipelines") 32 | #' task = mlr3::tsk("spam") 33 | #' 34 | #' # Note: `filter.frac` is selected randomly and should be tuned. 
35 | #' 36 | #' graph = po("filter", filter = flt("mrmr"), filter.frac = 0.5) %>>% 37 | #' po("learner", mlr3::lrn("classif.rpart")) 38 | #' 39 | #' graph$train(task) 40 | #' } 41 | FilterMRMR = R6Class("FilterMRMR", 42 | inherit = Filter, 43 | 44 | public = list( 45 | 46 | #' @description Create a FilterMRMR object. 47 | initialize = function() { 48 | param_set = ps( 49 | threads = p_int(lower = 0L, default = 0L, tags = "threads") 50 | ) 51 | param_set$values = list(threads = 1L) 52 | 53 | super$initialize( 54 | id = "mrmr", 55 | task_types = c("classif", "regr"), 56 | param_set = param_set, 57 | packages = "praznik", 58 | feature_types = c("integer", "numeric", "factor", "ordered"), 59 | label = "Minimum Redundancy Maximal Relevancy", 60 | man = "mlr3filters::mlr_filters_mrmr" 61 | ) 62 | } 63 | ), 64 | 65 | private = list( 66 | .calculate = function(task, nfeat) { 67 | call_praznik(self, task, praznik::MRMR, nfeat) 68 | } 69 | ) 70 | ) 71 | 72 | #' @include mlr_filters.R 73 | mlr_filters$add("mrmr", FilterMRMR) 74 | -------------------------------------------------------------------------------- /R/FilterNJMIM.R: -------------------------------------------------------------------------------- 1 | #' @title Minimal Normalised Joint Mutual Information Maximization Filter 2 | #' 3 | #' @name mlr_filters_njmim 4 | #' 5 | #' @description Minimal normalised joint mutual information maximization filter 6 | #' calling [praznik::NJMIM()] from package \CRANpkg{praznik}. 7 | #' 8 | #' This filter supports partial scoring (see [Filter]). 
9 | #' 10 | #' @references 11 | #' `r format_bib("kursa_2021")` 12 | #' 13 | #' For a benchmark of filter methods: 14 | #' 15 | #' `r format_bib("bommert_2020")` 16 | #' 17 | #' @template details_praznik 18 | #' @family Filter 19 | #' @include Filter.R 20 | #' @template seealso_filter 21 | #' @export 22 | #' @examples 23 | #' if (requireNamespace("praznik")) { 24 | #' task = mlr3::tsk("iris") 25 | #' filter = flt("njmim") 26 | #' filter$calculate(task, nfeat = 2) 27 | #' as.data.table(filter) 28 | #' } 29 | #' 30 | #' if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart", "praznik"), quietly = TRUE)) { 31 | #' library("mlr3pipelines") 32 | #' task = mlr3::tsk("spam") 33 | #' 34 | #' # Note: `filter.frac` is selected randomly and should be tuned. 35 | #' 36 | #' graph = po("filter", filter = flt("njmim"), filter.frac = 0.5) %>>% 37 | #' po("learner", mlr3::lrn("classif.rpart")) 38 | #' 39 | #' graph$train(task) 40 | #' } 41 | FilterNJMIM = R6Class("FilterNJMIM", 42 | inherit = Filter, 43 | 44 | public = list( 45 | 46 | #' @description Create a FilterNJMIM object. 
47 | initialize = function() { 48 | param_set = ps( 49 | threads = p_int(lower = 0L, default = 0L, tags = "threads") 50 | ) 51 | param_set$values = list(threads = 1L) 52 | super$initialize( 53 | id = "njmim", 54 | task_types = c("classif", "regr"), 55 | param_set = param_set, 56 | packages = "praznik", 57 | feature_types = c("integer", "numeric", "factor", "ordered"), 58 | label = "Minimal Normalised Joint Mutual Information Maximization", 59 | man = "mlr3filters::mlr_filters_njmim" 60 | ) 61 | } 62 | ), 63 | 64 | private = list( 65 | .calculate = function(task, nfeat) { 66 | call_praznik(self, task, praznik::NJMIM, nfeat) 67 | } 68 | ) 69 | ) 70 | 71 | #' @include mlr_filters.R 72 | mlr_filters$add("njmim", FilterNJMIM) 73 | -------------------------------------------------------------------------------- /R/FilterPerformance.R: -------------------------------------------------------------------------------- 1 | #' @title Predictive Performance Filter 2 | #' 3 | #' @name mlr_filters_performance 4 | #' 5 | #' @description Filter which uses the predictive performance of a 6 | #' [mlr3::Learner] as filter score. Performs a [mlr3::resample()] for each 7 | #' feature separately. The filter score is the aggregated performance of the 8 | #' [mlr3::Measure], or the negated aggregated performance if the measure has 9 | #' to be minimized. 
10 | #' 11 | #' @family Filter 12 | #' @include FilterLearner.R 13 | #' @template seealso_filter 14 | #' @export 15 | #' @examples 16 | #' if (requireNamespace("rpart")) { 17 | #' task = mlr3::tsk("iris") 18 | #' learner = mlr3::lrn("classif.rpart") 19 | #' filter = flt("performance", learner = learner) 20 | #' filter$calculate(task) 21 | #' as.data.table(filter) 22 | #' } 23 | #' 24 | #' if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart"), quietly = TRUE)) { 25 | #' library("mlr3pipelines") 26 | #' task = mlr3::tsk("iris") 27 | #' l = lrn("classif.rpart") 28 | #' 29 | #' # Note: `filter.frac` is selected randomly and should be tuned. 30 | #' 31 | #' graph = po("filter", filter = flt("performance", learner = l), filter.frac = 0.5) %>>% 32 | #' po("learner", mlr3::lrn("classif.rpart")) 33 | #' 34 | #' graph$train(task) 35 | #' } 36 | FilterPerformance = R6Class("FilterPerformance", 37 | inherit = FilterLearner, 38 | 39 | public = list( 40 | 41 | #' @field learner ([mlr3::Learner])\cr 42 | learner = NULL, 43 | #' @field resampling ([mlr3::Resampling])\cr 44 | resampling = NULL, 45 | #' @field measure ([mlr3::Measure])\cr 46 | measure = NULL, 47 | 48 | #' @description Create a FilterPerformance object. 49 | #' @param learner ([mlr3::Learner])\cr 50 | #' [mlr3::Learner] to use for model fitting. 51 | #' @param resampling ([mlr3::Resampling])\cr 52 | #' [mlr3::Resampling] to be used within resampling. 53 | #' @param measure ([mlr3::Measure])\cr 54 | #' [mlr3::Measure] to be used for evaluating the performance.
55 | initialize = function(learner = mlr3::lrn("classif.featureless"), 56 | resampling = mlr3::rsmp("holdout"), measure = NULL) { 57 | 58 | self$learner = learner = assert_learner(as_learner(learner, clone = TRUE)) 59 | self$resampling = assert_resampling(as_resampling(resampling)) 60 | self$measure = assert_measure(as_measure(measure, 61 | task_type = learner$task_type), learner = learner) 62 | packages = unique(c(self$learner$packages, self$measure$packages)) 63 | 64 | super$initialize( 65 | id = "performance", 66 | task_types = learner$task_type, 67 | param_set = learner$param_set, 68 | feature_types = learner$feature_types, 69 | packages = packages, 70 | label = "Predictive Performance", 71 | man = "mlr3filters::mlr_filters_performance" 72 | ) 73 | } 74 | ), 75 | 76 | private = list( 77 | .calculate = function(task, nfeat) { 78 | task = task$clone() 79 | fn = task$feature_names 80 | 81 | perf = map_dbl(fn, function(x) { 82 | task$col_roles$feature = x 83 | resample(task, self$learner, self$resampling, clone = character())$ 84 | aggregate(measures = self$measure) 85 | }) 86 | 87 | if (self$measure$minimize) { 88 | perf = -perf 89 | } 90 | 91 | set_names(perf, fn) 92 | }, 93 | 94 | .get_properties = function() { 95 | intersect("missings", self$learner$properties) 96 | } 97 | ) 98 | ) 99 | 100 | #' @include mlr_filters.R 101 | mlr_filters$add("performance", FilterPerformance) 102 | -------------------------------------------------------------------------------- /R/FilterRelief.R: -------------------------------------------------------------------------------- 1 | #' @title RELIEF Filter 2 | #' 3 | #' @name mlr_filters_relief 4 | #' 5 | #' @description RELIEF filter calling 6 | #' [FSelectorRcpp::relief()] in package \CRANpkg{FSelectorRcpp}. 7 | #' 8 | #' @note 9 | #' This filter can handle missing values in the features.
10 | #' However, the resulting filter scores may be misleading or at least difficult to compare 11 | #' if some features have a large proportion of missing values. 12 | #' 13 | #' If a feature has no non-missing observation, the resulting score will be (close to) 0. 14 | #' 15 | #' @family Filter 16 | #' @include Filter.R 17 | #' @template seealso_filter 18 | #' @export 19 | #' @examples 20 | #' if (requireNamespace("FSelectorRcpp")) { 21 | #' ## Relief (default) 22 | #' task = mlr3::tsk("iris") 23 | #' filter = flt("relief") 24 | #' filter$calculate(task) 25 | #' head(filter$scores, 3) 26 | #' as.data.table(filter) 27 | #' } 28 | #' 29 | #' if (mlr3misc::require_namespaces(c("mlr3pipelines", "FSelectorRcpp", "rpart"), quietly = TRUE)) { 30 | #' library("mlr3pipelines") 31 | #' task = mlr3::tsk("iris") 32 | #' 33 | #' # Note: `filter.frac` is selected randomly and should be tuned. 34 | #' 35 | #' graph = po("filter", filter = flt("relief"), filter.frac = 0.5) %>>% 36 | #' po("learner", mlr3::lrn("classif.rpart")) 37 | #' 38 | #' graph$train(task) 39 | #' } 40 | FilterRelief = R6Class("FilterRelief", 41 | inherit = Filter, 42 | 43 | public = list( 44 | 45 | #' @description Create a FilterRelief object. 
46 | initialize = function() { 47 | param_set = ps( 48 | neighboursCount = p_int(lower = 1L, default = 5L), 49 | sampleSize = p_int(lower = 1L, default = 10L) 50 | ) 51 | 52 | super$initialize( 53 | id = "relief", 54 | task_types = c("classif", "regr"), 55 | param_set = param_set, 56 | feature_types = c("integer", "numeric", "factor", "ordered"), 57 | packages = "FSelectorRcpp", 58 | label = "RELIEF", 59 | man = "mlr3filters::mlr_filters_relief" 60 | ) 61 | } 62 | ), 63 | 64 | private = list( 65 | .get_properties = function() { 66 | "missings" 67 | }, 68 | 69 | .calculate = function(task, nfeat) { 70 | pv = self$param_set$values 71 | 72 | x = setDF(task$data(cols = task$feature_names)) 73 | y = task$truth() 74 | scores = invoke(FSelectorRcpp::relief, x = x, y = y, .args = pv) 75 | set_names(scores$importance, scores$attributes) 76 | } 77 | ) 78 | ) 79 | 80 | #' @include mlr_filters.R 81 | mlr_filters$add("relief", FilterRelief) 82 | -------------------------------------------------------------------------------- /R/FilterSelectedFeatures.R: -------------------------------------------------------------------------------- 1 | #' @title Filter for Embedded Feature Selection 2 | #' 3 | #' @name mlr_filters_selected_features 4 | #' 5 | #' @description 6 | #' Filter using embedded feature selection of machine learning algorithms. 7 | #' Takes a [mlr3::Learner] which is capable of extracting the selected features 8 | #' (property "selected_features"), fits the model and extracts the selected 9 | #' features. 10 | #' 11 | #' Note that contrary to [mlr_filters_importance], there is no ordering in 12 | #' the selected features. Selected features get a score of 1, deselected 13 | #' features get a score of 0. The order of selected features is random and 14 | #' different from the order in the learner. In combination with 15 | #' \CRANpkg{mlr3pipelines}, only the filter criterion `cutoff` makes sense. 
16 | #' 17 | #' @family Filter 18 | #' @include Filter.R 19 | #' @template seealso_filter 20 | #' @export 21 | #' @examples 22 | #' if (requireNamespace("rpart")) { 23 | #' task = mlr3::tsk("iris") 24 | #' learner = mlr3::lrn("classif.rpart") 25 | #' filter = flt("selected_features", learner = learner) 26 | #' filter$calculate(task) 27 | #' as.data.table(filter) 28 | #' } 29 | #' 30 | #' if (mlr3misc::require_namespaces(c("mlr3pipelines", "mlr3learners", "rpart"), quietly = TRUE)) { 31 | #' library("mlr3pipelines") 32 | #' library("mlr3learners") 33 | #' task = mlr3::tsk("sonar") 34 | #' 35 | #' filter = flt("selected_features", learner = lrn("classif.rpart")) 36 | #' 37 | #' # Note: All filter scores are either 0 or 1, i.e. setting `filter.cutoff = 0.5` means that 38 | #' # we select all "selected features". 39 | #' 40 | #' graph = po("filter", filter = filter, filter.cutoff = 0.5) %>>% 41 | #' po("learner", mlr3::lrn("classif.log_reg")) 42 | #' 43 | #' graph$train(task) 44 | #' } 45 | FilterSelectedFeatures = R6Class("FilterSelectedFeatures", 46 | inherit = FilterLearner, 47 | 48 | public = list( 49 | 50 | #' @field learner ([mlr3::Learner])\cr 51 | #' Learner to extract the selected features from. 52 | learner = NULL, 53 | 54 | #' @description Create a FilterSelectedFeatures object. 55 | #' @param learner ([mlr3::Learner])\cr 56 | #' Learner to extract the selected features from.
57 | initialize = function(learner = mlr3::lrn("classif.featureless")) { 58 | self$learner = learner = assert_learner(as_learner(learner, clone = TRUE), 59 | properties = "selected_features") 60 | 61 | super$initialize( 62 | id = "selected_features", 63 | task_types = learner$task_type, 64 | feature_types = learner$feature_types, 65 | packages = learner$packages, 66 | param_set = learner$param_set, 67 | label = "Embedded Feature Selection", 68 | man = "mlr3filters::mlr_filters_selected_features" 69 | ) 70 | } 71 | ), 72 | 73 | private = list( 74 | .calculate = function(task, nfeat) { 75 | learner = self$learner$clone(deep = TRUE) 76 | learner = learner$train(task = task) 77 | score = named_vector(task$feature_names, init = 0) 78 | replace(score, names(score) %in% learner$selected_features(), 1) 79 | }, 80 | 81 | .get_properties = function() { 82 | intersect("missings", self$learner$properties) 83 | } 84 | ) 85 | ) 86 | 87 | #' @include mlr_filters.R 88 | mlr_filters$add("selected_features", FilterSelectedFeatures) 89 | -------------------------------------------------------------------------------- /R/FilterUnivariateCox.R: -------------------------------------------------------------------------------- 1 | #' @title Univariate Cox Survival Filter 2 | #' 3 | #' @name mlr_filters_univariate_cox 4 | #' 5 | #' @description Calculates scores for assessing the relationship between 6 | #' individual features and the time-to-event outcome (right-censored survival 7 | #' data) using a univariate Cox proportional hazards model. 8 | #' The goal is to determine which features have a statistically significant 9 | #' association with the event of interest, typically in the context of clinical 10 | #' or biomedical research. 11 | #' 12 | #' This filter fits a [Cox Proportional Hazards][survival::coxph()] model using 13 | #' each feature independently and extracts the \eqn{p}-value that quantifies the 14 | #' significance of the feature's impact on survival. 
The filter value is 15 | #' `-log10(p)` where `p` is the \eqn{p}-value. This transformation is necessary 16 | #' to ensure numerical stability for very small \eqn{p}-values. Also higher 17 | #' values denote more important features. The filter works only for numeric 18 | #' features so please ensure that factor variables are properly encoded, e.g. 19 | #' using [PipeOpEncode][mlr3pipelines::PipeOpEncode]. 20 | #' 21 | #' @family Filter 22 | #' @include Filter.R 23 | #' @template seealso_filter 24 | #' @export 25 | #' @examples 26 | #' 27 | #' filter = flt("univariate_cox") 28 | #' filter 29 | #' 30 | FilterUnivariateCox = R6Class("FilterUnivariateCox", 31 | inherit = Filter, 32 | public = list( 33 | #' @description Create a FilterUnivariateCox object. 34 | initialize = function() { 35 | super$initialize( 36 | id = "surv.univariate_cox", 37 | packages = "survival", 38 | param_set = ps(), 39 | feature_types = c("integer", "numeric", "logical"), 40 | task_types = "surv", 41 | label = "Univariate Cox Survival Score", 42 | man = "mlr3filters::mlr_filters_univariate_cox" 43 | ) 44 | } 45 | ), 46 | 47 | private = list( 48 | .calculate = function(task, nfeat) { 49 | features = task$feature_names 50 | targets = task$data(cols = task$target_names) 51 | 52 | scores = map_dbl(features, function(feature) { 53 | model = invoke( 54 | survival::coxph, 55 | formula = task$formula(rhs = feature), 56 | data = cbind(task$data(cols = feature), targets) 57 | ) 58 | pval = summary(model)$coefficients[, "Pr(>|z|)"] 59 | -log10(pval) # smaller p-values => larger scores 60 | }) 61 | 62 | set_names(scores, features) 63 | } 64 | ) 65 | ) 66 | 67 | #' @include mlr_filters.R 68 | mlr_filters$add("univariate_cox", FilterUnivariateCox) 69 | -------------------------------------------------------------------------------- /R/FilterVariance.R: -------------------------------------------------------------------------------- 1 | #' @title Variance Filter 2 | #' 3 | #' @name mlr_filters_variance 4 | #' 
5 | #' @description Variance filter calling `stats::var()`. 6 | #' 7 | #' Argument `na.rm` defaults to `TRUE` here. 8 | #' 9 | #' @references 10 | #' For a benchmark of filter methods: 11 | #' 12 | #' `r format_bib("bommert_2020")` 13 | #' 14 | #' @family Filter 15 | #' @include Filter.R 16 | #' @importFrom stats var 17 | #' @template seealso_filter 18 | #' @export 19 | #' @examples 20 | #' task = mlr3::tsk("mtcars") 21 | #' filter = flt("variance") 22 | #' filter$calculate(task) 23 | #' head(filter$scores, 3) 24 | #' as.data.table(filter) 25 | #' 26 | #' if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart"), quietly = TRUE)) { 27 | #' library("mlr3pipelines") 28 | #' task = mlr3::tsk("spam") 29 | #' 30 | #' # Note: `filter.frac` is selected randomly and should be tuned. 31 | #' 32 | #' graph = po("filter", filter = flt("variance"), filter.frac = 0.5) %>>% 33 | #' po("learner", mlr3::lrn("classif.rpart")) 34 | #' 35 | #' graph$train(task) 36 | #' } 37 | FilterVariance = R6Class("FilterVariance", 38 | inherit = Filter, 39 | 40 | public = list( 41 | 42 | #' @description Create a FilterVariance object. 
43 | initialize = function() { 44 | param_set = ps( 45 | na.rm = p_lgl(default = TRUE) 46 | ) 47 | param_set$values = list(na.rm = TRUE) 48 | 49 | super$initialize( 50 | id = "variance", 51 | task_types = NA_character_, 52 | param_set = param_set, 53 | packages = "stats", 54 | feature_types = c("integer", "numeric"), 55 | label = "Variance", 56 | man = "mlr3filters::mlr_filters_variance" 57 | ) 58 | } 59 | ), 60 | 61 | private = list( 62 | .calculate = function(task, nfeat) { 63 | na_rm = self$param_set$values$na.rm %??% TRUE 64 | map_dbl(task$data(cols = task$feature_names), var, na.rm = na_rm) 65 | }, 66 | 67 | .get_properties = function() { 68 | if (isTRUE(self$param_set$values$na.rm)) "missings" else character() 69 | } 70 | ) 71 | ) 72 | 73 | #' @include mlr_filters.R 74 | mlr_filters$add("variance", FilterVariance) 75 | -------------------------------------------------------------------------------- /R/bibentries.R: -------------------------------------------------------------------------------- 1 | #' @importFrom utils bibentry 2 | bibentries = c( 3 | bommert_2020 = bibentry("article", 4 | doi = "10.1016/j.csda.2019.106839", 5 | year = "2020", 6 | month = "3", 7 | publisher = "Elsevier {BV}", 8 | volume = "143", 9 | pages = "106839", 10 | author = "Andrea Bommert and Xudong Sun and Bernd Bischl and J\u00f6rg Rahnenf\u00fchrer and Michel Lang", 11 | title = "Benchmark for filter methods for feature selection in high-dimensional classification data", 12 | journal = "Computational Statistics & Data Analysis" 13 | ), 14 | 15 | kursa_2021 = bibentry("article", 16 | doi = "10.1016/j.softx.2021.100819", 17 | year = "2021", 18 | month = "12", 19 | publisher = "Elsevier {BV}", 20 | volume = "16", 21 | pages = "100819", 22 | author = "Miron B. 
Kursa", 23 | title = "Praznik: High performance information-based feature selection", 24 | journal = "{SoftwareX}" 25 | ), 26 | 27 | bommert_2021 = bibentry("article", 28 | doi = "10.1093/bib/bbab354", 29 | year = "2021", 30 | month = "9", 31 | publisher = "Oxford University Press ({OUP})", 32 | volume = "23", 33 | number = "1", 34 | author = "Andrea Bommert and Thomas Welchowski and Matthias Schmid and J\u00f6rg Rahnenf\u00fchrer", 35 | title = "Benchmark of filter methods for feature selection in high-dimensional gene expression survival data", 36 | journal = "Briefings in Bioinformatics" 37 | ), 38 | 39 | kursa_2010 = bibentry("article", 40 | title = "Feature Selection with the Boruta Package", 41 | volume = "36", 42 | number = "11", 43 | journal = "Journal of Statistical Software", 44 | author = "Miron B. Kursa and Witold R. Rudnicki", 45 | year = "2010", 46 | pages = "1-13") 47 | ) 48 | -------------------------------------------------------------------------------- /R/flt.R: -------------------------------------------------------------------------------- 1 | #' @title Syntactic Sugar for Filter Construction 2 | #' 3 | #' @description 4 | #' These functions complement [mlr_filters] with functions in the spirit of [mlr3::mlr_sugar]. 5 | #' 6 | #' @inheritParams mlr3::mlr_sugar 7 | #' @return [Filter]. 8 | #' @export 9 | #' @examples 10 | #' flt("correlation", method = "kendall") 11 | #' flts(c("mrmr", "jmim")) 12 | flt = function(.key, ...) { 13 | dictionary_sugar_get(mlr_filters, .key, ...) 14 | } 15 | 16 | #' @rdname flt 17 | #' @export 18 | flts = function(.keys, ...) { 19 | dictionary_sugar_mget(mlr_filters, .keys, ...)
20 | } 21 | -------------------------------------------------------------------------------- /R/helper.R: -------------------------------------------------------------------------------- 1 | call_praznik = function(self, task, fun, nfeat) { 2 | selection = invoke(fun, 3 | X = task$data(cols = task$feature_names), 4 | Y = task$truth(), 5 | k = nfeat, 6 | .args = self$param_set$get_values() 7 | )$selection 8 | 9 | set_names(seq(from = 1, to = 0, length.out = length(selection)), names(selection)) 10 | } 11 | 12 | catn = function(..., file = "") { 13 | cat(paste0(..., collapse = "\n"), "\n", sep = "", file = file) 14 | } 15 | 16 | as_numeric_matrix = function(x) { 17 | x = as.matrix(x) 18 | if (is.logical(x)) { 19 | storage.mode(x) = "double" 20 | } 21 | x 22 | } 23 | 24 | test_matching_task_type = function(task_type, object, class) { 25 | fget = function(tab, i, j, key) { 26 | x = tab[[key]] 27 | tab[[j]][x %chin% i] 28 | } 29 | 30 | if (is.null(task_type) || object$task_type == task_type) { 31 | return(TRUE) 32 | } 33 | 34 | cl_task_type = fget(mlr_reflections$task_types, task_type, class, "type") 35 | if (inherits(object, cl_task_type)) { 36 | return(TRUE) 37 | } 38 | 39 | cl_object = fget(mlr_reflections$task_types, object$task_type, class, "type") 40 | return(cl_task_type == cl_object) 41 | } 42 | -------------------------------------------------------------------------------- /R/mlr_filters.R: -------------------------------------------------------------------------------- 1 | #' @title Dictionary of Filters 2 | #' 3 | #' @format [R6::R6Class] object 4 | #' @description 5 | #' A simple [mlr3misc::Dictionary] storing objects of class [Filter]. 6 | #' Each Filter has an associated help page, see `mlr_filters_[id]`. 7 | #' 8 | #' This dictionary can get populated with additional filters by add-on packages. 9 | #' 10 | #' For a more convenient way to retrieve and construct filters, see [flt()]. 11 | #' @section Usage: 12 | #' 13 | #' See [mlr3misc::Dictionary]. 
14 | #' 15 | #' @family Dictionary 16 | #' @family Filter 17 | #' @export 18 | #' @examples 19 | #' mlr_filters$keys() 20 | #' as.data.table(mlr_filters) 21 | #' mlr_filters$get("mim") 22 | #' flt("anova") 23 | mlr_filters = DictionaryFilter = R6Class("DictionaryFilter", 24 | inherit = mlr3misc::Dictionary, 25 | cloneable = FALSE, 26 | )$new() 27 | 28 | 29 | #' @export 30 | as.data.table.DictionaryFilter = function(x, ..., objects = FALSE) { 31 | assert_flag(objects) 32 | 33 | setkeyv(map_dtr(x$keys(), function(key) { 34 | f = x$get(key) 35 | insert_named( 36 | list(key = key, label = f$label, task_types = list(f$task_types), 37 | task_properties = list(f$task_properties), params = list(f$param_set$ids()), 38 | feature_types = list(f$feature_types), packages = list(f$packages)), 39 | if (objects) list(object = list(f)) 40 | ) 41 | }), "key")[] 42 | } 43 | -------------------------------------------------------------------------------- /R/reexports.R: -------------------------------------------------------------------------------- 1 | #' @importFrom data.table as.data.table 2 | #' @export 3 | data.table::as.data.table 4 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | #' @import data.table 2 | #' @import checkmate 3 | #' @import paradox 4 | #' @import mlr3misc 5 | #' @import mlr3 6 | #' @importFrom R6 R6Class 7 | #' @importFrom utils head 8 | "_PACKAGE" 9 | 10 | .onLoad = function(libname, pkgname) { 11 | # nolint 12 | # nocov start 13 | backports::import(pkgname) 14 | } # nocov end 15 | 16 | leanify_package() 17 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | # mlr3filters 6 | 7 | Package website: [release](https://mlr3filters.mlr-org.com/) | 
[dev](https://mlr3filters.mlr-org.com/dev/) 8 | 9 | {mlr3filters} adds feature selection filters to [mlr3](https://mlr3.mlr-org.com). 10 | The implemented filters can be used stand-alone, or as part of a machine learning pipeline in combination with 11 | [mlr3pipelines](https://mlr3pipelines.mlr-org.com) and the [filter operator](https://mlr3pipelines.mlr-org.com/reference/mlr_pipeops_filter.html). 12 | 13 | Wrapper methods for feature selection are implemented in [mlr3fselect](https://mlr3fselect.mlr-org.com). 14 | Learners which support the extraction of feature importance scores can be combined with a filter from this package for embedded feature selection. 15 | 16 | 17 | [![r-cmd-check](https://github.com/mlr-org/mlr3filters/actions/workflows/r-cmd-check.yml/badge.svg)](https://github.com/mlr-org/mlr3filters/actions/workflows/r-cmd-check.yml) 18 | [![CRAN Status](https://www.r-pkg.org/badges/version-ago/mlr3filters)](https://cran.r-project.org/package=mlr3filters) 19 | [![StackOverflow](https://img.shields.io/badge/stackoverflow-mlr3-orange.svg)](https://stackoverflow.com/questions/tagged/mlr3) 20 | [![Mattermost](https://img.shields.io/badge/chat-mattermost-orange.svg)](https://lmmisld-lmu-stats-slds.srv.mwn.de/mlr_invite/) 21 | 22 | 23 | ## Installation 24 | 25 | CRAN version 26 | 27 | ```{r eval = FALSE} 28 | install.packages("mlr3filters") 29 | ``` 30 | 31 | Development version 32 | 33 | ```{r, eval = FALSE} 34 | remotes::install_github("mlr-org/mlr3filters") 35 | ``` 36 | 37 | ## Filters 38 | 39 | ### Filter Example 40 | 41 | ```{r} 42 | set.seed(1) 43 | library("mlr3") 44 | library("mlr3filters") 45 | 46 | task = tsk("sonar") 47 | filter = flt("auc") 48 | head(as.data.table(filter$calculate(task))) 49 | ``` 50 | 51 | ### Implemented Filters 52 | 53 | ```{r echo = FALSE, message=FALSE} 54 | library("mlr3misc") 55 | library("mlr3filters") 56 | library("data.table") 57 | 58 | link_cran = function(pkg) { 59 | mlr3misc::map(pkg, function(.x) { 60 |
mlr3misc::map_chr(.x, function(.y) { 61 | if (unlist(.y) %in% getOption("defaultPackages")) { 62 | .y 63 | } else { 64 | sprintf("[%1$s](https://cran.r-project.org/package=%1$s)", .y) 65 | } 66 | }) 67 | }) 68 | } 69 | 70 | tab = as.data.table(mlr_filters)[, !c("params", "task_properties")] 71 | tab[, task_types := sapply(task_types, function(x) if (is_scalar_na(x)) "Universal" else paste(capitalize(x), collapse = " & "))] 72 | tab[, feature_types := sapply(feature_types, function(x) paste(capitalize(x), collapse = ", "))] 73 | tab[, packages := sapply(packages, function(x) paste(link_cran(x), collapse = ", "))] 74 | 75 | # manually change the task type for specific filters 76 | learner_based = c("performance", "permutation", "importance", "selected_features") 77 | tab[key %in% learner_based, task_types := "Universal"] 78 | tab[key %in% learner_based, packages := ""] 79 | 80 | 81 | setnames(tab, 82 | old = c("key", "task_types", "feature_types", "packages"), 83 | new = c("Name", "Task Types", "Feature Types", "Package") 84 | ) 85 | 86 | knitr::kable(tab, format = "markdown") 87 | ``` 88 | 89 | ### Variable Importance Filters 90 | 91 | The following learners allow the extraction of variable importance and therefore are supported by `FilterImportance`: 92 | 93 | ```{r echo=FALSE, warning=FALSE} 94 | library("mlr3learners") 95 | tab = as.data.table(mlr_learners) 96 | tab[sapply(properties, is.element, el = "importance"), key] 97 | ``` 98 | 99 | If your learner is not listed here but capable of extracting variable importance from the fitted model, the reason is most likely that it is not yet integrated in the package [mlr3learners](https://github.com/mlr-org/mlr3learners) or the [extra learner extension](https://github.com/mlr-org/mlr3extralearners). 100 | Please open an issue so we can add your package. 101 | 102 | Some learners need to have their variable importance measure "activated" during learner creation. 
103 | For example, to use the "impurity" measure of Random Forest via the {ranger} package: 104 | 105 | ```{r} 106 | task = tsk("iris") 107 | lrn = lrn("classif.ranger", seed = 42) 108 | lrn$param_set$values = list(importance = "impurity") 109 | 110 | filter = flt("importance", learner = lrn) 111 | filter$calculate(task) 112 | head(as.data.table(filter), 3) 113 | ``` 114 | 115 | ### Performance Filter 116 | 117 | `FilterPerformance` is a univariate filter method which calls `resample()` with every predictor variable in the dataset and ranks the final outcome using the supplied measure. 118 | Any learner can be passed to this filter, with `classif.featureless` being the default. 119 | Regression learners can also be passed if the task is of type "regr". 120 | 121 | 122 | ### Filter-based Feature Selection 123 | 124 | In many cases filtering is only one step in the modeling pipeline. 125 | To select features based on filter values, one can use [`PipeOpFilter`](https://mlr3pipelines.mlr-org.com/reference/mlr_pipeops_filter.html) from [mlr3pipelines](https://github.com/mlr-org/mlr3pipelines). 126 | 127 | ```{r, results='hide'} 128 | library(mlr3pipelines) 129 | task = tsk("spam") 130 | 131 | # the `filter.frac` should be tuned 132 | graph = po("filter", filter = flt("auc"), filter.frac = 0.5) %>>% 133 | po("learner", lrn("classif.rpart")) 134 | 135 | learner = as_learner(graph) 136 | rr = resample(task, learner, rsmp("holdout")) 137 | ``` 138 | -------------------------------------------------------------------------------- /man-roxygen/details_praznik.R: -------------------------------------------------------------------------------- 1 | #' @details 2 | #' As the scores calculated by the \CRANpkg{praznik} package are not monotone due 3 | #' to the greedy forward fashion, the returned scores simply reflect the selection order: 4 | #' `1`, `(k-2)/(k-1)`, ..., `1/(k-1)`, `0` where `k` is the number of selected features.
5 | #' 6 | #' Threading is disabled by default (hyperparameter `threads` is set to 1). 7 | #' Set to a number `>= 2` to enable threading, or to `0` for auto-detecting the number 8 | #' of available cores. 9 | -------------------------------------------------------------------------------- /man-roxygen/seealso_filter.R: -------------------------------------------------------------------------------- 1 | #' @seealso 2 | #' * [PipeOpFilter][mlr3pipelines::PipeOpFilter] for filter-based feature selection. 3 | #' * [Dictionary][mlr3misc::Dictionary] of [Filters][Filter]: [mlr_filters] 4 | -------------------------------------------------------------------------------- /man/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3filters/3019b3338ec91007833271edb1318fc04f1a7d54/man/figures/logo.png -------------------------------------------------------------------------------- /man/figures/logo_navbar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3filters/3019b3338ec91007833271edb1318fc04f1a7d54/man/figures/logo_navbar.png -------------------------------------------------------------------------------- /man/flt.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/flt.R 3 | \name{flt} 4 | \alias{flt} 5 | \alias{flts} 6 | \title{Syntactic Sugar for Filter Construction} 7 | \usage{ 8 | flt(.key, ...) 9 | 10 | flts(.keys, ...) 
11 | } 12 | \arguments{ 13 | \item{.key}{(\code{character(1)})\cr 14 | Key passed to the respective \link[mlr3misc:Dictionary]{dictionary} to retrieve the object.} 15 | 16 | \item{...}{(any)\cr 17 | Additional arguments.} 18 | 19 | \item{.keys}{(\code{character()})\cr 20 | Keys passed to the respective \link[mlr3misc:Dictionary]{dictionary} to retrieve multiple objects.} 21 | } 22 | \value{ 23 | \link{Filter}. 24 | } 25 | \description{ 26 | These functions complements \link{mlr_filters} with a function in the spirit of \link[mlr3:mlr_sugar]{mlr3::mlr_sugar}. 27 | } 28 | \examples{ 29 | flt("correlation", method = "kendall") 30 | flts(c("mrmr", "jmim")) 31 | } 32 | -------------------------------------------------------------------------------- /man/mlr3filters-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/zzz.R 3 | \docType{package} 4 | \name{mlr3filters-package} 5 | \alias{mlr3filters} 6 | \alias{mlr3filters-package} 7 | \title{mlr3filters: Filter Based Feature Selection for 'mlr3'} 8 | \description{ 9 | \if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}} 10 | 11 | Extends 'mlr3' with filter methods for feature selection. Besides standalone filter methods built-in methods of any machine-learning algorithm are supported. Partial scoring of multivariate filter methods is supported. 
12 | } 13 | \seealso{ 14 | Useful links: 15 | \itemize{ 16 | \item \url{https://mlr3filters.mlr-org.com} 17 | \item \url{https://github.com/mlr-org/mlr3filters} 18 | \item Report bugs at \url{https://github.com/mlr-org/mlr3filters/issues} 19 | } 20 | 21 | } 22 | \author{ 23 | \strong{Maintainer}: Marc Becker \email{marcbecker@posteo.de} (\href{https://orcid.org/0000-0002-8115-0400}{ORCID}) 24 | 25 | Authors: 26 | \itemize{ 27 | \item Patrick Schratz \email{patrick.schratz@gmail.com} (\href{https://orcid.org/0000-0003-0748-6624}{ORCID}) 28 | \item Michel Lang \email{michellang@gmail.com} (\href{https://orcid.org/0000-0001-9754-0393}{ORCID}) 29 | \item Bernd Bischl \email{bernd_bischl@gmx.net} (\href{https://orcid.org/0000-0001-6002-6980}{ORCID}) 30 | \item Martin Binder \email{mlr.developer@mb706.com} 31 | \item John Zobolas \email{bblodfon@gmail.com} (\href{https://orcid.org/0000-0002-3609-8674}{ORCID}) 32 | } 33 | 34 | } 35 | -------------------------------------------------------------------------------- /man/mlr_filters.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mlr_filters.R 3 | \docType{data} 4 | \name{mlr_filters} 5 | \alias{mlr_filters} 6 | \title{Dictionary of Filters} 7 | \format{ 8 | \link[R6:R6Class]{R6::R6Class} object 9 | } 10 | \usage{ 11 | mlr_filters 12 | } 13 | \description{ 14 | A simple \link[mlr3misc:Dictionary]{mlr3misc::Dictionary} storing objects of class \link{Filter}. 15 | Each Filter has an associated help page, see \code{mlr_filters_[id]}. 16 | 17 | This dictionary can get populated with additional filters by add-on packages. 18 | 19 | For a more convenient way to retrieve and construct filters, see \code{\link[=flt]{flt()}}. 20 | } 21 | \section{Usage}{ 22 | 23 | 24 | See \link[mlr3misc:Dictionary]{mlr3misc::Dictionary}. 
25 | } 26 | 27 | \examples{ 28 | mlr_filters$keys() 29 | as.data.table(mlr_filters) 30 | mlr_filters$get("mim") 31 | flt("anova") 32 | } 33 | \seealso{ 34 | Other Filter: 35 | \code{\link{Filter}}, 36 | \code{\link{mlr_filters_anova}}, 37 | \code{\link{mlr_filters_auc}}, 38 | \code{\link{mlr_filters_boruta}}, 39 | \code{\link{mlr_filters_carscore}}, 40 | \code{\link{mlr_filters_carsurvscore}}, 41 | \code{\link{mlr_filters_cmim}}, 42 | \code{\link{mlr_filters_correlation}}, 43 | \code{\link{mlr_filters_disr}}, 44 | \code{\link{mlr_filters_find_correlation}}, 45 | \code{\link{mlr_filters_importance}}, 46 | \code{\link{mlr_filters_information_gain}}, 47 | \code{\link{mlr_filters_jmi}}, 48 | \code{\link{mlr_filters_jmim}}, 49 | \code{\link{mlr_filters_kruskal_test}}, 50 | \code{\link{mlr_filters_mim}}, 51 | \code{\link{mlr_filters_mrmr}}, 52 | \code{\link{mlr_filters_njmim}}, 53 | \code{\link{mlr_filters_performance}}, 54 | \code{\link{mlr_filters_permutation}}, 55 | \code{\link{mlr_filters_relief}}, 56 | \code{\link{mlr_filters_selected_features}}, 57 | \code{\link{mlr_filters_univariate_cox}}, 58 | \code{\link{mlr_filters_variance}} 59 | } 60 | \concept{Dictionary} 61 | \concept{Filter} 62 | \keyword{datasets} 63 | -------------------------------------------------------------------------------- /man/mlr_filters_anova.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FilterAnova.R 3 | \name{mlr_filters_anova} 4 | \alias{mlr_filters_anova} 5 | \alias{FilterAnova} 6 | \title{ANOVA F-Test Filter} 7 | \description{ 8 | ANOVA F-Test filter calling \code{\link[stats:aov]{stats::aov()}}. Note that this is 9 | equivalent to a \eqn{t}-test for binary classification. 10 | 11 | The filter value is \code{-log10(p)} where \code{p} is the \eqn{p}-value. This 12 | transformation is necessary to ensure numerical stability for very small 13 | \eqn{p}-values. 
14 | } 15 | \examples{ 16 | task = mlr3::tsk("iris") 17 | filter = flt("anova") 18 | filter$calculate(task) 19 | head(as.data.table(filter), 3) 20 | 21 | # transform to p-value 22 | 10^(-filter$scores) 23 | 24 | if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart"), quietly = TRUE)) { 25 | library("mlr3pipelines") 26 | task = mlr3::tsk("spam") 27 | 28 | # Note: `filter.frac` is selected randomly and should be tuned. 29 | 30 | graph = po("filter", filter = flt("anova"), filter.frac = 0.5) \%>>\% 31 | po("learner", mlr3::lrn("classif.rpart")) 32 | 33 | graph$train(task) 34 | } 35 | } 36 | \references{ 37 | For a benchmark of filter methods: 38 | 39 | Bommert A, Sun X, Bischl B, Rahnenführer J, Lang M (2020). 40 | \dQuote{Benchmark for filter methods for feature selection in high-dimensional classification data.} 41 | \emph{Computational Statistics & Data Analysis}, \bold{143}, 106839. 42 | \doi{10.1016/j.csda.2019.106839}. 43 | } 44 | \seealso{ 45 | \itemize{ 46 | \item \link[mlr3pipelines:mlr_pipeops_filter]{PipeOpFilter} for filter-based feature selection. 
47 | \item \link[mlr3misc:Dictionary]{Dictionary} of \link[=Filter]{Filters}: \link{mlr_filters} 48 | } 49 | 50 | Other Filter: 51 | \code{\link{Filter}}, 52 | \code{\link{mlr_filters}}, 53 | \code{\link{mlr_filters_auc}}, 54 | \code{\link{mlr_filters_boruta}}, 55 | \code{\link{mlr_filters_carscore}}, 56 | \code{\link{mlr_filters_carsurvscore}}, 57 | \code{\link{mlr_filters_cmim}}, 58 | \code{\link{mlr_filters_correlation}}, 59 | \code{\link{mlr_filters_disr}}, 60 | \code{\link{mlr_filters_find_correlation}}, 61 | \code{\link{mlr_filters_importance}}, 62 | \code{\link{mlr_filters_information_gain}}, 63 | \code{\link{mlr_filters_jmi}}, 64 | \code{\link{mlr_filters_jmim}}, 65 | \code{\link{mlr_filters_kruskal_test}}, 66 | \code{\link{mlr_filters_mim}}, 67 | \code{\link{mlr_filters_mrmr}}, 68 | \code{\link{mlr_filters_njmim}}, 69 | \code{\link{mlr_filters_performance}}, 70 | \code{\link{mlr_filters_permutation}}, 71 | \code{\link{mlr_filters_relief}}, 72 | \code{\link{mlr_filters_selected_features}}, 73 | \code{\link{mlr_filters_univariate_cox}}, 74 | \code{\link{mlr_filters_variance}} 75 | } 76 | \concept{Filter} 77 | \section{Super class}{ 78 | \code{\link[mlr3filters:Filter]{mlr3filters::Filter}} -> \code{FilterAnova} 79 | } 80 | \section{Methods}{ 81 | \subsection{Public methods}{ 82 | \itemize{ 83 | \item \href{#method-FilterAnova-new}{\code{FilterAnova$new()}} 84 | \item \href{#method-FilterAnova-clone}{\code{FilterAnova$clone()}} 85 | } 86 | } 87 | \if{html}{\out{ 88 |
Inherited methods 89 | 95 |
96 | }} 97 | \if{html}{\out{
}} 98 | \if{html}{\out{}} 99 | \if{latex}{\out{\hypertarget{method-FilterAnova-new}{}}} 100 | \subsection{Method \code{new()}}{ 101 | Create a FilterAnova object. 102 | \subsection{Usage}{ 103 | \if{html}{\out{
}}\preformatted{FilterAnova$new()}\if{html}{\out{
}} 104 | } 105 | 106 | } 107 | \if{html}{\out{
}} 108 | \if{html}{\out{}} 109 | \if{latex}{\out{\hypertarget{method-FilterAnova-clone}{}}} 110 | \subsection{Method \code{clone()}}{ 111 | The objects of this class are cloneable with this method. 112 | \subsection{Usage}{ 113 | \if{html}{\out{
}}\preformatted{FilterAnova$clone(deep = FALSE)}\if{html}{\out{
}} 114 | } 115 | 116 | \subsection{Arguments}{ 117 | \if{html}{\out{
}} 118 | \describe{ 119 | \item{\code{deep}}{Whether to make a deep clone.} 120 | } 121 | \if{html}{\out{
}} 122 | } 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /man/mlr_filters_auc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FilterAUC.R 3 | \name{mlr_filters_auc} 4 | \alias{mlr_filters_auc} 5 | \alias{FilterAUC} 6 | \title{AUC Filter} 7 | \description{ 8 | Area under the (ROC) Curve filter, analogously to \code{\link[mlr3measures:auc]{mlr3measures::auc()}} from 9 | \CRANpkg{mlr3measures}. Missing values of the features are removed before 10 | calculating the AUC. If the AUC is undefined for the input, it is set to 0.5 11 | (random classifier). The absolute value of the difference between the AUC and 12 | 0.5 is used as final filter value. 13 | } 14 | \examples{ 15 | task = mlr3::tsk("sonar") 16 | filter = flt("auc") 17 | filter$calculate(task) 18 | head(as.data.table(filter), 3) 19 | 20 | if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart"), quietly = TRUE)) { 21 | library("mlr3pipelines") 22 | task = mlr3::tsk("spam") 23 | 24 | # Note: `filter.frac` is selected randomly and should be tuned. 25 | 26 | graph = po("filter", filter = flt("auc"), filter.frac = 0.5) \%>>\% 27 | po("learner", mlr3::lrn("classif.rpart")) 28 | 29 | graph$train(task) 30 | } 31 | } 32 | \references{ 33 | For a benchmark of filter methods: 34 | 35 | Bommert A, Sun X, Bischl B, Rahnenführer J, Lang M (2020). 36 | \dQuote{Benchmark for filter methods for feature selection in high-dimensional classification data.} 37 | \emph{Computational Statistics & Data Analysis}, \bold{143}, 106839. 38 | \doi{10.1016/j.csda.2019.106839}. 39 | } 40 | \seealso{ 41 | \itemize{ 42 | \item \link[mlr3pipelines:mlr_pipeops_filter]{PipeOpFilter} for filter-based feature selection. 
43 | \item \link[mlr3misc:Dictionary]{Dictionary} of \link[=Filter]{Filters}: \link{mlr_filters} 44 | } 45 | 46 | Other Filter: 47 | \code{\link{Filter}}, 48 | \code{\link{mlr_filters}}, 49 | \code{\link{mlr_filters_anova}}, 50 | \code{\link{mlr_filters_boruta}}, 51 | \code{\link{mlr_filters_carscore}}, 52 | \code{\link{mlr_filters_carsurvscore}}, 53 | \code{\link{mlr_filters_cmim}}, 54 | \code{\link{mlr_filters_correlation}}, 55 | \code{\link{mlr_filters_disr}}, 56 | \code{\link{mlr_filters_find_correlation}}, 57 | \code{\link{mlr_filters_importance}}, 58 | \code{\link{mlr_filters_information_gain}}, 59 | \code{\link{mlr_filters_jmi}}, 60 | \code{\link{mlr_filters_jmim}}, 61 | \code{\link{mlr_filters_kruskal_test}}, 62 | \code{\link{mlr_filters_mim}}, 63 | \code{\link{mlr_filters_mrmr}}, 64 | \code{\link{mlr_filters_njmim}}, 65 | \code{\link{mlr_filters_performance}}, 66 | \code{\link{mlr_filters_permutation}}, 67 | \code{\link{mlr_filters_relief}}, 68 | \code{\link{mlr_filters_selected_features}}, 69 | \code{\link{mlr_filters_univariate_cox}}, 70 | \code{\link{mlr_filters_variance}} 71 | } 72 | \concept{Filter} 73 | \section{Super class}{ 74 | \code{\link[mlr3filters:Filter]{mlr3filters::Filter}} -> \code{FilterAUC} 75 | } 76 | \section{Methods}{ 77 | \subsection{Public methods}{ 78 | \itemize{ 79 | \item \href{#method-FilterAUC-new}{\code{FilterAUC$new()}} 80 | \item \href{#method-FilterAUC-clone}{\code{FilterAUC$clone()}} 81 | } 82 | } 83 | \if{html}{\out{ 84 |
Inherited methods 85 | 91 |
92 | }} 93 | \if{html}{\out{
}} 94 | \if{html}{\out{}} 95 | \if{latex}{\out{\hypertarget{method-FilterAUC-new}{}}} 96 | \subsection{Method \code{new()}}{ 97 | Create a FilterAUC object. 98 | \subsection{Usage}{ 99 | \if{html}{\out{
}}\preformatted{FilterAUC$new()}\if{html}{\out{
}} 100 | } 101 | 102 | } 103 | \if{html}{\out{
}} 104 | \if{html}{\out{}} 105 | \if{latex}{\out{\hypertarget{method-FilterAUC-clone}{}}} 106 | \subsection{Method \code{clone()}}{ 107 | The objects of this class are cloneable with this method. 108 | \subsection{Usage}{ 109 | \if{html}{\out{
}}\preformatted{FilterAUC$clone(deep = FALSE)}\if{html}{\out{
}} 110 | } 111 | 112 | \subsection{Arguments}{ 113 | \if{html}{\out{
}} 114 | \describe{ 115 | \item{\code{deep}}{Whether to make a deep clone.} 116 | } 117 | \if{html}{\out{
}} 118 | } 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /man/mlr_filters_boruta.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FilterBoruta.R 3 | \name{mlr_filters_boruta} 4 | \alias{mlr_filters_boruta} 5 | \alias{FilterBoruta} 6 | \title{Boruta Filter} 7 | \description{ 8 | Filter using the Boruta algorithm for feature selection. 9 | If \code{keep = "tentative"}, confirmed and tentative features are returned. 10 | Note that there is no ordering in the selected features. 11 | Selected features get a score of 1, deselected features get a score of 0. 12 | The order of selected features is random. 13 | In combination with \CRANpkg{mlr3pipelines}, only the filter criterion \code{cutoff} makes sense. 14 | } 15 | \section{Initial parameter values}{ 16 | 17 | \itemize{ 18 | \item \code{num.threads}: 19 | \itemize{ 20 | \item Actual default: \code{NULL}, triggering auto-detection of the number of CPUs. 21 | \item Adjusted value: 1. 22 | \item Reason for change: Conflicting with parallelization via \CRANpkg{future}. 23 | } 24 | } 25 | } 26 | 27 | \examples{ 28 | \donttest{ 29 | if (requireNamespace("Boruta")) { 30 | task = mlr3::tsk("sonar") 31 | filter = flt("boruta") 32 | filter$calculate(task) 33 | as.data.table(filter) 34 | } 35 | } 36 | } 37 | \references{ 38 | Kursa MB, Rudnicki WR (2010). 39 | \dQuote{Feature Selection with the Boruta Package.} 40 | \emph{Journal of Statistical Software}, \bold{36}(11), 1-13. 41 | } 42 | \seealso{ 43 | \itemize{ 44 | \item \link[mlr3pipelines:mlr_pipeops_filter]{PipeOpFilter} for filter-based feature selection.
45 | \item \link[mlr3misc:Dictionary]{Dictionary} of \link[=Filter]{Filters}: \link{mlr_filters} 46 | } 47 | 48 | Other Filter: 49 | \code{\link{Filter}}, 50 | \code{\link{mlr_filters}}, 51 | \code{\link{mlr_filters_anova}}, 52 | \code{\link{mlr_filters_auc}}, 53 | \code{\link{mlr_filters_carscore}}, 54 | \code{\link{mlr_filters_carsurvscore}}, 55 | \code{\link{mlr_filters_cmim}}, 56 | \code{\link{mlr_filters_correlation}}, 57 | \code{\link{mlr_filters_disr}}, 58 | \code{\link{mlr_filters_find_correlation}}, 59 | \code{\link{mlr_filters_importance}}, 60 | \code{\link{mlr_filters_information_gain}}, 61 | \code{\link{mlr_filters_jmi}}, 62 | \code{\link{mlr_filters_jmim}}, 63 | \code{\link{mlr_filters_kruskal_test}}, 64 | \code{\link{mlr_filters_mim}}, 65 | \code{\link{mlr_filters_mrmr}}, 66 | \code{\link{mlr_filters_njmim}}, 67 | \code{\link{mlr_filters_performance}}, 68 | \code{\link{mlr_filters_permutation}}, 69 | \code{\link{mlr_filters_relief}}, 70 | \code{\link{mlr_filters_selected_features}}, 71 | \code{\link{mlr_filters_univariate_cox}}, 72 | \code{\link{mlr_filters_variance}} 73 | } 74 | \concept{Filter} 75 | \section{Super class}{ 76 | \code{\link[mlr3filters:Filter]{mlr3filters::Filter}} -> \code{FilterBoruta} 77 | } 78 | \section{Methods}{ 79 | \subsection{Public methods}{ 80 | \itemize{ 81 | \item \href{#method-FilterBoruta-new}{\code{FilterBoruta$new()}} 82 | \item \href{#method-FilterBoruta-clone}{\code{FilterBoruta$clone()}} 83 | } 84 | } 85 | \if{html}{\out{ 86 |
Inherited methods 87 | 93 |
94 | }} 95 | \if{html}{\out{
}} 96 | \if{html}{\out{}} 97 | \if{latex}{\out{\hypertarget{method-FilterBoruta-new}{}}} 98 | \subsection{Method \code{new()}}{ 99 | Creates a new instance of this \link[R6:R6Class]{R6} class. 100 | \subsection{Usage}{ 101 | \if{html}{\out{
}}\preformatted{FilterBoruta$new()}\if{html}{\out{
}} 102 | } 103 | 104 | } 105 | \if{html}{\out{
}} 106 | \if{html}{\out{}} 107 | \if{latex}{\out{\hypertarget{method-FilterBoruta-clone}{}}} 108 | \subsection{Method \code{clone()}}{ 109 | The objects of this class are cloneable with this method. 110 | \subsection{Usage}{ 111 | \if{html}{\out{
}}\preformatted{FilterBoruta$clone(deep = FALSE)}\if{html}{\out{
}} 112 | } 113 | 114 | \subsection{Arguments}{ 115 | \if{html}{\out{
}} 116 | \describe{ 117 | \item{\code{deep}}{Whether to make a deep clone.} 118 | } 119 | \if{html}{\out{
}} 120 | } 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /man/mlr_filters_carscore.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FilterCarScore.R 3 | \name{mlr_filters_carscore} 4 | \alias{mlr_filters_carscore} 5 | \alias{FilterCarScore} 6 | \title{Correlation-Adjusted Marginal Correlation Score Filter} 7 | \description{ 8 | Calculates the Correlation-Adjusted (marginal) coRrelation scores 9 | (short CAR scores) implemented in \code{\link[care:carscore]{care::carscore()}} in package 10 | \CRANpkg{care}. The CAR scores for a set of features are defined as the 11 | correlations between the target and the decorrelated features. The filter 12 | returns the absolute value of the calculated scores. 13 | 14 | Argument \code{verbose} defaults to \code{FALSE}. 15 | } 16 | \examples{ 17 | if (requireNamespace("care")) { 18 | task = mlr3::tsk("mtcars") 19 | filter = flt("carscore") 20 | filter$calculate(task) 21 | head(as.data.table(filter), 3) 22 | 23 | ## changing the filter settings 24 | filter = flt("carscore") 25 | filter$param_set$values = list("diagonal" = TRUE) 26 | filter$calculate(task) 27 | head(as.data.table(filter), 3) 28 | } 29 | 30 | if (mlr3misc::require_namespaces(c("mlr3pipelines", "care", "rpart"), quietly = TRUE)) { 31 | library("mlr3pipelines") 32 | task = mlr3::tsk("mtcars") 33 | 34 | # Note: `filter.frac` is selected randomly and should be tuned. 35 | 36 | graph = po("filter", filter = flt("carscore"), filter.frac = 0.5) \%>>\% 37 | po("learner", mlr3::lrn("regr.rpart")) 38 | 39 | graph$train(task) 40 | } 41 | } 42 | \seealso{ 43 | \itemize{ 44 | \item \link[mlr3pipelines:mlr_pipeops_filter]{PipeOpFilter} for filter-based feature selection.
45 | \item \link[mlr3misc:Dictionary]{Dictionary} of \link[=Filter]{Filters}: \link{mlr_filters} 46 | } 47 | 48 | Other Filter: 49 | \code{\link{Filter}}, 50 | \code{\link{mlr_filters}}, 51 | \code{\link{mlr_filters_anova}}, 52 | \code{\link{mlr_filters_auc}}, 53 | \code{\link{mlr_filters_boruta}}, 54 | \code{\link{mlr_filters_carsurvscore}}, 55 | \code{\link{mlr_filters_cmim}}, 56 | \code{\link{mlr_filters_correlation}}, 57 | \code{\link{mlr_filters_disr}}, 58 | \code{\link{mlr_filters_find_correlation}}, 59 | \code{\link{mlr_filters_importance}}, 60 | \code{\link{mlr_filters_information_gain}}, 61 | \code{\link{mlr_filters_jmi}}, 62 | \code{\link{mlr_filters_jmim}}, 63 | \code{\link{mlr_filters_kruskal_test}}, 64 | \code{\link{mlr_filters_mim}}, 65 | \code{\link{mlr_filters_mrmr}}, 66 | \code{\link{mlr_filters_njmim}}, 67 | \code{\link{mlr_filters_performance}}, 68 | \code{\link{mlr_filters_permutation}}, 69 | \code{\link{mlr_filters_relief}}, 70 | \code{\link{mlr_filters_selected_features}}, 71 | \code{\link{mlr_filters_univariate_cox}}, 72 | \code{\link{mlr_filters_variance}} 73 | } 74 | \concept{Filter} 75 | \section{Super class}{ 76 | \code{\link[mlr3filters:Filter]{mlr3filters::Filter}} -> \code{FilterCarScore} 77 | } 78 | \section{Methods}{ 79 | \subsection{Public methods}{ 80 | \itemize{ 81 | \item \href{#method-FilterCarScore-new}{\code{FilterCarScore$new()}} 82 | \item \href{#method-FilterCarScore-clone}{\code{FilterCarScore$clone()}} 83 | } 84 | } 85 | \if{html}{\out{ 86 |
Inherited methods 87 | 93 |
94 | }} 95 | \if{html}{\out{
}} 96 | \if{html}{\out{}} 97 | \if{latex}{\out{\hypertarget{method-FilterCarScore-new}{}}} 98 | \subsection{Method \code{new()}}{ 99 | Create a FilterCarScore object. 100 | \subsection{Usage}{ 101 | \if{html}{\out{
}}\preformatted{FilterCarScore$new()}\if{html}{\out{
}} 102 | } 103 | 104 | } 105 | \if{html}{\out{
}} 106 | \if{html}{\out{}} 107 | \if{latex}{\out{\hypertarget{method-FilterCarScore-clone}{}}} 108 | \subsection{Method \code{clone()}}{ 109 | The objects of this class are cloneable with this method. 110 | \subsection{Usage}{ 111 | \if{html}{\out{
}}\preformatted{FilterCarScore$clone(deep = FALSE)}\if{html}{\out{
}} 112 | } 113 | 114 | \subsection{Arguments}{ 115 | \if{html}{\out{
}} 116 | \describe{ 117 | \item{\code{deep}}{Whether to make a deep clone.} 118 | } 119 | \if{html}{\out{
}} 120 | } 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /man/mlr_filters_carsurvscore.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FilterCarSurvScore.R 3 | \name{mlr_filters_carsurvscore} 4 | \alias{mlr_filters_carsurvscore} 5 | \alias{FilterCarSurvScore} 6 | \title{Correlation-Adjusted Survival Score Filter} 7 | \description{ 8 | Calculates CARS scores for right-censored survival tasks. 9 | Calls the implementation in \code{\link[carSurv:carSurvScore]{carSurv::carSurvScore()}} in package 10 | \CRANpkg{carSurv}. 11 | } 12 | \references{ 13 | Bommert A, Welchowski T, Schmid M, Rahnenführer J (2021). 14 | \dQuote{Benchmark of filter methods for feature selection in high-dimensional gene expression survival data.} 15 | \emph{Briefings in Bioinformatics}, \bold{23}(1). 16 | \doi{10.1093/bib/bbab354}. 17 | } 18 | \seealso{ 19 | \itemize{ 20 | \item \link[mlr3pipelines:mlr_pipeops_filter]{PipeOpFilter} for filter-based feature selection. 
21 | \item \link[mlr3misc:Dictionary]{Dictionary} of \link[=Filter]{Filters}: \link{mlr_filters} 22 | } 23 | 24 | Other Filter: 25 | \code{\link{Filter}}, 26 | \code{\link{mlr_filters}}, 27 | \code{\link{mlr_filters_anova}}, 28 | \code{\link{mlr_filters_auc}}, 29 | \code{\link{mlr_filters_boruta}}, 30 | \code{\link{mlr_filters_carscore}}, 31 | \code{\link{mlr_filters_cmim}}, 32 | \code{\link{mlr_filters_correlation}}, 33 | \code{\link{mlr_filters_disr}}, 34 | \code{\link{mlr_filters_find_correlation}}, 35 | \code{\link{mlr_filters_importance}}, 36 | \code{\link{mlr_filters_information_gain}}, 37 | \code{\link{mlr_filters_jmi}}, 38 | \code{\link{mlr_filters_jmim}}, 39 | \code{\link{mlr_filters_kruskal_test}}, 40 | \code{\link{mlr_filters_mim}}, 41 | \code{\link{mlr_filters_mrmr}}, 42 | \code{\link{mlr_filters_njmim}}, 43 | \code{\link{mlr_filters_performance}}, 44 | \code{\link{mlr_filters_permutation}}, 45 | \code{\link{mlr_filters_relief}}, 46 | \code{\link{mlr_filters_selected_features}}, 47 | \code{\link{mlr_filters_univariate_cox}}, 48 | \code{\link{mlr_filters_variance}} 49 | } 50 | \concept{Filter} 51 | \section{Super class}{ 52 | \code{\link[mlr3filters:Filter]{mlr3filters::Filter}} -> \code{FilterCarSurvScore} 53 | } 54 | \section{Methods}{ 55 | \subsection{Public methods}{ 56 | \itemize{ 57 | \item \href{#method-FilterCarSurvScore-new}{\code{FilterCarSurvScore$new()}} 58 | \item \href{#method-FilterCarSurvScore-clone}{\code{FilterCarSurvScore$clone()}} 59 | } 60 | } 61 | \if{html}{\out{ 62 |
Inherited methods 63 | 69 |
70 | }} 71 | \if{html}{\out{
}} 72 | \if{html}{\out{}} 73 | \if{latex}{\out{\hypertarget{method-FilterCarSurvScore-new}{}}} 74 | \subsection{Method \code{new()}}{ 75 | Create a FilterCarSurvScore object. 76 | \subsection{Usage}{ 77 | \if{html}{\out{
}}\preformatted{FilterCarSurvScore$new()}\if{html}{\out{
}} 78 | } 79 | 80 | } 81 | \if{html}{\out{
}} 82 | \if{html}{\out{}} 83 | \if{latex}{\out{\hypertarget{method-FilterCarSurvScore-clone}{}}} 84 | \subsection{Method \code{clone()}}{ 85 | The objects of this class are cloneable with this method. 86 | \subsection{Usage}{ 87 | \if{html}{\out{
}}\preformatted{FilterCarSurvScore$clone(deep = FALSE)}\if{html}{\out{
}} 88 | } 89 | 90 | \subsection{Arguments}{ 91 | \if{html}{\out{
}} 92 | \describe{ 93 | \item{\code{deep}}{Whether to make a deep clone.} 94 | } 95 | \if{html}{\out{
}} 96 | } 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /man/mlr_filters_cmim.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FilterCMIM.R 3 | \name{mlr_filters_cmim} 4 | \alias{mlr_filters_cmim} 5 | \alias{FilterCMIM} 6 | \title{Minimal Conditional Mutual Information Maximization Filter} 7 | \description{ 8 | Minimal conditional mutual information maximization filter 9 | calling \code{\link[praznik:CMIM]{praznik::CMIM()}} from package \CRANpkg{praznik}. 10 | 11 | This filter supports partial scoring (see \link{Filter}). 12 | } 13 | \details{ 14 | As the scores calculated by the \CRANpkg{praznik} package are not monotone due 15 | to the greedy forward fashion, the returned scores simply reflect the selection order: 16 | \code{1}, \code{(k-1)/k}, ..., \code{1/k} where \code{k} is the number of selected features. 17 | 18 | Threading is disabled by default (hyperparameter \code{threads} is set to 1). 19 | Set to a number \verb{>= 2} to enable threading, or to \code{0} for auto-detecting the number 20 | of available cores. 21 | } 22 | \examples{ 23 | if (requireNamespace("praznik")) { 24 | task = mlr3::tsk("iris") 25 | filter = flt("cmim") 26 | filter$calculate(task, nfeat = 2) 27 | as.data.table(filter) 28 | } 29 | 30 | if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart", "praznik"), quietly = TRUE)) { 31 | library("mlr3pipelines") 32 | task = mlr3::tsk("spam") 33 | 34 | # Note: `filter.frac` is selected randomly and should be tuned. 35 | 36 | graph = po("filter", filter = flt("cmim"), filter.frac = 0.5) \%>>\% 37 | po("learner", mlr3::lrn("classif.rpart")) 38 | 39 | graph$train(task) 40 | } 41 | } 42 | \references{ 43 | Kursa MB (2021). 44 | \dQuote{Praznik: High performance information-based feature selection.} 45 | \emph{SoftwareX}, \bold{16}, 100819. 
46 | \doi{10.1016/j.softx.2021.100819}. 47 | 48 | For a benchmark of filter methods: 49 | 50 | Bommert A, Sun X, Bischl B, Rahnenführer J, Lang M (2020). 51 | \dQuote{Benchmark for filter methods for feature selection in high-dimensional classification data.} 52 | \emph{Computational Statistics & Data Analysis}, \bold{143}, 106839. 53 | \doi{10.1016/j.csda.2019.106839}. 54 | } 55 | \seealso{ 56 | \itemize{ 57 | \item \link[mlr3pipelines:mlr_pipeops_filter]{PipeOpFilter} for filter-based feature selection. 58 | \item \link[mlr3misc:Dictionary]{Dictionary} of \link[=Filter]{Filters}: \link{mlr_filters} 59 | } 60 | 61 | Other Filter: 62 | \code{\link{Filter}}, 63 | \code{\link{mlr_filters}}, 64 | \code{\link{mlr_filters_anova}}, 65 | \code{\link{mlr_filters_auc}}, 66 | \code{\link{mlr_filters_boruta}}, 67 | \code{\link{mlr_filters_carscore}}, 68 | \code{\link{mlr_filters_carsurvscore}}, 69 | \code{\link{mlr_filters_correlation}}, 70 | \code{\link{mlr_filters_disr}}, 71 | \code{\link{mlr_filters_find_correlation}}, 72 | \code{\link{mlr_filters_importance}}, 73 | \code{\link{mlr_filters_information_gain}}, 74 | \code{\link{mlr_filters_jmi}}, 75 | \code{\link{mlr_filters_jmim}}, 76 | \code{\link{mlr_filters_kruskal_test}}, 77 | \code{\link{mlr_filters_mim}}, 78 | \code{\link{mlr_filters_mrmr}}, 79 | \code{\link{mlr_filters_njmim}}, 80 | \code{\link{mlr_filters_performance}}, 81 | \code{\link{mlr_filters_permutation}}, 82 | \code{\link{mlr_filters_relief}}, 83 | \code{\link{mlr_filters_selected_features}}, 84 | \code{\link{mlr_filters_univariate_cox}}, 85 | \code{\link{mlr_filters_variance}} 86 | } 87 | \concept{Filter} 88 | \section{Super class}{ 89 | \code{\link[mlr3filters:Filter]{mlr3filters::Filter}} -> \code{FilterCMIM} 90 | } 91 | \section{Methods}{ 92 | \subsection{Public methods}{ 93 | \itemize{ 94 | \item \href{#method-FilterCMIM-new}{\code{FilterCMIM$new()}} 95 | \item \href{#method-FilterCMIM-clone}{\code{FilterCMIM$clone()}} 96 | } 97 | } 98 | \if{html}{\out{ 
99 |
Inherited methods 100 | 106 |
107 | }} 108 | \if{html}{\out{
}} 109 | \if{html}{\out{}} 110 | \if{latex}{\out{\hypertarget{method-FilterCMIM-new}{}}} 111 | \subsection{Method \code{new()}}{ 112 | Create a FilterCMIM object. 113 | \subsection{Usage}{ 114 | \if{html}{\out{
}}\preformatted{FilterCMIM$new()}\if{html}{\out{
}} 115 | } 116 | 117 | } 118 | \if{html}{\out{
}} 119 | \if{html}{\out{}} 120 | \if{latex}{\out{\hypertarget{method-FilterCMIM-clone}{}}} 121 | \subsection{Method \code{clone()}}{ 122 | The objects of this class are cloneable with this method. 123 | \subsection{Usage}{ 124 | \if{html}{\out{
}}\preformatted{FilterCMIM$clone(deep = FALSE)}\if{html}{\out{
}} 125 | } 126 | 127 | \subsection{Arguments}{ 128 | \if{html}{\out{
}} 129 | \describe{ 130 | \item{\code{deep}}{Whether to make a deep clone.} 131 | } 132 | \if{html}{\out{
}} 133 | } 134 | } 135 | } 136 | -------------------------------------------------------------------------------- /man/mlr_filters_correlation.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FilterCorrelation.R 3 | \name{mlr_filters_correlation} 4 | \alias{mlr_filters_correlation} 5 | \alias{FilterCorrelation} 6 | \title{Correlation Filter} 7 | \description{ 8 | Simple correlation filter calling \code{\link[stats:cor]{stats::cor()}}. 9 | The filter score is the absolute value of the correlation. 10 | } 11 | \note{ 12 | This filter, in its default settings, can handle missing values in the features. 13 | However, the resulting filter scores may be misleading or at least difficult to compare 14 | if some features have a large proportion of missing values. 15 | 16 | If a feature has no non-missing value, the resulting score will be \code{NA}. 17 | Missing scores appear in a random, non-deterministic order at the end of the vector of scores. 18 | } 19 | \examples{ 20 | ## Pearson (default) 21 | task = mlr3::tsk("mtcars") 22 | filter = flt("correlation") 23 | filter$calculate(task) 24 | as.data.table(filter) 25 | 26 | ## Spearman 27 | filter = FilterCorrelation$new() 28 | filter$param_set$values = list("method" = "spearman") 29 | filter$calculate(task) 30 | as.data.table(filter) 31 | if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart"), quietly = TRUE)) { 32 | library("mlr3pipelines") 33 | task = mlr3::tsk("mtcars") 34 | 35 | # Note: `filter.frac` is selected randomly and should be tuned. 36 | 37 | graph = po("filter", filter = flt("correlation"), filter.frac = 0.5) \%>>\% 38 | po("learner", mlr3::lrn("regr.rpart")) 39 | 40 | graph$train(task) 41 | } 42 | } 43 | \references{ 44 | For a benchmark of filter methods: 45 | 46 | Bommert A, Sun X, Bischl B, Rahnenführer J, Lang M (2020). 
47 | \dQuote{Benchmark for filter methods for feature selection in high-dimensional classification data.} 48 | \emph{Computational Statistics & Data Analysis}, \bold{143}, 106839. 49 | \doi{10.1016/j.csda.2019.106839}. 50 | } 51 | \seealso{ 52 | \itemize{ 53 | \item \link[mlr3pipelines:mlr_pipeops_filter]{PipeOpFilter} for filter-based feature selection. 54 | \item \link[mlr3misc:Dictionary]{Dictionary} of \link[=Filter]{Filters}: \link{mlr_filters} 55 | } 56 | 57 | Other Filter: 58 | \code{\link{Filter}}, 59 | \code{\link{mlr_filters}}, 60 | \code{\link{mlr_filters_anova}}, 61 | \code{\link{mlr_filters_auc}}, 62 | \code{\link{mlr_filters_boruta}}, 63 | \code{\link{mlr_filters_carscore}}, 64 | \code{\link{mlr_filters_carsurvscore}}, 65 | \code{\link{mlr_filters_cmim}}, 66 | \code{\link{mlr_filters_disr}}, 67 | \code{\link{mlr_filters_find_correlation}}, 68 | \code{\link{mlr_filters_importance}}, 69 | \code{\link{mlr_filters_information_gain}}, 70 | \code{\link{mlr_filters_jmi}}, 71 | \code{\link{mlr_filters_jmim}}, 72 | \code{\link{mlr_filters_kruskal_test}}, 73 | \code{\link{mlr_filters_mim}}, 74 | \code{\link{mlr_filters_mrmr}}, 75 | \code{\link{mlr_filters_njmim}}, 76 | \code{\link{mlr_filters_performance}}, 77 | \code{\link{mlr_filters_permutation}}, 78 | \code{\link{mlr_filters_relief}}, 79 | \code{\link{mlr_filters_selected_features}}, 80 | \code{\link{mlr_filters_univariate_cox}}, 81 | \code{\link{mlr_filters_variance}} 82 | } 83 | \concept{Filter} 84 | \section{Super class}{ 85 | \code{\link[mlr3filters:Filter]{mlr3filters::Filter}} -> \code{FilterCorrelation} 86 | } 87 | \section{Methods}{ 88 | \subsection{Public methods}{ 89 | \itemize{ 90 | \item \href{#method-FilterCorrelation-new}{\code{FilterCorrelation$new()}} 91 | \item \href{#method-FilterCorrelation-clone}{\code{FilterCorrelation$clone()}} 92 | } 93 | } 94 | \if{html}{\out{ 95 |
Inherited methods 96 | 102 |
103 | }} 104 | \if{html}{\out{
}} 105 | \if{html}{\out{}} 106 | \if{latex}{\out{\hypertarget{method-FilterCorrelation-new}{}}} 107 | \subsection{Method \code{new()}}{ 108 | Create a FilterCorrelation object. 109 | \subsection{Usage}{ 110 | \if{html}{\out{
}}\preformatted{FilterCorrelation$new()}\if{html}{\out{
}} 111 | } 112 | 113 | } 114 | \if{html}{\out{
}} 115 | \if{html}{\out{}} 116 | \if{latex}{\out{\hypertarget{method-FilterCorrelation-clone}{}}} 117 | \subsection{Method \code{clone()}}{ 118 | The objects of this class are cloneable with this method. 119 | \subsection{Usage}{ 120 | \if{html}{\out{
}}\preformatted{FilterCorrelation$clone(deep = FALSE)}\if{html}{\out{
}} 121 | } 122 | 123 | \subsection{Arguments}{ 124 | \if{html}{\out{
}} 125 | \describe{ 126 | \item{\code{deep}}{Whether to make a deep clone.} 127 | } 128 | \if{html}{\out{
}} 129 | } 130 | } 131 | } 132 | -------------------------------------------------------------------------------- /man/mlr_filters_disr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FilterDISR.R 3 | \name{mlr_filters_disr} 4 | \alias{mlr_filters_disr} 5 | \alias{FilterDISR} 6 | \title{Double Input Symmetrical Relevance Filter} 7 | \description{ 8 | Double input symmetrical relevance filter calling 9 | \code{\link[praznik:DISR]{praznik::DISR()}} from package \CRANpkg{praznik}. 10 | 11 | This filter supports partial scoring (see \link{Filter}). 12 | } 13 | \details{ 14 | As the scores calculated by the \CRANpkg{praznik} package are not monotone due 15 | to the greedy forward fashion, the returned scores simply reflect the selection order: 16 | \code{1}, \code{(k-1)/k}, ..., \code{1/k} where \code{k} is the number of selected features. 17 | 18 | Threading is disabled by default (hyperparameter \code{threads} is set to 1). 19 | Set to a number \verb{>= 2} to enable threading, or to \code{0} for auto-detecting the number 20 | of available cores. 21 | } 22 | \examples{ 23 | if (requireNamespace("praznik")) { 24 | task = mlr3::tsk("iris") 25 | filter = flt("disr") 26 | filter$calculate(task) 27 | as.data.table(filter) 28 | } 29 | 30 | if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart", "praznik"), quietly = TRUE)) { 31 | library("mlr3pipelines") 32 | task = mlr3::tsk("spam") 33 | 34 | # Note: `filter.frac` is selected randomly and should be tuned. 35 | 36 | graph = po("filter", filter = flt("disr"), filter.frac = 0.5) \%>>\% 37 | po("learner", mlr3::lrn("classif.rpart")) 38 | 39 | graph$train(task) 40 | } 41 | } 42 | \references{ 43 | Kursa MB (2021). 44 | \dQuote{Praznik: High performance information-based feature selection.} 45 | \emph{SoftwareX}, \bold{16}, 100819. 46 | \doi{10.1016/j.softx.2021.100819}. 
47 | 48 | For a benchmark of filter methods: 49 | 50 | Bommert A, Sun X, Bischl B, Rahnenführer J, Lang M (2020). 51 | \dQuote{Benchmark for filter methods for feature selection in high-dimensional classification data.} 52 | \emph{Computational Statistics & Data Analysis}, \bold{143}, 106839. 53 | \doi{10.1016/j.csda.2019.106839}. 54 | } 55 | \seealso{ 56 | \itemize{ 57 | \item \link[mlr3pipelines:mlr_pipeops_filter]{PipeOpFilter} for filter-based feature selection. 58 | \item \link[mlr3misc:Dictionary]{Dictionary} of \link[=Filter]{Filters}: \link{mlr_filters} 59 | } 60 | 61 | Other Filter: 62 | \code{\link{Filter}}, 63 | \code{\link{mlr_filters}}, 64 | \code{\link{mlr_filters_anova}}, 65 | \code{\link{mlr_filters_auc}}, 66 | \code{\link{mlr_filters_boruta}}, 67 | \code{\link{mlr_filters_carscore}}, 68 | \code{\link{mlr_filters_carsurvscore}}, 69 | \code{\link{mlr_filters_cmim}}, 70 | \code{\link{mlr_filters_correlation}}, 71 | \code{\link{mlr_filters_find_correlation}}, 72 | \code{\link{mlr_filters_importance}}, 73 | \code{\link{mlr_filters_information_gain}}, 74 | \code{\link{mlr_filters_jmi}}, 75 | \code{\link{mlr_filters_jmim}}, 76 | \code{\link{mlr_filters_kruskal_test}}, 77 | \code{\link{mlr_filters_mim}}, 78 | \code{\link{mlr_filters_mrmr}}, 79 | \code{\link{mlr_filters_njmim}}, 80 | \code{\link{mlr_filters_performance}}, 81 | \code{\link{mlr_filters_permutation}}, 82 | \code{\link{mlr_filters_relief}}, 83 | \code{\link{mlr_filters_selected_features}}, 84 | \code{\link{mlr_filters_univariate_cox}}, 85 | \code{\link{mlr_filters_variance}} 86 | } 87 | \concept{Filter} 88 | \section{Super class}{ 89 | \code{\link[mlr3filters:Filter]{mlr3filters::Filter}} -> \code{FilterDISR} 90 | } 91 | \section{Methods}{ 92 | \subsection{Public methods}{ 93 | \itemize{ 94 | \item \href{#method-FilterDISR-new}{\code{FilterDISR$new()}} 95 | \item \href{#method-FilterDISR-clone}{\code{FilterDISR$clone()}} 96 | } 97 | } 98 | \if{html}{\out{ 99 |
Inherited methods 100 | 106 |
107 | }} 108 | \if{html}{\out{
}} 109 | \if{html}{\out{}} 110 | \if{latex}{\out{\hypertarget{method-FilterDISR-new}{}}} 111 | \subsection{Method \code{new()}}{ 112 | Create a FilterDISR object. 113 | \subsection{Usage}{ 114 | \if{html}{\out{
}}\preformatted{FilterDISR$new()}\if{html}{\out{
}} 115 | } 116 | 117 | } 118 | \if{html}{\out{
}} 119 | \if{html}{\out{}} 120 | \if{latex}{\out{\hypertarget{method-FilterDISR-clone}{}}} 121 | \subsection{Method \code{clone()}}{ 122 | The objects of this class are cloneable with this method. 123 | \subsection{Usage}{ 124 | \if{html}{\out{
}}\preformatted{FilterDISR$clone(deep = FALSE)}\if{html}{\out{
}} 125 | } 126 | 127 | \subsection{Arguments}{ 128 | \if{html}{\out{
}} 129 | \describe{ 130 | \item{\code{deep}}{Whether to make a deep clone.} 131 | } 132 | \if{html}{\out{
}} 133 | } 134 | } 135 | } 136 | -------------------------------------------------------------------------------- /man/mlr_filters_find_correlation.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FilterFindCorrelation.R 3 | \name{mlr_filters_find_correlation} 4 | \alias{mlr_filters_find_correlation} 5 | \alias{FilterFindCorrelation} 6 | \title{Correlation Filter} 7 | \description{ 8 | Simple filter emulating \code{caret::findCorrelation(exact = FALSE)}. 9 | 10 | This gives each feature a score between 0 and 1 that is \emph{one minus} the 11 | cutoff value for which it is excluded when using \code{\link[caret:findCorrelation]{caret::findCorrelation()}}. 12 | The negative is used because \code{\link[caret:findCorrelation]{caret::findCorrelation()}} excludes everything 13 | \emph{above} a cutoff, while filters exclude everything below a cutoff. 14 | Here the filter scores are shifted by +1 to get positive values, to align 15 | with the way other filters work. 16 | 17 | Consequently, \code{caret::findCorrelation(cutoff = 0.9)} lists the same features 18 | that are excluded with \code{FilterFindCorrelation} at score 0.1 (= 1 - 0.9). 19 | } 20 | \examples{ 21 | # Pearson (default) 22 | task = mlr3::tsk("mtcars") 23 | filter = flt("find_correlation") 24 | filter$calculate(task) 25 | as.data.table(filter) 26 | 27 | ## Spearman 28 | filter = flt("find_correlation", method = "spearman") 29 | filter$calculate(task) 30 | as.data.table(filter) 31 | 32 | if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart"), quietly = TRUE)) { 33 | library("mlr3pipelines") 34 | task = mlr3::tsk("spam") 35 | 36 | # Note: `filter.frac` is selected randomly and should be tuned. 
37 | 38 | graph = po("filter", filter = flt("find_correlation"), filter.frac = 0.5) \%>>\% 39 | po("learner", mlr3::lrn("classif.rpart")) 40 | 41 | graph$train(task) 42 | } 43 | } 44 | \seealso{ 45 | \itemize{ 46 | \item \link[mlr3pipelines:mlr_pipeops_filter]{PipeOpFilter} for filter-based feature selection. 47 | \item \link[mlr3misc:Dictionary]{Dictionary} of \link[=Filter]{Filters}: \link{mlr_filters} 48 | } 49 | 50 | Other Filter: 51 | \code{\link{Filter}}, 52 | \code{\link{mlr_filters}}, 53 | \code{\link{mlr_filters_anova}}, 54 | \code{\link{mlr_filters_auc}}, 55 | \code{\link{mlr_filters_boruta}}, 56 | \code{\link{mlr_filters_carscore}}, 57 | \code{\link{mlr_filters_carsurvscore}}, 58 | \code{\link{mlr_filters_cmim}}, 59 | \code{\link{mlr_filters_correlation}}, 60 | \code{\link{mlr_filters_disr}}, 61 | \code{\link{mlr_filters_importance}}, 62 | \code{\link{mlr_filters_information_gain}}, 63 | \code{\link{mlr_filters_jmi}}, 64 | \code{\link{mlr_filters_jmim}}, 65 | \code{\link{mlr_filters_kruskal_test}}, 66 | \code{\link{mlr_filters_mim}}, 67 | \code{\link{mlr_filters_mrmr}}, 68 | \code{\link{mlr_filters_njmim}}, 69 | \code{\link{mlr_filters_performance}}, 70 | \code{\link{mlr_filters_permutation}}, 71 | \code{\link{mlr_filters_relief}}, 72 | \code{\link{mlr_filters_selected_features}}, 73 | \code{\link{mlr_filters_univariate_cox}}, 74 | \code{\link{mlr_filters_variance}} 75 | } 76 | \concept{Filter} 77 | \section{Super class}{ 78 | \code{\link[mlr3filters:Filter]{mlr3filters::Filter}} -> \code{FilterFindCorrelation} 79 | } 80 | \section{Methods}{ 81 | \subsection{Public methods}{ 82 | \itemize{ 83 | \item \href{#method-FilterFindCorrelation-new}{\code{FilterFindCorrelation$new()}} 84 | \item \href{#method-FilterFindCorrelation-clone}{\code{FilterFindCorrelation$clone()}} 85 | } 86 | } 87 | \if{html}{\out{ 88 |
Inherited methods 89 | 95 |
96 | }} 97 | \if{html}{\out{
}} 98 | \if{html}{\out{}} 99 | \if{latex}{\out{\hypertarget{method-FilterFindCorrelation-new}{}}} 100 | \subsection{Method \code{new()}}{ 101 | Create a FilterFindCorrelation object. 102 | \subsection{Usage}{ 103 | \if{html}{\out{
}}\preformatted{FilterFindCorrelation$new()}\if{html}{\out{
}} 104 | } 105 | 106 | } 107 | \if{html}{\out{
}} 108 | \if{html}{\out{}} 109 | \if{latex}{\out{\hypertarget{method-FilterFindCorrelation-clone}{}}} 110 | \subsection{Method \code{clone()}}{ 111 | The objects of this class are cloneable with this method. 112 | \subsection{Usage}{ 113 | \if{html}{\out{
}}\preformatted{FilterFindCorrelation$clone(deep = FALSE)}\if{html}{\out{
}} 114 | } 115 | 116 | \subsection{Arguments}{ 117 | \if{html}{\out{
}} 118 | \describe{ 119 | \item{\code{deep}}{Whether to make a deep clone.} 120 | } 121 | \if{html}{\out{
}} 122 | } 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /man/mlr_filters_importance.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FilterImportance.R 3 | \name{mlr_filters_importance} 4 | \alias{mlr_filters_importance} 5 | \alias{FilterImportance} 6 | \title{Filter for Embedded Feature Selection via Variable Importance} 7 | \description{ 8 | Variable Importance filter using embedded feature selection of 9 | machine learning algorithms. Takes a \link[mlr3:Learner]{mlr3::Learner} which is capable of 10 | extracting the variable importance (property "importance"), fits the model 11 | and extracts the importance values to use as filter scores. 12 | } 13 | \examples{ 14 | if (requireNamespace("rpart")) { 15 | task = mlr3::tsk("iris") 16 | learner = mlr3::lrn("classif.rpart") 17 | filter = flt("importance", learner = learner) 18 | filter$calculate(task) 19 | as.data.table(filter) 20 | } 21 | 22 | if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart", "mlr3learners"), quietly = TRUE)) { 23 | library("mlr3learners") 24 | library("mlr3pipelines") 25 | task = mlr3::tsk("sonar") 26 | 27 | learner = mlr3::lrn("classif.rpart") 28 | 29 | # Note: `filter.frac` is selected randomly and should be tuned. 30 | 31 | graph = po("filter", filter = flt("importance", learner = learner), filter.frac = 0.5) \%>>\% 32 | po("learner", mlr3::lrn("classif.log_reg")) 33 | 34 | graph$train(task) 35 | } 36 | } 37 | \seealso{ 38 | \itemize{ 39 | \item \link[mlr3pipelines:mlr_pipeops_filter]{PipeOpFilter} for filter-based feature selection. 
40 | \item \link[mlr3misc:Dictionary]{Dictionary} of \link[=Filter]{Filters}: \link{mlr_filters} 41 | } 42 | 43 | Other Filter: 44 | \code{\link{Filter}}, 45 | \code{\link{mlr_filters}}, 46 | \code{\link{mlr_filters_anova}}, 47 | \code{\link{mlr_filters_auc}}, 48 | \code{\link{mlr_filters_boruta}}, 49 | \code{\link{mlr_filters_carscore}}, 50 | \code{\link{mlr_filters_carsurvscore}}, 51 | \code{\link{mlr_filters_cmim}}, 52 | \code{\link{mlr_filters_correlation}}, 53 | \code{\link{mlr_filters_disr}}, 54 | \code{\link{mlr_filters_find_correlation}}, 55 | \code{\link{mlr_filters_information_gain}}, 56 | \code{\link{mlr_filters_jmi}}, 57 | \code{\link{mlr_filters_jmim}}, 58 | \code{\link{mlr_filters_kruskal_test}}, 59 | \code{\link{mlr_filters_mim}}, 60 | \code{\link{mlr_filters_mrmr}}, 61 | \code{\link{mlr_filters_njmim}}, 62 | \code{\link{mlr_filters_performance}}, 63 | \code{\link{mlr_filters_permutation}}, 64 | \code{\link{mlr_filters_relief}}, 65 | \code{\link{mlr_filters_selected_features}}, 66 | \code{\link{mlr_filters_univariate_cox}}, 67 | \code{\link{mlr_filters_variance}} 68 | } 69 | \concept{Filter} 70 | \section{Super classes}{ 71 | \code{\link[mlr3filters:Filter]{mlr3filters::Filter}} -> \code{mlr3filters::FilterLearner} -> \code{FilterImportance} 72 | } 73 | \section{Public fields}{ 74 | \if{html}{\out{
}} 75 | \describe{ 76 | \item{\code{learner}}{(\link[mlr3:Learner]{mlr3::Learner})\cr 77 | Learner to extract the importance values from.} 78 | } 79 | \if{html}{\out{
}} 80 | } 81 | \section{Methods}{ 82 | \subsection{Public methods}{ 83 | \itemize{ 84 | \item \href{#method-FilterImportance-new}{\code{FilterImportance$new()}} 85 | \item \href{#method-FilterImportance-clone}{\code{FilterImportance$clone()}} 86 | } 87 | } 88 | \if{html}{\out{ 89 |
Inherited methods 90 | 96 |
97 | }} 98 | \if{html}{\out{
}} 99 | \if{html}{\out{}} 100 | \if{latex}{\out{\hypertarget{method-FilterImportance-new}{}}} 101 | \subsection{Method \code{new()}}{ 102 | Create a FilterImportance object. 103 | \subsection{Usage}{ 104 | \if{html}{\out{
}}\preformatted{FilterImportance$new(learner = mlr3::lrn("classif.featureless"))}\if{html}{\out{
}} 105 | } 106 | 107 | \subsection{Arguments}{ 108 | \if{html}{\out{
}} 109 | \describe{ 110 | \item{\code{learner}}{(\link[mlr3:Learner]{mlr3::Learner})\cr 111 | Learner to extract the importance values from.} 112 | } 113 | \if{html}{\out{
}} 114 | } 115 | } 116 | \if{html}{\out{
}} 117 | \if{html}{\out{}} 118 | \if{latex}{\out{\hypertarget{method-FilterImportance-clone}{}}} 119 | \subsection{Method \code{clone()}}{ 120 | The objects of this class are cloneable with this method. 121 | \subsection{Usage}{ 122 | \if{html}{\out{
}}\preformatted{FilterImportance$clone(deep = FALSE)}\if{html}{\out{
}} 123 | } 124 | 125 | \subsection{Arguments}{ 126 | \if{html}{\out{
}} 127 | \describe{ 128 | \item{\code{deep}}{Whether to make a deep clone.} 129 | } 130 | \if{html}{\out{
}} 131 | } 132 | } 133 | } 134 | -------------------------------------------------------------------------------- /man/mlr_filters_information_gain.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FilterInformationGain.R 3 | \name{mlr_filters_information_gain} 4 | \alias{mlr_filters_information_gain} 5 | \alias{FilterInformationGain} 6 | \title{Information Gain Filter} 7 | \description{ 8 | Information gain filter calling 9 | \code{\link[FSelectorRcpp:information_gain]{FSelectorRcpp::information_gain()}} in package \CRANpkg{FSelectorRcpp}. Set 10 | parameter \code{"type"} to \code{"gainratio"} to calculate the gain ratio, or set to 11 | \code{"symuncert"} to calculate the symmetrical uncertainty (see 12 | \code{\link[FSelectorRcpp:information_gain]{FSelectorRcpp::information_gain()}}). Default is \code{"infogain"}. 13 | 14 | Argument \code{equal} defaults to \code{FALSE} for classification tasks, and to 15 | \code{TRUE} for regression tasks. 16 | } 17 | \examples{ 18 | if (requireNamespace("FSelectorRcpp")) { 19 | ## InfoGain (default) 20 | task = mlr3::tsk("sonar") 21 | filter = flt("information_gain") 22 | filter$calculate(task) 23 | head(filter$scores, 3) 24 | as.data.table(filter) 25 | 26 | ## GainRatio 27 | 28 | filterGR = flt("information_gain") 29 | filterGR$param_set$values = list("type" = "gainratio") 30 | filterGR$calculate(task) 31 | head(as.data.table(filterGR), 3) 32 | 33 | } 34 | 35 | if (mlr3misc::require_namespaces(c("mlr3pipelines", "FSelectorRcpp", "rpart"), quietly = TRUE)) { 36 | library("mlr3pipelines") 37 | task = mlr3::tsk("spam") 38 | 39 | # Note: `filter.frac` is selected randomly and should be tuned. 
40 | 41 | graph = po("filter", filter = flt("information_gain"), filter.frac = 0.5) \%>>\% 42 | po("learner", mlr3::lrn("classif.rpart")) 43 | 44 | graph$train(task) 45 | 46 | } 47 | } 48 | \seealso{ 49 | \itemize{ 50 | \item \link[mlr3pipelines:mlr_pipeops_filter]{PipeOpFilter} for filter-based feature selection. 51 | \item \link[mlr3misc:Dictionary]{Dictionary} of \link[=Filter]{Filters}: \link{mlr_filters} 52 | } 53 | 54 | Other Filter: 55 | \code{\link{Filter}}, 56 | \code{\link{mlr_filters}}, 57 | \code{\link{mlr_filters_anova}}, 58 | \code{\link{mlr_filters_auc}}, 59 | \code{\link{mlr_filters_boruta}}, 60 | \code{\link{mlr_filters_carscore}}, 61 | \code{\link{mlr_filters_carsurvscore}}, 62 | \code{\link{mlr_filters_cmim}}, 63 | \code{\link{mlr_filters_correlation}}, 64 | \code{\link{mlr_filters_disr}}, 65 | \code{\link{mlr_filters_find_correlation}}, 66 | \code{\link{mlr_filters_importance}}, 67 | \code{\link{mlr_filters_jmi}}, 68 | \code{\link{mlr_filters_jmim}}, 69 | \code{\link{mlr_filters_kruskal_test}}, 70 | \code{\link{mlr_filters_mim}}, 71 | \code{\link{mlr_filters_mrmr}}, 72 | \code{\link{mlr_filters_njmim}}, 73 | \code{\link{mlr_filters_performance}}, 74 | \code{\link{mlr_filters_permutation}}, 75 | \code{\link{mlr_filters_relief}}, 76 | \code{\link{mlr_filters_selected_features}}, 77 | \code{\link{mlr_filters_univariate_cox}}, 78 | \code{\link{mlr_filters_variance}} 79 | } 80 | \concept{Filter} 81 | \section{Super class}{ 82 | \code{\link[mlr3filters:Filter]{mlr3filters::Filter}} -> \code{FilterInformationGain} 83 | } 84 | \section{Methods}{ 85 | \subsection{Public methods}{ 86 | \itemize{ 87 | \item \href{#method-FilterInformationGain-new}{\code{FilterInformationGain$new()}} 88 | \item \href{#method-FilterInformationGain-clone}{\code{FilterInformationGain$clone()}} 89 | } 90 | } 91 | \if{html}{\out{ 92 |
Inherited methods 93 | 99 |
100 | }} 101 | \if{html}{\out{
}} 102 | \if{html}{\out{}} 103 | \if{latex}{\out{\hypertarget{method-FilterInformationGain-new}{}}} 104 | \subsection{Method \code{new()}}{ 105 | Create a FilterInformationGain object. 106 | \subsection{Usage}{ 107 | \if{html}{\out{
}}\preformatted{FilterInformationGain$new()}\if{html}{\out{
}} 108 | } 109 | 110 | } 111 | \if{html}{\out{
}} 112 | \if{html}{\out{}} 113 | \if{latex}{\out{\hypertarget{method-FilterInformationGain-clone}{}}} 114 | \subsection{Method \code{clone()}}{ 115 | The objects of this class are cloneable with this method. 116 | \subsection{Usage}{ 117 | \if{html}{\out{
}}\preformatted{FilterInformationGain$clone(deep = FALSE)}\if{html}{\out{
}} 118 | } 119 | 120 | \subsection{Arguments}{ 121 | \if{html}{\out{
}} 122 | \describe{ 123 | \item{\code{deep}}{Whether to make a deep clone.} 124 | } 125 | \if{html}{\out{
}} 126 | } 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /man/mlr_filters_jmi.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FilterJMI.R 3 | \name{mlr_filters_jmi} 4 | \alias{mlr_filters_jmi} 5 | \alias{FilterJMI} 6 | \title{Joint Mutual Information Filter} 7 | \description{ 8 | Joint mutual information filter calling \code{\link[praznik:JMI]{praznik::JMI()}} in 9 | package \CRANpkg{praznik}. 10 | 11 | This filter supports partial scoring (see \link{Filter}). 12 | } 13 | \details{ 14 | As the scores calculated by the \CRANpkg{praznik} package are not monotone due 15 | to the greedy forward fashion, the returned scores simply reflect the selection order: 16 | \code{1}, \code{(k-1)/k}, ..., \code{1/k} where \code{k} is the number of selected features. 17 | 18 | Threading is disabled by default (hyperparameter \code{threads} is set to 1). 19 | Set to a number \verb{>= 2} to enable threading, or to \code{0} for auto-detecting the number 20 | of available cores. 21 | } 22 | \examples{ 23 | if (requireNamespace("praznik")) { 24 | task = mlr3::tsk("iris") 25 | filter = flt("jmi") 26 | filter$calculate(task, nfeat = 2) 27 | as.data.table(filter) 28 | } 29 | 30 | if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart", "praznik"), quietly = TRUE)) { 31 | library("mlr3pipelines") 32 | task = mlr3::tsk("spam") 33 | 34 | # Note: `filter.frac` is selected randomly and should be tuned. 35 | 36 | graph = po("filter", filter = flt("jmi"), filter.frac = 0.5) \%>>\% 37 | po("learner", mlr3::lrn("classif.rpart")) 38 | 39 | graph$train(task) 40 | } 41 | } 42 | \references{ 43 | Kursa MB (2021). 44 | \dQuote{Praznik: High performance information-based feature selection.} 45 | \emph{SoftwareX}, \bold{16}, 100819. 46 | \doi{10.1016/j.softx.2021.100819}. 
47 | 48 | For a benchmark of filter methods: 49 | 50 | Bommert A, Sun X, Bischl B, Rahnenführer J, Lang M (2020). 51 | \dQuote{Benchmark for filter methods for feature selection in high-dimensional classification data.} 52 | \emph{Computational Statistics & Data Analysis}, \bold{143}, 106839. 53 | \doi{10.1016/j.csda.2019.106839}. 54 | } 55 | \seealso{ 56 | \itemize{ 57 | \item \link[mlr3pipelines:mlr_pipeops_filter]{PipeOpFilter} for filter-based feature selection. 58 | \item \link[mlr3misc:Dictionary]{Dictionary} of \link[=Filter]{Filters}: \link{mlr_filters} 59 | } 60 | 61 | Other Filter: 62 | \code{\link{Filter}}, 63 | \code{\link{mlr_filters}}, 64 | \code{\link{mlr_filters_anova}}, 65 | \code{\link{mlr_filters_auc}}, 66 | \code{\link{mlr_filters_boruta}}, 67 | \code{\link{mlr_filters_carscore}}, 68 | \code{\link{mlr_filters_carsurvscore}}, 69 | \code{\link{mlr_filters_cmim}}, 70 | \code{\link{mlr_filters_correlation}}, 71 | \code{\link{mlr_filters_disr}}, 72 | \code{\link{mlr_filters_find_correlation}}, 73 | \code{\link{mlr_filters_importance}}, 74 | \code{\link{mlr_filters_information_gain}}, 75 | \code{\link{mlr_filters_jmim}}, 76 | \code{\link{mlr_filters_kruskal_test}}, 77 | \code{\link{mlr_filters_mim}}, 78 | \code{\link{mlr_filters_mrmr}}, 79 | \code{\link{mlr_filters_njmim}}, 80 | \code{\link{mlr_filters_performance}}, 81 | \code{\link{mlr_filters_permutation}}, 82 | \code{\link{mlr_filters_relief}}, 83 | \code{\link{mlr_filters_selected_features}}, 84 | \code{\link{mlr_filters_univariate_cox}}, 85 | \code{\link{mlr_filters_variance}} 86 | } 87 | \concept{Filter} 88 | \section{Super class}{ 89 | \code{\link[mlr3filters:Filter]{mlr3filters::Filter}} -> \code{FilterJMI} 90 | } 91 | \section{Methods}{ 92 | \subsection{Public methods}{ 93 | \itemize{ 94 | \item \href{#method-FilterJMI-new}{\code{FilterJMI$new()}} 95 | \item \href{#method-FilterJMI-clone}{\code{FilterJMI$clone()}} 96 | } 97 | } 98 | \if{html}{\out{ 99 |
Inherited methods 100 | 106 |
107 | }} 108 | \if{html}{\out{
}} 109 | \if{html}{\out{}} 110 | \if{latex}{\out{\hypertarget{method-FilterJMI-new}{}}} 111 | \subsection{Method \code{new()}}{ 112 | Create a FilterJMI object. 113 | \subsection{Usage}{ 114 | \if{html}{\out{
}}\preformatted{FilterJMI$new()}\if{html}{\out{
}} 115 | } 116 | 117 | } 118 | \if{html}{\out{
}} 119 | \if{html}{\out{}} 120 | \if{latex}{\out{\hypertarget{method-FilterJMI-clone}{}}} 121 | \subsection{Method \code{clone()}}{ 122 | The objects of this class are cloneable with this method. 123 | \subsection{Usage}{ 124 | \if{html}{\out{
}}\preformatted{FilterJMI$clone(deep = FALSE)}\if{html}{\out{
}} 125 | } 126 | 127 | \subsection{Arguments}{ 128 | \if{html}{\out{
}} 129 | \describe{ 130 | \item{\code{deep}}{Whether to make a deep clone.} 131 | } 132 | \if{html}{\out{
}} 133 | } 134 | } 135 | } 136 | -------------------------------------------------------------------------------- /man/mlr_filters_jmim.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FilterJMIM.R 3 | \name{mlr_filters_jmim} 4 | \alias{mlr_filters_jmim} 5 | \alias{FilterJMIM} 6 | \title{Minimal Joint Mutual Information Maximization Filter} 7 | \description{ 8 | Minimal joint mutual information maximization filter calling 9 | \code{\link[praznik:JMIM]{praznik::JMIM()}} in package \CRANpkg{praznik}. 10 | 11 | This filter supports partial scoring (see \link{Filter}). 12 | } 13 | \details{ 14 | As the scores calculated by the \CRANpkg{praznik} package are not monotone due 15 | to the greedy forward fashion, the returned scores simply reflect the selection order: 16 | \code{1}, \code{(k-1)/k}, ..., \code{1/k} where \code{k} is the number of selected features. 17 | 18 | Threading is disabled by default (hyperparameter \code{threads} is set to 1). 19 | Set to a number \verb{>= 2} to enable threading, or to \code{0} for auto-detecting the number 20 | of available cores. 21 | } 22 | \examples{ 23 | if (requireNamespace("praznik")) { 24 | task = mlr3::tsk("iris") 25 | filter = flt("jmim") 26 | filter$calculate(task, nfeat = 2) 27 | as.data.table(filter) 28 | } 29 | 30 | if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart", "praznik"), quietly = TRUE)) { 31 | library("mlr3pipelines") 32 | task = mlr3::tsk("spam") 33 | 34 | # Note: `filter.frac` is selected randomly and should be tuned. 35 | 36 | graph = po("filter", filter = flt("jmim"), filter.frac = 0.5) \%>>\% 37 | po("learner", mlr3::lrn("classif.rpart")) 38 | 39 | graph$train(task) 40 | } 41 | } 42 | \references{ 43 | Kursa MB (2021). 44 | \dQuote{Praznik: High performance information-based feature selection.} 45 | \emph{SoftwareX}, \bold{16}, 100819. 46 | \doi{10.1016/j.softx.2021.100819}. 
47 | 48 | For a benchmark of filter methods: 49 | 50 | Bommert A, Sun X, Bischl B, Rahnenführer J, Lang M (2020). 51 | \dQuote{Benchmark for filter methods for feature selection in high-dimensional classification data.} 52 | \emph{Computational Statistics & Data Analysis}, \bold{143}, 106839. 53 | \doi{10.1016/j.csda.2019.106839}. 54 | } 55 | \seealso{ 56 | \itemize{ 57 | \item \link[mlr3pipelines:mlr_pipeops_filter]{PipeOpFilter} for filter-based feature selection. 58 | \item \link[mlr3misc:Dictionary]{Dictionary} of \link[=Filter]{Filters}: \link{mlr_filters} 59 | } 60 | 61 | Other Filter: 62 | \code{\link{Filter}}, 63 | \code{\link{mlr_filters}}, 64 | \code{\link{mlr_filters_anova}}, 65 | \code{\link{mlr_filters_auc}}, 66 | \code{\link{mlr_filters_boruta}}, 67 | \code{\link{mlr_filters_carscore}}, 68 | \code{\link{mlr_filters_carsurvscore}}, 69 | \code{\link{mlr_filters_cmim}}, 70 | \code{\link{mlr_filters_correlation}}, 71 | \code{\link{mlr_filters_disr}}, 72 | \code{\link{mlr_filters_find_correlation}}, 73 | \code{\link{mlr_filters_importance}}, 74 | \code{\link{mlr_filters_information_gain}}, 75 | \code{\link{mlr_filters_jmi}}, 76 | \code{\link{mlr_filters_kruskal_test}}, 77 | \code{\link{mlr_filters_mim}}, 78 | \code{\link{mlr_filters_mrmr}}, 79 | \code{\link{mlr_filters_njmim}}, 80 | \code{\link{mlr_filters_performance}}, 81 | \code{\link{mlr_filters_permutation}}, 82 | \code{\link{mlr_filters_relief}}, 83 | \code{\link{mlr_filters_selected_features}}, 84 | \code{\link{mlr_filters_univariate_cox}}, 85 | \code{\link{mlr_filters_variance}} 86 | } 87 | \concept{Filter} 88 | \section{Super class}{ 89 | \code{\link[mlr3filters:Filter]{mlr3filters::Filter}} -> \code{FilterJMIM} 90 | } 91 | \section{Methods}{ 92 | \subsection{Public methods}{ 93 | \itemize{ 94 | \item \href{#method-FilterJMIM-new}{\code{FilterJMIM$new()}} 95 | \item \href{#method-FilterJMIM-clone}{\code{FilterJMIM$clone()}} 96 | } 97 | } 98 | \if{html}{\out{ 99 |
Inherited methods 100 | 106 |
107 | }} 108 | \if{html}{\out{
}} 109 | \if{html}{\out{}} 110 | \if{latex}{\out{\hypertarget{method-FilterJMIM-new}{}}} 111 | \subsection{Method \code{new()}}{ 112 | Create a FilterJMIM object. 113 | \subsection{Usage}{ 114 | \if{html}{\out{
}}\preformatted{FilterJMIM$new()}\if{html}{\out{
}} 115 | } 116 | 117 | } 118 | \if{html}{\out{
}} 119 | \if{html}{\out{}} 120 | \if{latex}{\out{\hypertarget{method-FilterJMIM-clone}{}}} 121 | \subsection{Method \code{clone()}}{ 122 | The objects of this class are cloneable with this method. 123 | \subsection{Usage}{ 124 | \if{html}{\out{
}}\preformatted{FilterJMIM$clone(deep = FALSE)}\if{html}{\out{
}} 125 | } 126 | 127 | \subsection{Arguments}{ 128 | \if{html}{\out{
}} 129 | \describe{ 130 | \item{\code{deep}}{Whether to make a deep clone.} 131 | } 132 | \if{html}{\out{
}} 133 | } 134 | } 135 | } 136 | -------------------------------------------------------------------------------- /man/mlr_filters_kruskal_test.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FilterKruskalTest.R 3 | \name{mlr_filters_kruskal_test} 4 | \alias{mlr_filters_kruskal_test} 5 | \alias{FilterKruskalTest} 6 | \title{Kruskal-Wallis Test Filter} 7 | \description{ 8 | Kruskal-Wallis rank sum test filter calling \code{\link[stats:kruskal.test]{stats::kruskal.test()}}. 9 | 10 | The filter value is \code{-log10(p)} where \code{p} is the \eqn{p}-value. This 11 | transformation is necessary to ensure numerical stability for very small 12 | \eqn{p}-values. 13 | } 14 | \note{ 15 | This filter, in its default settings, can handle missing values in the features. 16 | However, the resulting filter scores may be misleading or at least difficult to compare 17 | if some features have a large proportion of missing values. 18 | 19 | If a feature does not have at least one non-missing observation per label, the resulting score will be NA. 20 | Missing scores appear in a random, non-deterministic order at the end of the vector of scores. 21 | } 22 | \examples{ 23 | task = mlr3::tsk("iris") 24 | filter = flt("kruskal_test") 25 | filter$calculate(task) 26 | as.data.table(filter) 27 | 28 | # transform to p-value 29 | 10^(-filter$scores) 30 | 31 | if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart"), quietly = TRUE)) { 32 | library("mlr3pipelines") 33 | task = mlr3::tsk("spam") 34 | 35 | # Note: `filter.frac` is selected randomly and should be tuned. 36 | 37 | graph = po("filter", filter = flt("kruskal_test"), filter.frac = 0.5) \%>>\% 38 | po("learner", mlr3::lrn("classif.rpart")) 39 | 40 | graph$train(task) 41 | } 42 | } 43 | \references{ 44 | For a benchmark of filter methods: 45 | 46 | Bommert A, Sun X, Bischl B, Rahnenführer J, Lang M (2020). 
47 | \dQuote{Benchmark for filter methods for feature selection in high-dimensional classification data.} 48 | \emph{Computational Statistics & Data Analysis}, \bold{143}, 106839. 49 | \doi{10.1016/j.csda.2019.106839}. 50 | } 51 | \seealso{ 52 | \itemize{ 53 | \item \link[mlr3pipelines:mlr_pipeops_filter]{PipeOpFilter} for filter-based feature selection. 54 | \item \link[mlr3misc:Dictionary]{Dictionary} of \link[=Filter]{Filters}: \link{mlr_filters} 55 | } 56 | 57 | Other Filter: 58 | \code{\link{Filter}}, 59 | \code{\link{mlr_filters}}, 60 | \code{\link{mlr_filters_anova}}, 61 | \code{\link{mlr_filters_auc}}, 62 | \code{\link{mlr_filters_boruta}}, 63 | \code{\link{mlr_filters_carscore}}, 64 | \code{\link{mlr_filters_carsurvscore}}, 65 | \code{\link{mlr_filters_cmim}}, 66 | \code{\link{mlr_filters_correlation}}, 67 | \code{\link{mlr_filters_disr}}, 68 | \code{\link{mlr_filters_find_correlation}}, 69 | \code{\link{mlr_filters_importance}}, 70 | \code{\link{mlr_filters_information_gain}}, 71 | \code{\link{mlr_filters_jmi}}, 72 | \code{\link{mlr_filters_jmim}}, 73 | \code{\link{mlr_filters_mim}}, 74 | \code{\link{mlr_filters_mrmr}}, 75 | \code{\link{mlr_filters_njmim}}, 76 | \code{\link{mlr_filters_performance}}, 77 | \code{\link{mlr_filters_permutation}}, 78 | \code{\link{mlr_filters_relief}}, 79 | \code{\link{mlr_filters_selected_features}}, 80 | \code{\link{mlr_filters_univariate_cox}}, 81 | \code{\link{mlr_filters_variance}} 82 | } 83 | \concept{Filter} 84 | \section{Super class}{ 85 | \code{\link[mlr3filters:Filter]{mlr3filters::Filter}} -> \code{FilterKruskalTest} 86 | } 87 | \section{Methods}{ 88 | \subsection{Public methods}{ 89 | \itemize{ 90 | \item \href{#method-FilterKruskalTest-new}{\code{FilterKruskalTest$new()}} 91 | \item \href{#method-FilterKruskalTest-clone}{\code{FilterKruskalTest$clone()}} 92 | } 93 | } 94 | \if{html}{\out{ 95 |
Inherited methods 96 | 102 |
103 | }} 104 | \if{html}{\out{
}} 105 | \if{html}{\out{}} 106 | \if{latex}{\out{\hypertarget{method-FilterKruskalTest-new}{}}} 107 | \subsection{Method \code{new()}}{ 108 | Create a FilterKruskalTest object. 109 | \subsection{Usage}{ 110 | \if{html}{\out{
}}\preformatted{FilterKruskalTest$new()}\if{html}{\out{
}} 111 | } 112 | 113 | } 114 | \if{html}{\out{
}} 115 | \if{html}{\out{}} 116 | \if{latex}{\out{\hypertarget{method-FilterKruskalTest-clone}{}}} 117 | \subsection{Method \code{clone()}}{ 118 | The objects of this class are cloneable with this method. 119 | \subsection{Usage}{ 120 | \if{html}{\out{
}}\preformatted{FilterKruskalTest$clone(deep = FALSE)}\if{html}{\out{
}} 121 | } 122 | 123 | \subsection{Arguments}{ 124 | \if{html}{\out{
}} 125 | \describe{ 126 | \item{\code{deep}}{Whether to make a deep clone.} 127 | } 128 | \if{html}{\out{
}} 129 | } 130 | } 131 | } 132 | -------------------------------------------------------------------------------- /man/mlr_filters_mim.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FilterMIM.R 3 | \name{mlr_filters_mim} 4 | \alias{mlr_filters_mim} 5 | \alias{FilterMIM} 6 | \title{Mutual Information Maximization Filter} 7 | \description{ 8 | Mutual information maximization filter 9 | calling \code{\link[praznik:MIM]{praznik::MIM()}} in package \CRANpkg{praznik}. 10 | 11 | This filter supports partial scoring (see \link{Filter}). 12 | } 13 | \details{ 14 | As the scores calculated by the \CRANpkg{praznik} package are not monotone due 15 | to the greedy forward fashion, the returned scores simply reflect the selection order: 16 | \code{1}, \code{(k-1)/k}, ..., \code{1/k} where \code{k} is the number of selected features. 17 | 18 | Threading is disabled by default (hyperparameter \code{threads} is set to 1). 19 | Set to a number \verb{>= 2} to enable threading, or to \code{0} for auto-detecting the number 20 | of available cores. 21 | } 22 | \examples{ 23 | if (requireNamespace("praznik")) { 24 | task = mlr3::tsk("iris") 25 | filter = flt("mim") 26 | filter$calculate(task, nfeat = 2) 27 | as.data.table(filter) 28 | } 29 | 30 | if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart", "praznik"), quietly = TRUE)) { 31 | library("mlr3pipelines") 32 | task = mlr3::tsk("spam") 33 | 34 | # Note: `filter.frac` is selected randomly and should be tuned. 35 | 36 | graph = po("filter", filter = flt("mim"), filter.frac = 0.5) \%>>\% 37 | po("learner", mlr3::lrn("classif.rpart")) 38 | 39 | graph$train(task) 40 | } 41 | } 42 | \references{ 43 | Kursa MB (2021). 44 | \dQuote{Praznik: High performance information-based feature selection.} 45 | \emph{SoftwareX}, \bold{16}, 100819. 46 | \doi{10.1016/j.softx.2021.100819}. 
47 | 48 | For a benchmark of filter methods: 49 | 50 | Bommert A, Sun X, Bischl B, Rahnenführer J, Lang M (2020). 51 | \dQuote{Benchmark for filter methods for feature selection in high-dimensional classification data.} 52 | \emph{Computational Statistics & Data Analysis}, \bold{143}, 106839. 53 | \doi{10.1016/j.csda.2019.106839}. 54 | } 55 | \seealso{ 56 | \itemize{ 57 | \item \link[mlr3pipelines:mlr_pipeops_filter]{PipeOpFilter} for filter-based feature selection. 58 | \item \link[mlr3misc:Dictionary]{Dictionary} of \link[=Filter]{Filters}: \link{mlr_filters} 59 | } 60 | 61 | Other Filter: 62 | \code{\link{Filter}}, 63 | \code{\link{mlr_filters}}, 64 | \code{\link{mlr_filters_anova}}, 65 | \code{\link{mlr_filters_auc}}, 66 | \code{\link{mlr_filters_boruta}}, 67 | \code{\link{mlr_filters_carscore}}, 68 | \code{\link{mlr_filters_carsurvscore}}, 69 | \code{\link{mlr_filters_cmim}}, 70 | \code{\link{mlr_filters_correlation}}, 71 | \code{\link{mlr_filters_disr}}, 72 | \code{\link{mlr_filters_find_correlation}}, 73 | \code{\link{mlr_filters_importance}}, 74 | \code{\link{mlr_filters_information_gain}}, 75 | \code{\link{mlr_filters_jmi}}, 76 | \code{\link{mlr_filters_jmim}}, 77 | \code{\link{mlr_filters_kruskal_test}}, 78 | \code{\link{mlr_filters_mrmr}}, 79 | \code{\link{mlr_filters_njmim}}, 80 | \code{\link{mlr_filters_performance}}, 81 | \code{\link{mlr_filters_permutation}}, 82 | \code{\link{mlr_filters_relief}}, 83 | \code{\link{mlr_filters_selected_features}}, 84 | \code{\link{mlr_filters_univariate_cox}}, 85 | \code{\link{mlr_filters_variance}} 86 | } 87 | \concept{Filter} 88 | \section{Super class}{ 89 | \code{\link[mlr3filters:Filter]{mlr3filters::Filter}} -> \code{FilterMIM} 90 | } 91 | \section{Methods}{ 92 | \subsection{Public methods}{ 93 | \itemize{ 94 | \item \href{#method-FilterMIM-new}{\code{FilterMIM$new()}} 95 | \item \href{#method-FilterMIM-clone}{\code{FilterMIM$clone()}} 96 | } 97 | } 98 | \if{html}{\out{ 99 |
Inherited methods 100 | 106 |
107 | }} 108 | \if{html}{\out{
}} 109 | \if{html}{\out{}} 110 | \if{latex}{\out{\hypertarget{method-FilterMIM-new}{}}} 111 | \subsection{Method \code{new()}}{ 112 | Create a FilterMIM object. 113 | \subsection{Usage}{ 114 | \if{html}{\out{
}}\preformatted{FilterMIM$new()}\if{html}{\out{
}} 115 | } 116 | 117 | } 118 | \if{html}{\out{
}} 119 | \if{html}{\out{}} 120 | \if{latex}{\out{\hypertarget{method-FilterMIM-clone}{}}} 121 | \subsection{Method \code{clone()}}{ 122 | The objects of this class are cloneable with this method. 123 | \subsection{Usage}{ 124 | \if{html}{\out{
}}\preformatted{FilterMIM$clone(deep = FALSE)}\if{html}{\out{
}} 125 | } 126 | 127 | \subsection{Arguments}{ 128 | \if{html}{\out{
}} 129 | \describe{ 130 | \item{\code{deep}}{Whether to make a deep clone.} 131 | } 132 | \if{html}{\out{
}} 133 | } 134 | } 135 | } 136 | -------------------------------------------------------------------------------- /man/mlr_filters_mrmr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FilterMRMR.R 3 | \name{mlr_filters_mrmr} 4 | \alias{mlr_filters_mrmr} 5 | \alias{FilterMRMR} 6 | \title{Minimum Redundancy Maximal Relevancy Filter} 7 | \description{ 8 | Minimum redundancy maximal relevancy filter calling 9 | \code{\link[praznik:MRMR]{praznik::MRMR()}} in package \CRANpkg{praznik}. 10 | 11 | This filter supports partial scoring (see \link{Filter}). 12 | } 13 | \details{ 14 | As the scores calculated by the \CRANpkg{praznik} package are not monotone due 15 | to the greedy forward fashion, the returned scores simply reflect the selection order: 16 | \code{1}, \code{(k-1)/k}, ..., \code{1/k} where \code{k} is the number of selected features. 17 | 18 | Threading is disabled by default (hyperparameter \code{threads} is set to 1). 19 | Set to a number \verb{>= 2} to enable threading, or to \code{0} for auto-detecting the number 20 | of available cores. 21 | } 22 | \examples{ 23 | if (requireNamespace("praznik")) { 24 | task = mlr3::tsk("iris") 25 | filter = flt("mrmr") 26 | filter$calculate(task, nfeat = 2) 27 | as.data.table(filter) 28 | } 29 | 30 | if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart", "praznik"), quietly = TRUE)) { 31 | library("mlr3pipelines") 32 | task = mlr3::tsk("spam") 33 | 34 | # Note: `filter.frac` is selected randomly and should be tuned. 35 | 36 | graph = po("filter", filter = flt("mrmr"), filter.frac = 0.5) \%>>\% 37 | po("learner", mlr3::lrn("classif.rpart")) 38 | 39 | graph$train(task) 40 | } 41 | } 42 | \references{ 43 | Kursa MB (2021). 44 | \dQuote{Praznik: High performance information-based feature selection.} 45 | \emph{SoftwareX}, \bold{16}, 100819. 46 | \doi{10.1016/j.softx.2021.100819}. 
47 | 48 | For a benchmark of filter methods: 49 | 50 | Bommert A, Sun X, Bischl B, Rahnenführer J, Lang M (2020). 51 | \dQuote{Benchmark for filter methods for feature selection in high-dimensional classification data.} 52 | \emph{Computational Statistics & Data Analysis}, \bold{143}, 106839. 53 | \doi{10.1016/j.csda.2019.106839}. 54 | } 55 | \seealso{ 56 | \itemize{ 57 | \item \link[mlr3pipelines:mlr_pipeops_filter]{PipeOpFilter} for filter-based feature selection. 58 | \item \link[mlr3misc:Dictionary]{Dictionary} of \link[=Filter]{Filters}: \link{mlr_filters} 59 | } 60 | 61 | Other Filter: 62 | \code{\link{Filter}}, 63 | \code{\link{mlr_filters}}, 64 | \code{\link{mlr_filters_anova}}, 65 | \code{\link{mlr_filters_auc}}, 66 | \code{\link{mlr_filters_boruta}}, 67 | \code{\link{mlr_filters_carscore}}, 68 | \code{\link{mlr_filters_carsurvscore}}, 69 | \code{\link{mlr_filters_cmim}}, 70 | \code{\link{mlr_filters_correlation}}, 71 | \code{\link{mlr_filters_disr}}, 72 | \code{\link{mlr_filters_find_correlation}}, 73 | \code{\link{mlr_filters_importance}}, 74 | \code{\link{mlr_filters_information_gain}}, 75 | \code{\link{mlr_filters_jmi}}, 76 | \code{\link{mlr_filters_jmim}}, 77 | \code{\link{mlr_filters_kruskal_test}}, 78 | \code{\link{mlr_filters_mim}}, 79 | \code{\link{mlr_filters_njmim}}, 80 | \code{\link{mlr_filters_performance}}, 81 | \code{\link{mlr_filters_permutation}}, 82 | \code{\link{mlr_filters_relief}}, 83 | \code{\link{mlr_filters_selected_features}}, 84 | \code{\link{mlr_filters_univariate_cox}}, 85 | \code{\link{mlr_filters_variance}} 86 | } 87 | \concept{Filter} 88 | \section{Super class}{ 89 | \code{\link[mlr3filters:Filter]{mlr3filters::Filter}} -> \code{FilterMRMR} 90 | } 91 | \section{Methods}{ 92 | \subsection{Public methods}{ 93 | \itemize{ 94 | \item \href{#method-FilterMRMR-new}{\code{FilterMRMR$new()}} 95 | \item \href{#method-FilterMRMR-clone}{\code{FilterMRMR$clone()}} 96 | } 97 | } 98 | \if{html}{\out{ 99 |
Inherited methods 100 | 106 |
107 | }} 108 | \if{html}{\out{
}} 109 | \if{html}{\out{}} 110 | \if{latex}{\out{\hypertarget{method-FilterMRMR-new}{}}} 111 | \subsection{Method \code{new()}}{ 112 | Create a FilterMRMR object. 113 | \subsection{Usage}{ 114 | \if{html}{\out{
}}\preformatted{FilterMRMR$new()}\if{html}{\out{
}} 115 | } 116 | 117 | } 118 | \if{html}{\out{
}} 119 | \if{html}{\out{}} 120 | \if{latex}{\out{\hypertarget{method-FilterMRMR-clone}{}}} 121 | \subsection{Method \code{clone()}}{ 122 | The objects of this class are cloneable with this method. 123 | \subsection{Usage}{ 124 | \if{html}{\out{
}}\preformatted{FilterMRMR$clone(deep = FALSE)}\if{html}{\out{
}} 125 | } 126 | 127 | \subsection{Arguments}{ 128 | \if{html}{\out{
}} 129 | \describe{ 130 | \item{\code{deep}}{Whether to make a deep clone.} 131 | } 132 | \if{html}{\out{
}} 133 | } 134 | } 135 | } 136 | -------------------------------------------------------------------------------- /man/mlr_filters_njmim.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FilterNJMIM.R 3 | \name{mlr_filters_njmim} 4 | \alias{mlr_filters_njmim} 5 | \alias{FilterNJMIM} 6 | \title{Minimal Normalised Joint Mutual Information Maximization Filter} 7 | \description{ 8 | Minimal normalised joint mutual information maximization filter 9 | calling \code{\link[praznik:NJMIM]{praznik::NJMIM()}} from package \CRANpkg{praznik}. 10 | 11 | This filter supports partial scoring (see \link{Filter}). 12 | } 13 | \details{ 14 | As the scores calculated by the \CRANpkg{praznik} package are not monotone due 15 | to the greedy forward fashion, the returned scores simply reflect the selection order: 16 | \code{1}, \code{(k-1)/k}, ..., \code{1/k} where \code{k} is the number of selected features. 17 | 18 | Threading is disabled by default (hyperparameter \code{threads} is set to 1). 19 | Set to a number \verb{>= 2} to enable threading, or to \code{0} for auto-detecting the number 20 | of available cores. 21 | } 22 | \examples{ 23 | if (requireNamespace("praznik")) { 24 | task = mlr3::tsk("iris") 25 | filter = flt("njmim") 26 | filter$calculate(task, nfeat = 2) 27 | as.data.table(filter) 28 | } 29 | 30 | if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart", "praznik"), quietly = TRUE)) { 31 | library("mlr3pipelines") 32 | task = mlr3::tsk("spam") 33 | 34 | # Note: `filter.frac` is selected randomly and should be tuned. 35 | 36 | graph = po("filter", filter = flt("njmim"), filter.frac = 0.5) \%>>\% 37 | po("learner", mlr3::lrn("classif.rpart")) 38 | 39 | graph$train(task) 40 | } 41 | } 42 | \references{ 43 | Kursa MB (2021). 44 | \dQuote{Praznik: High performance information-based feature selection.} 45 | \emph{SoftwareX}, \bold{16}, 100819. 
46 | \doi{10.1016/j.softx.2021.100819}. 47 | 48 | For a benchmark of filter methods: 49 | 50 | Bommert A, Sun X, Bischl B, Rahnenführer J, Lang M (2020). 51 | \dQuote{Benchmark for filter methods for feature selection in high-dimensional classification data.} 52 | \emph{Computational Statistics & Data Analysis}, \bold{143}, 106839. 53 | \doi{10.1016/j.csda.2019.106839}. 54 | } 55 | \seealso{ 56 | \itemize{ 57 | \item \link[mlr3pipelines:mlr_pipeops_filter]{PipeOpFilter} for filter-based feature selection. 58 | \item \link[mlr3misc:Dictionary]{Dictionary} of \link[=Filter]{Filters}: \link{mlr_filters} 59 | } 60 | 61 | Other Filter: 62 | \code{\link{Filter}}, 63 | \code{\link{mlr_filters}}, 64 | \code{\link{mlr_filters_anova}}, 65 | \code{\link{mlr_filters_auc}}, 66 | \code{\link{mlr_filters_boruta}}, 67 | \code{\link{mlr_filters_carscore}}, 68 | \code{\link{mlr_filters_carsurvscore}}, 69 | \code{\link{mlr_filters_cmim}}, 70 | \code{\link{mlr_filters_correlation}}, 71 | \code{\link{mlr_filters_disr}}, 72 | \code{\link{mlr_filters_find_correlation}}, 73 | \code{\link{mlr_filters_importance}}, 74 | \code{\link{mlr_filters_information_gain}}, 75 | \code{\link{mlr_filters_jmi}}, 76 | \code{\link{mlr_filters_jmim}}, 77 | \code{\link{mlr_filters_kruskal_test}}, 78 | \code{\link{mlr_filters_mim}}, 79 | \code{\link{mlr_filters_mrmr}}, 80 | \code{\link{mlr_filters_performance}}, 81 | \code{\link{mlr_filters_permutation}}, 82 | \code{\link{mlr_filters_relief}}, 83 | \code{\link{mlr_filters_selected_features}}, 84 | \code{\link{mlr_filters_univariate_cox}}, 85 | \code{\link{mlr_filters_variance}} 86 | } 87 | \concept{Filter} 88 | \section{Super class}{ 89 | \code{\link[mlr3filters:Filter]{mlr3filters::Filter}} -> \code{FilterNJMIM} 90 | } 91 | \section{Methods}{ 92 | \subsection{Public methods}{ 93 | \itemize{ 94 | \item \href{#method-FilterNJMIM-new}{\code{FilterNJMIM$new()}} 95 | \item \href{#method-FilterNJMIM-clone}{\code{FilterNJMIM$clone()}} 96 | } 97 | } 98 | 
\if{html}{\out{ 99 |
Inherited methods 100 | 106 |
107 | }} 108 | \if{html}{\out{
}} 109 | \if{html}{\out{}} 110 | \if{latex}{\out{\hypertarget{method-FilterNJMIM-new}{}}} 111 | \subsection{Method \code{new()}}{ 112 | Create a FilterNJMIM object. 113 | \subsection{Usage}{ 114 | \if{html}{\out{
}}\preformatted{FilterNJMIM$new()}\if{html}{\out{
}} 115 | } 116 | 117 | } 118 | \if{html}{\out{
}} 119 | \if{html}{\out{}} 120 | \if{latex}{\out{\hypertarget{method-FilterNJMIM-clone}{}}} 121 | \subsection{Method \code{clone()}}{ 122 | The objects of this class are cloneable with this method. 123 | \subsection{Usage}{ 124 | \if{html}{\out{
}}\preformatted{FilterNJMIM$clone(deep = FALSE)}\if{html}{\out{
}} 125 | } 126 | 127 | \subsection{Arguments}{ 128 | \if{html}{\out{
}} 129 | \describe{ 130 | \item{\code{deep}}{Whether to make a deep clone.} 131 | } 132 | \if{html}{\out{
}} 133 | } 134 | } 135 | } 136 | -------------------------------------------------------------------------------- /man/mlr_filters_relief.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FilterRelief.R 3 | \name{mlr_filters_relief} 4 | \alias{mlr_filters_relief} 5 | \alias{FilterRelief} 6 | \title{RELIEF Filter} 7 | \description{ 8 | RELIEF filter calling 9 | \code{\link[FSelectorRcpp:relief]{FSelectorRcpp::relief()}} in package \CRANpkg{FSelectorRcpp}. 10 | } 11 | \note{ 12 | This filter can handle missing values in the features. 13 | However, the resulting filter scores may be misleading or at least difficult to compare 14 | if some features have a large proportion of missing values. 15 | 16 | If a feature has no non-missing observation, the resulting score will be (close to) 0. 17 | } 18 | \examples{ 19 | if (requireNamespace("FSelectorRcpp")) { 20 | ## Relief (default) 21 | task = mlr3::tsk("iris") 22 | filter = flt("relief") 23 | filter$calculate(task) 24 | head(filter$scores, 3) 25 | as.data.table(filter) 26 | } 27 | 28 | if (mlr3misc::require_namespaces(c("mlr3pipelines", "FSelectorRcpp", "rpart"), quietly = TRUE)) { 29 | library("mlr3pipelines") 30 | task = mlr3::tsk("iris") 31 | 32 | # Note: `filter.frac` is selected randomly and should be tuned. 33 | 34 | graph = po("filter", filter = flt("relief"), filter.frac = 0.5) \%>>\% 35 | po("learner", mlr3::lrn("classif.rpart")) 36 | 37 | graph$train(task) 38 | } 39 | } 40 | \seealso{ 41 | \itemize{ 42 | \item \link[mlr3pipelines:mlr_pipeops_filter]{PipeOpFilter} for filter-based feature selection. 
43 | \item \link[mlr3misc:Dictionary]{Dictionary} of \link[=Filter]{Filters}: \link{mlr_filters} 44 | } 45 | 46 | Other Filter: 47 | \code{\link{Filter}}, 48 | \code{\link{mlr_filters}}, 49 | \code{\link{mlr_filters_anova}}, 50 | \code{\link{mlr_filters_auc}}, 51 | \code{\link{mlr_filters_boruta}}, 52 | \code{\link{mlr_filters_carscore}}, 53 | \code{\link{mlr_filters_carsurvscore}}, 54 | \code{\link{mlr_filters_cmim}}, 55 | \code{\link{mlr_filters_correlation}}, 56 | \code{\link{mlr_filters_disr}}, 57 | \code{\link{mlr_filters_find_correlation}}, 58 | \code{\link{mlr_filters_importance}}, 59 | \code{\link{mlr_filters_information_gain}}, 60 | \code{\link{mlr_filters_jmi}}, 61 | \code{\link{mlr_filters_jmim}}, 62 | \code{\link{mlr_filters_kruskal_test}}, 63 | \code{\link{mlr_filters_mim}}, 64 | \code{\link{mlr_filters_mrmr}}, 65 | \code{\link{mlr_filters_njmim}}, 66 | \code{\link{mlr_filters_performance}}, 67 | \code{\link{mlr_filters_permutation}}, 68 | \code{\link{mlr_filters_selected_features}}, 69 | \code{\link{mlr_filters_univariate_cox}}, 70 | \code{\link{mlr_filters_variance}} 71 | } 72 | \concept{Filter} 73 | \section{Super class}{ 74 | \code{\link[mlr3filters:Filter]{mlr3filters::Filter}} -> \code{FilterRelief} 75 | } 76 | \section{Methods}{ 77 | \subsection{Public methods}{ 78 | \itemize{ 79 | \item \href{#method-FilterRelief-new}{\code{FilterRelief$new()}} 80 | \item \href{#method-FilterRelief-clone}{\code{FilterRelief$clone()}} 81 | } 82 | } 83 | \if{html}{\out{ 84 |
Inherited methods 85 | 91 |
92 | }} 93 | \if{html}{\out{
}} 94 | \if{html}{\out{}} 95 | \if{latex}{\out{\hypertarget{method-FilterRelief-new}{}}} 96 | \subsection{Method \code{new()}}{ 97 | Create a FilterRelief object. 98 | \subsection{Usage}{ 99 | \if{html}{\out{
}}\preformatted{FilterRelief$new()}\if{html}{\out{
}} 100 | } 101 | 102 | } 103 | \if{html}{\out{
}} 104 | \if{html}{\out{}} 105 | \if{latex}{\out{\hypertarget{method-FilterRelief-clone}{}}} 106 | \subsection{Method \code{clone()}}{ 107 | The objects of this class are cloneable with this method. 108 | \subsection{Usage}{ 109 | \if{html}{\out{
}}\preformatted{FilterRelief$clone(deep = FALSE)}\if{html}{\out{
}} 110 | } 111 | 112 | \subsection{Arguments}{ 113 | \if{html}{\out{
}} 114 | \describe{ 115 | \item{\code{deep}}{Whether to make a deep clone.} 116 | } 117 | \if{html}{\out{
}} 118 | } 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /man/mlr_filters_univariate_cox.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FilterUnivariateCox.R 3 | \name{mlr_filters_univariate_cox} 4 | \alias{mlr_filters_univariate_cox} 5 | \alias{FilterUnivariateCox} 6 | \title{Univariate Cox Survival Filter} 7 | \description{ 8 | Calculates scores for assessing the relationship between 9 | individual features and the time-to-event outcome (right-censored survival 10 | data) using a univariate Cox proportional hazards model. 11 | The goal is to determine which features have a statistically significant 12 | association with the event of interest, typically in the context of clinical 13 | or biomedical research. 14 | 15 | This filter fits a \link[survival:coxph]{Cox Proportional Hazards} model using 16 | each feature independently and extracts the \eqn{p}-value that quantifies the 17 | significance of the feature's impact on survival. The filter value is 18 | \code{-log10(p)} where \code{p} is the \eqn{p}-value. This transformation is necessary 19 | to ensure numerical stability for very small \eqn{p}-values. Also higher 20 | values denote more important features. The filter works only for numeric 21 | features so please ensure that factor variables are properly encoded, e.g. 22 | using \link[mlr3pipelines:mlr_pipeops_encode]{PipeOpEncode}. 23 | } 24 | \examples{ 25 | 26 | filter = flt("univariate_cox") 27 | filter 28 | 29 | } 30 | \seealso{ 31 | \itemize{ 32 | \item \link[mlr3pipelines:mlr_pipeops_filter]{PipeOpFilter} for filter-based feature selection. 
33 | \item \link[mlr3misc:Dictionary]{Dictionary} of \link[=Filter]{Filters}: \link{mlr_filters} 34 | } 35 | 36 | Other Filter: 37 | \code{\link{Filter}}, 38 | \code{\link{mlr_filters}}, 39 | \code{\link{mlr_filters_anova}}, 40 | \code{\link{mlr_filters_auc}}, 41 | \code{\link{mlr_filters_boruta}}, 42 | \code{\link{mlr_filters_carscore}}, 43 | \code{\link{mlr_filters_carsurvscore}}, 44 | \code{\link{mlr_filters_cmim}}, 45 | \code{\link{mlr_filters_correlation}}, 46 | \code{\link{mlr_filters_disr}}, 47 | \code{\link{mlr_filters_find_correlation}}, 48 | \code{\link{mlr_filters_importance}}, 49 | \code{\link{mlr_filters_information_gain}}, 50 | \code{\link{mlr_filters_jmi}}, 51 | \code{\link{mlr_filters_jmim}}, 52 | \code{\link{mlr_filters_kruskal_test}}, 53 | \code{\link{mlr_filters_mim}}, 54 | \code{\link{mlr_filters_mrmr}}, 55 | \code{\link{mlr_filters_njmim}}, 56 | \code{\link{mlr_filters_performance}}, 57 | \code{\link{mlr_filters_permutation}}, 58 | \code{\link{mlr_filters_relief}}, 59 | \code{\link{mlr_filters_selected_features}}, 60 | \code{\link{mlr_filters_variance}} 61 | } 62 | \concept{Filter} 63 | \section{Super class}{ 64 | \code{\link[mlr3filters:Filter]{mlr3filters::Filter}} -> \code{FilterUnivariateCox} 65 | } 66 | \section{Methods}{ 67 | \subsection{Public methods}{ 68 | \itemize{ 69 | \item \href{#method-FilterUnivariateCox-new}{\code{FilterUnivariateCox$new()}} 70 | \item \href{#method-FilterUnivariateCox-clone}{\code{FilterUnivariateCox$clone()}} 71 | } 72 | } 73 | \if{html}{\out{ 74 |
Inherited methods 75 | 81 |
82 | }} 83 | \if{html}{\out{
}} 84 | \if{html}{\out{}} 85 | \if{latex}{\out{\hypertarget{method-FilterUnivariateCox-new}{}}} 86 | \subsection{Method \code{new()}}{ 87 | Create a FilterUnivariateCox object. 88 | \subsection{Usage}{ 89 | \if{html}{\out{
}}\preformatted{FilterUnivariateCox$new()}\if{html}{\out{
}} 90 | } 91 | 92 | } 93 | \if{html}{\out{
}} 94 | \if{html}{\out{}} 95 | \if{latex}{\out{\hypertarget{method-FilterUnivariateCox-clone}{}}} 96 | \subsection{Method \code{clone()}}{ 97 | The objects of this class are cloneable with this method. 98 | \subsection{Usage}{ 99 | \if{html}{\out{
}}\preformatted{FilterUnivariateCox$clone(deep = FALSE)}\if{html}{\out{
}} 100 | } 101 | 102 | \subsection{Arguments}{ 103 | \if{html}{\out{
}} 104 | \describe{ 105 | \item{\code{deep}}{Whether to make a deep clone.} 106 | } 107 | \if{html}{\out{
}} 108 | } 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /man/mlr_filters_variance.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FilterVariance.R 3 | \name{mlr_filters_variance} 4 | \alias{mlr_filters_variance} 5 | \alias{FilterVariance} 6 | \title{Variance Filter} 7 | \description{ 8 | Variance filter calling \code{stats::var()}. 9 | 10 | Argument \code{na.rm} defaults to \code{TRUE} here. 11 | } 12 | \examples{ 13 | task = mlr3::tsk("mtcars") 14 | filter = flt("variance") 15 | filter$calculate(task) 16 | head(filter$scores, 3) 17 | as.data.table(filter) 18 | 19 | if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart"), quietly = TRUE)) { 20 | library("mlr3pipelines") 21 | task = mlr3::tsk("spam") 22 | 23 | # Note: `filter.frac` is selected randomly and should be tuned. 24 | 25 | graph = po("filter", filter = flt("variance"), filter.frac = 0.5) \%>>\% 26 | po("learner", mlr3::lrn("classif.rpart")) 27 | 28 | graph$train(task) 29 | } 30 | } 31 | \references{ 32 | For a benchmark of filter methods: 33 | 34 | Bommert A, Sun X, Bischl B, Rahnenführer J, Lang M (2020). 35 | \dQuote{Benchmark for filter methods for feature selection in high-dimensional classification data.} 36 | \emph{Computational Statistics & Data Analysis}, \bold{143}, 106839. 37 | \doi{10.1016/j.csda.2019.106839}. 38 | } 39 | \seealso{ 40 | \itemize{ 41 | \item \link[mlr3pipelines:mlr_pipeops_filter]{PipeOpFilter} for filter-based feature selection. 
42 | \item \link[mlr3misc:Dictionary]{Dictionary} of \link[=Filter]{Filters}: \link{mlr_filters} 43 | } 44 | 45 | Other Filter: 46 | \code{\link{Filter}}, 47 | \code{\link{mlr_filters}}, 48 | \code{\link{mlr_filters_anova}}, 49 | \code{\link{mlr_filters_auc}}, 50 | \code{\link{mlr_filters_boruta}}, 51 | \code{\link{mlr_filters_carscore}}, 52 | \code{\link{mlr_filters_carsurvscore}}, 53 | \code{\link{mlr_filters_cmim}}, 54 | \code{\link{mlr_filters_correlation}}, 55 | \code{\link{mlr_filters_disr}}, 56 | \code{\link{mlr_filters_find_correlation}}, 57 | \code{\link{mlr_filters_importance}}, 58 | \code{\link{mlr_filters_information_gain}}, 59 | \code{\link{mlr_filters_jmi}}, 60 | \code{\link{mlr_filters_jmim}}, 61 | \code{\link{mlr_filters_kruskal_test}}, 62 | \code{\link{mlr_filters_mim}}, 63 | \code{\link{mlr_filters_mrmr}}, 64 | \code{\link{mlr_filters_njmim}}, 65 | \code{\link{mlr_filters_performance}}, 66 | \code{\link{mlr_filters_permutation}}, 67 | \code{\link{mlr_filters_relief}}, 68 | \code{\link{mlr_filters_selected_features}}, 69 | \code{\link{mlr_filters_univariate_cox}} 70 | } 71 | \concept{Filter} 72 | \section{Super class}{ 73 | \code{\link[mlr3filters:Filter]{mlr3filters::Filter}} -> \code{FilterVariance} 74 | } 75 | \section{Methods}{ 76 | \subsection{Public methods}{ 77 | \itemize{ 78 | \item \href{#method-FilterVariance-new}{\code{FilterVariance$new()}} 79 | \item \href{#method-FilterVariance-clone}{\code{FilterVariance$clone()}} 80 | } 81 | } 82 | \if{html}{\out{ 83 |
Inherited methods 84 | 90 |
91 | }} 92 | \if{html}{\out{
}} 93 | \if{html}{\out{}} 94 | \if{latex}{\out{\hypertarget{method-FilterVariance-new}{}}} 95 | \subsection{Method \code{new()}}{ 96 | Create a FilterVariance object. 97 | \subsection{Usage}{ 98 | \if{html}{\out{
}}\preformatted{FilterVariance$new()}\if{html}{\out{
}} 99 | } 100 | 101 | } 102 | \if{html}{\out{
}} 103 | \if{html}{\out{}} 104 | \if{latex}{\out{\hypertarget{method-FilterVariance-clone}{}}} 105 | \subsection{Method \code{clone()}}{ 106 | The objects of this class are cloneable with this method. 107 | \subsection{Usage}{ 108 | \if{html}{\out{
}}\preformatted{FilterVariance$clone(deep = FALSE)}\if{html}{\out{
}} 109 | } 110 | 111 | \subsection{Arguments}{ 112 | \if{html}{\out{
}} 113 | \describe{ 114 | \item{\code{deep}}{Whether to make a deep clone.} 115 | } 116 | \if{html}{\out{
}} 117 | } 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /man/reexports.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/reexports.R 3 | \docType{import} 4 | \name{reexports} 5 | \alias{reexports} 6 | \alias{as.data.table} 7 | \title{Objects exported from other packages} 8 | \keyword{internal} 9 | \description{ 10 | These objects are imported from other packages. Follow the links 11 | below to see their documentation. 12 | 13 | \describe{ 14 | \item{data.table}{\code{\link[data.table]{as.data.table}}} 15 | }} 16 | 17 | -------------------------------------------------------------------------------- /mlr3filters.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageCheckArgs: --no-tests 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /pkgdown/_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://mlr3filters.mlr-org.com 2 | 3 | template: 4 | bootstrap: 5 5 | package: mlr3pkgdowntemplate 6 | 7 | development: 8 | mode: auto 9 | version_label: default 10 | version_tooltip: "Version" 11 | 12 | toc: 13 | depth: 3 14 | 15 | authors: 16 | Patrick Schratz: 17 | href: https://pat-s.me 18 | 19 | navbar: 20 | structure: 21 | left: [reference, news, book] 22 | right: [search, github, mattermost, stackoverflow, rss] 23 | components: 24 | home: ~ 25 | 
reference: 26 | icon: fa fa-file-alt 27 | text: Reference 28 | href: reference/index.html 29 | mattermost: 30 | icon: fa fa-comments 31 | href: https://lmmisld-lmu-stats-slds.srv.mwn.de/mlr_invite/ 32 | book: 33 | text: mlr3book 34 | icon: fa fa-link 35 | href: https://mlr3book.mlr-org.com 36 | stackoverflow: 37 | icon: fab fa-stack-overflow 38 | href: https://stackoverflow.com/questions/tagged/mlr3 39 | rss: 40 | icon: fa-rss 41 | href: https://mlr-org.com/ 42 | 43 | reference: 44 | - title: Filters 45 | contents: 46 | - starts_with("mlr_filters_") 47 | - title: General 48 | contents: 49 | - Filter 50 | - flt 51 | - mlr3filters-package 52 | - mlr_filters 53 | -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-120x120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3filters/3019b3338ec91007833271edb1318fc04f1a7d54/pkgdown/favicon/apple-touch-icon-120x120.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-152x152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3filters/3019b3338ec91007833271edb1318fc04f1a7d54/pkgdown/favicon/apple-touch-icon-152x152.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-180x180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3filters/3019b3338ec91007833271edb1318fc04f1a7d54/pkgdown/favicon/apple-touch-icon-180x180.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-60x60.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mlr-org/mlr3filters/3019b3338ec91007833271edb1318fc04f1a7d54/pkgdown/favicon/apple-touch-icon-60x60.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-76x76.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3filters/3019b3338ec91007833271edb1318fc04f1a7d54/pkgdown/favicon/apple-touch-icon-76x76.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3filters/3019b3338ec91007833271edb1318fc04f1a7d54/pkgdown/favicon/apple-touch-icon.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3filters/3019b3338ec91007833271edb1318fc04f1a7d54/pkgdown/favicon/favicon-16x16.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3filters/3019b3338ec91007833271edb1318fc04f1a7d54/pkgdown/favicon/favicon-32x32.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlr-org/mlr3filters/3019b3338ec91007833271edb1318fc04f1a7d54/pkgdown/favicon/favicon.ico -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | if 
(requireNamespace("testthat", quietly = TRUE)) { 2 | library("testthat") 3 | library("mlr3filters") 4 | test_check("mlr3filters") 5 | } 6 | -------------------------------------------------------------------------------- /tests/testthat/helper.R: -------------------------------------------------------------------------------- 1 | library(checkmate) 2 | library(mlr3) 3 | lapply(list.files(system.file("testthat", package = "mlr3"), 4 | pattern = "^helper.*\\.[rR]$", full.names = TRUE), source) 5 | 6 | expect_filter = function(f, task = NULL) { 7 | expect_r6(f, "Filter", 8 | public = c( 9 | "packages", "feature_types", "task_types", "param_set", "scores", 10 | "calculate") 11 | ) 12 | 13 | expect_character(f$packages, any.missing = FALSE, unique = TRUE) 14 | expect_subset(f$task_types, c(mlr_reflections$task_types$type, NA)) 15 | expect_subset(f$feature_types, mlr_reflections$task_feature_types) 16 | expect_class(f$param_set, "ParamSet") 17 | expect_function(f$calculate, args = c("task", "nfeat"), ordered = TRUE) 18 | expect_numeric(f$scores, names = "unique") 19 | if (!is.null(task)) { 20 | expect_names(names(f$scores), permutation.of = task$feature_names) 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /tests/testthat/setup.R: -------------------------------------------------------------------------------- 1 | old_opts = options( 2 | warnPartialMatchArgs = TRUE, 3 | warnPartialMatchAttr = TRUE, 4 | warnPartialMatchDollar = TRUE 5 | ) 6 | 7 | # https://github.com/HenrikBengtsson/Wishlist-for-R/issues/88 8 | old_opts = lapply(old_opts, function(x) if (is.null(x)) FALSE else x) 9 | 10 | lg = lgr::get_logger("mlr3") 11 | old_threshold = lg$threshold 12 | lg$set_threshold("warn") 13 | -------------------------------------------------------------------------------- /tests/testthat/teardown.R: -------------------------------------------------------------------------------- 1 | options(old_opts) 2 | 
lg$set_threshold(old_threshold) 3 | -------------------------------------------------------------------------------- /tests/testthat/test_FilterCorrelation.R: -------------------------------------------------------------------------------- 1 | test_that("FilterCorrelation handles features with only missings gracefully", { 2 | data = as.data.table(mtcars) 3 | data[, disp := NA] 4 | task = as_task_regr(data, target = "mpg") 5 | 6 | scores = flt("correlation")$calculate(task)$scores 7 | 8 | expect_numeric(scores) 9 | expect_true(is.na(scores["disp"])) 10 | expect_true(all(!is.na(scores[setdiff(names(scores), "disp")]))) 11 | }) 12 | -------------------------------------------------------------------------------- /tests/testthat/test_FilterFindCorrelation.R: -------------------------------------------------------------------------------- 1 | skip_if_not_installed("caret") 2 | 3 | test_that("FilterFindCorrelation", { 4 | task = mlr3::mlr_tasks$get("sonar") 5 | equalcor = cbind( 6 | a = rep(c(1, 0, 0, 0), task$nrow / 4), b = c(0, 1, 0, 0), 7 | c = c(0, 0, 1, 0), d = c(0, 0, 0, 1), e = c(0.1, -0.1, 0.1, 0.99), 8 | f = c(-0.1, 0.1, 0.1, 0.99)) 9 | task$cbind(as.data.frame(equalcor)) 10 | data = task$data(cols = task$feature_names) 11 | cm = cor(data) 12 | checkpoints = (0:100) / 100 13 | remove_caret = lapply(checkpoints, caret::findCorrelation, x = cm, exact = FALSE) 14 | f = FilterFindCorrelation$new() 15 | f$calculate(task) 16 | remove_filter = lapply(checkpoints, function(cutoff) { 17 | match(names(f$scores)[f$scores < 1 - cutoff], task$feature_names) 18 | }) 19 | mapply(expect_set_equal, remove_caret, remove_filter) 20 | }) 21 | -------------------------------------------------------------------------------- /tests/testthat/test_FilterImportance.R: -------------------------------------------------------------------------------- 1 | test_that("FilterImportance", { 2 | set.seed(42) 3 | task = mlr3::mlr_tasks$get("wine") 4 | learner = 
mlr3::mlr_learners$get("classif.rpart") 5 | f = FilterImportance$new(learner = learner) 6 | f$calculate(task) 7 | expect_filter(f, task = task) 8 | }) 9 | 10 | test_that("task_types check", { 11 | task = mlr3::tsk("mtcars") 12 | filter = flt("importance", learner = mlr3::lrn("classif.featureless")) 13 | 14 | expect_error( 15 | filter$calculate(task), 16 | "type" 17 | ) 18 | }) 19 | -------------------------------------------------------------------------------- /tests/testthat/test_FilterInformationGain.R: -------------------------------------------------------------------------------- 1 | test_that("FilterInformationGain handles features with only missings gracefully", { 2 | data = tsk("mtcars")$data() 3 | data[, wt := NA] 4 | task = as_task_regr(data, target = "mpg") 5 | 6 | scores = flt("information_gain")$calculate(task)$scores 7 | 8 | expect_numeric(scores, any.missing = FALSE) 9 | expect_lte(scores["wt"], 1e-8) 10 | }) 11 | 12 | test_that("FilterInformationGain handles features with only missings gracefully", { 13 | data = tsk("iris")$data() 14 | data[, Sepal.Length := NA] 15 | task = as_task_classif(data, target = "Species") 16 | 17 | scores = flt("information_gain")$calculate(task)$scores 18 | 19 | expect_numeric(scores, any.missing = FALSE) 20 | expect_lte(scores["Sepal.Length"], 1e-8) 21 | }) 22 | -------------------------------------------------------------------------------- /tests/testthat/test_FilterKruskalTest.R: -------------------------------------------------------------------------------- 1 | test_that("FilterKruskalTest handles features with only missings gracefully", { 2 | data = tsk("spam")$data() 3 | data[, report := NA] 4 | data[1, report := 1] 5 | task = as_task_classif(data, target = "type") 6 | 7 | scores = flt("kruskal_test")$calculate(task)$scores 8 | 9 | expect_numeric(scores) 10 | expect_true(is.na(scores["report"])) # "report" is the degenerate feature built above; indexing by an absent name would be NA unconditionally 11 | expect_true(all(!is.na(scores[setdiff(names(scores), "report")]))) 12 |
-------------------------------------------------------------------------------- /tests/testthat/test_FilterPerformance.R: -------------------------------------------------------------------------------- 1 | test_that("FilterPerformance", { 2 | task = mlr3::mlr_tasks$get("iris") 3 | learner = mlr3::mlr_learners$get("classif.rpart") 4 | resampling = rsmp("holdout") 5 | f = flt("performance", learner = learner, resampling = resampling) 6 | 7 | expect_equal(f$measure$id, "classif.ce") 8 | f$calculate(task) 9 | expect_filter(f, task = task) 10 | expect_true(all(f$scores <= 0)) # default measure is classif.ce 11 | 12 | f = flt("performance", learner = learner, resampling = resampling, 13 | measure = msr("classif.acc")) # change measure 14 | expect_equal(f$measure$id, "classif.acc") 15 | f$calculate(task) 16 | expect_filter(f, task = task) 17 | expect_true(all(f$scores >= 0)) 18 | }) 19 | -------------------------------------------------------------------------------- /tests/testthat/test_FilterPermutation.R: -------------------------------------------------------------------------------- 1 | test_that("FilterPermutation", { 2 | task = mlr3::mlr_tasks$get("iris") 3 | learner = mlr3::mlr_learners$get("classif.rpart") 4 | resampling = mlr3::rsmp("cv", folds = 2) 5 | f = flt("permutation", learner = learner, resampling = resampling, nmc = 3) 6 | 7 | f$calculate(task) 8 | expect_filter(f, task = task) 9 | }) 10 | -------------------------------------------------------------------------------- /tests/testthat/test_FilterRelief.R: -------------------------------------------------------------------------------- 1 | test_that("FilterRelief handles features with only missings gracefully", { 2 | data = tsk("mtcars")$data() 3 | data[, wt := NA] 4 | task = as_task_regr(data, target = "mpg") 5 | 6 | scores = flt("relief")$calculate(task)$scores 7 | 8 | expect_numeric(scores, any.missing = FALSE) 9 | expect_lte(scores["wt"], 1e-8) 10 | }) 11 | 12 | test_that("FilterRelief handles
features with only missings gracefully", { 13 | data = tsk("iris")$data() 14 | data[, Sepal.Length := NA] 15 | task = as_task_classif(data, target = "Species") 16 | 17 | scores = flt("relief")$calculate(task)$scores 18 | 19 | expect_numeric(scores, any.missing = FALSE) 20 | expect_lte(scores["Sepal.Length"], 1e-8) 21 | }) 22 | -------------------------------------------------------------------------------- /tests/testthat/test_FilterSelectedFeatures.R: -------------------------------------------------------------------------------- 1 | test_that("FilterSelectedFeatures", { 2 | set.seed(42) 3 | task = mlr3::mlr_tasks$get("wine") 4 | learner = mlr3::mlr_learners$get("classif.rpart") 5 | f = FilterSelectedFeatures$new(learner = learner) 6 | f$calculate(task) 7 | expect_filter(f, task = task) 8 | }) 9 | -------------------------------------------------------------------------------- /tests/testthat/test_FilterUnivariateCox.R: -------------------------------------------------------------------------------- 1 | skip_if_not_installed("mlr3proba") 2 | 3 | test_that("FilterUnivariateCox", { 4 | t = tsk("rats") 5 | t2 = t$clone()$select(c("rx", "litter")) 6 | f = flt("univariate_cox") 7 | f$calculate(t2) 8 | 9 | # simple testing of filter scores 10 | expect_filter(f, task = t2) 11 | expect_true(all(f$scores >= 0)) 12 | 13 | # doesn't work with factors (feature: sex) 14 | expect_error(f$calculate(t), "unsupported feature types: factor") 15 | 16 | # encode sex as numeric so filter can be used 17 | dt = t$data() 18 | dt[, sex := ifelse(dt[["sex"]] == 'm', 1, 0)] 19 | t3 = mlr3proba::as_task_surv(dt, target = "time", event = "status") 20 | f$calculate(t3) 21 | score = f$scores[["sex"]] 22 | 23 | # get manually score on sex factor 24 | l = lrn("surv.coxph") 25 | t$col_roles$feature = "sex" 26 | l$train(t) 27 | manual_score = -log10(summary(l$model)$coefficients[,"Pr(>|z|)"]) 28 | 29 | # for 2-level factors, same result is returned if 0-1 encoded 30 | expect_equal(manual_score, 
score) 31 | }) 32 | -------------------------------------------------------------------------------- /tests/testthat/test_filter.R: -------------------------------------------------------------------------------- 1 | test_that("Filtering an empty Task (#39)", { 2 | task = mlr_tasks$get("mtcars") 3 | f = mlr_filters$get("variance") 4 | f$calculate(task) 5 | expect_numeric(f$scores, names = "unique") 6 | 7 | task = mlr_tasks$get("mtcars")$select(character()) 8 | f = mlr_filters$get("variance") 9 | f$calculate(task) 10 | expect_numeric(f$scores, names = "unique", len = 0) 11 | 12 | no_ids = task$row_ids[0] 13 | task = mlr_tasks$get("mtcars")$filter(no_ids) 14 | f = mlr_filters$get("variance") 15 | f$calculate(task) 16 | expect_numeric(f$scores, names = "unique", len = length(task$feature_names)) 17 | expect_true(allMissing(f$scores)) 18 | }) 19 | 20 | test_that("as.data.table conversion works", { 21 | task = mlr_tasks$get("sonar") 22 | filter = mlr_filters$get("auc") 23 | filter$calculate(task) 24 | 25 | expect_silent(as.data.table(filter)) 26 | }) 27 | 28 | test_that("mlr3sugar creation works", { 29 | expect_silent(flt("correlation", method = "kendall")) 30 | }) 31 | 32 | test_that("Assertion of task type works", { 33 | task = mlr_tasks$get("iris") 34 | f = mlr_filters$get("correlation") 35 | expect_error(f$calculate(task), regexp = "type") 36 | }) 37 | 38 | 39 | test_that("nfeat is passed to praznik correctly", { 40 | skip_if_not_installed("praznik") 41 | task = tsk("iris") 42 | f = flt("disr") 43 | f$calculate(task, nfeat = 1) 44 | expect_equal(sum(!is.na(f$scores)), 1) 45 | }) 46 | -------------------------------------------------------------------------------- /tests/testthat/test_filter_boruta.R: -------------------------------------------------------------------------------- 1 | test_that("filter boruta works", { 2 | task = tsk("sonar") 3 | f = flt("boruta") 4 | f$calculate(task) 5 | expect_filter(f, task = task) 6 | }) 7 | 
-------------------------------------------------------------------------------- /tests/testthat/test_filter_classif.R: -------------------------------------------------------------------------------- 1 | test_that("all classif filters return correct filter values", { 2 | task = mlr_tasks$get("sonar") 3 | task$select(head(task$feature_names, 3)) 4 | filters = mlr_filters$mget(mlr_filters$keys()) 5 | filters$permutation$param_set$values = list(nmc = 2) 6 | 7 | for (f in filters) { 8 | if ("classif" %in% f$task_types && all(require_namespaces(f$packages, quietly = TRUE))) { 9 | f$calculate(task) 10 | expect_filter(f, task = task) 11 | } 12 | } 13 | }) 14 | 15 | 16 | test_that("filters throw errors on missing values", { 17 | task = tsk("sonar") 18 | data = task$data(cols = c(task$target_names, head(task$feature_names, 3))) 19 | data$V1[1] = NA 20 | task = as_task_classif(data, target = task$target_names) 21 | 22 | filters = mlr_filters$mget(mlr_filters$keys()) 23 | 24 | for (f in filters) { 25 | if ("classif" %nin% f$task_types) { 26 | next 27 | } 28 | 29 | if (!all(require_namespaces(f$packages, quietly = TRUE))) { 30 | next 31 | } 32 | 33 | if ("missings" %in% f$properties) { 34 | f$calculate(task) 35 | } else { 36 | expect_error(f$calculate(task), "missing values") 37 | } 38 | } 39 | }) 40 | -------------------------------------------------------------------------------- /tests/testthat/test_filter_generic.R: -------------------------------------------------------------------------------- 1 | test_that("all generic filters return correct filter values", { 2 | task = mlr_tasks$get("mtcars") 3 | filters = mlr_filters$mget(mlr_filters$keys()) 4 | 5 | for (f in filters) { 6 | if (NA %in% f$task_types && all(require_namespaces(f$packages, quietly = TRUE))) { 7 | f$calculate(task) 8 | expect_filter(f, task = task) 9 | } 10 | } 11 | }) 12 | 13 | test_that("filters throw errors on missing values", { 14 | data = tsk("mtcars")$data() 15 | data$cyl[1] = NA 16 | task = 
as_task_regr(data, target = "mpg") 17 | 18 | filters = mlr_filters$mget(mlr_filters$keys()) 19 | 20 | for (f in filters) { 21 | if (!is_scalar_na(f$task_types)) { 22 | next 23 | } 24 | 25 | if (!all(require_namespaces(f$packages, quietly = TRUE))) { 26 | next 27 | } 28 | 29 | if ("missings" %in% f$properties) { 30 | f$calculate(task) 31 | } else { 32 | expect_error(f$calculate(task), "missing values") 33 | } 34 | } 35 | }) 36 | -------------------------------------------------------------------------------- /tests/testthat/test_filter_regr.R: -------------------------------------------------------------------------------- 1 | test_that("all regr filters return correct filter values", { 2 | task = mlr_tasks$get("mtcars") 3 | filters = mlr_filters$mget(mlr_filters$keys()) 4 | 5 | for (f in filters) { 6 | if ("regr" %in% f$task_types && all(require_namespaces(f$packages, quietly = TRUE))) { 7 | f$calculate(task) 8 | expect_filter(f, task = task) 9 | } 10 | } 11 | }) 12 | 13 | test_that("filters throw errors on missing values", { 14 | data = tsk("mtcars")$data() 15 | data$cyl[1] = NA 16 | task = as_task_regr(data, target = "mpg") 17 | 18 | filters = mlr_filters$mget(mlr_filters$keys()) 19 | 20 | for (f in filters) { 21 | if ("regr" %nin% f$task_types) { 22 | next 23 | } 24 | 25 | if (!all(require_namespaces(f$packages, quietly = TRUE))) { 26 | next 27 | } 28 | 29 | if ("missings" %in% f$properties) { 30 | f$calculate(task) 31 | } else { 32 | expect_error(f$calculate(task), "missing values") 33 | } 34 | } 35 | }) 36 | 37 | test_that("Errors for unsupported features", { 38 | skip_if("california_housing" %nin% mlr_tasks$keys()) 39 | task = tsk("california_housing") 40 | filters = mlr_filters$mget(mlr_filters$keys()) 41 | 42 | # supported: numeric, integer 43 | # supplied: factor, integer, numeric 44 | for (f in filters) { 45 | if ("factor" %nin% f$feature_types && all(require_namespaces(f$packages, quietly = TRUE))) { 46 | expect_error(f$calculate(task)) 47 | } 48 | } 49 
| }) 50 | 51 | -------------------------------------------------------------------------------- /tests/testthat/test_filter_surv.R: -------------------------------------------------------------------------------- 1 | skip_if_not_installed("mlr3proba") 2 | 3 | test_that("mlr3proba learners work", { 4 | requireNamespace("mlr3proba") 5 | 6 | # needs to be fixed in mlr3proba 7 | withr::local_options(warnPartialMatchDollar = FALSE, warnPartialMatchArgs = FALSE, warnPartialMatchAttr = FALSE) 8 | 9 | task = tsk("rats") 10 | learner = lrn("surv.rpart") 11 | resampling = rsmp("holdout") 12 | 13 | f = flt("performance", learner = learner, resampling = resampling) 14 | f$calculate(task) 15 | 16 | expect_filter(f, task = task) 17 | }) 18 | 19 | test_that("filters throw errors on missing values", { 20 | task = tsk("rats")$select(c("litter", "rx")) 21 | data = task$data() 22 | data$litter[1] = NA 23 | task = mlr3proba::as_task_surv(data, target = "time", event = "status") 24 | 25 | filters = mlr_filters$mget(mlr_filters$keys()) 26 | 27 | for (f in filters) { 28 | if ("surv" %nin% f$task_types) { 29 | next 30 | } 31 | 32 | if (!all(require_namespaces(f$packages, quietly = TRUE))) { 33 | next 34 | } 35 | 36 | if ("missings" %in% f$properties) { 37 | f$calculate(task) 38 | } else { 39 | expect_error(f$calculate(task), "missing values") 40 | } 41 | } 42 | }) 43 | 44 | 45 | -------------------------------------------------------------------------------- /tests/testthat/test_mlr3spatiotempcv.R: -------------------------------------------------------------------------------- 1 | skip_if_not_installed("mlr3spatiotempcv") 2 | skip_on_cran() 3 | 4 | test_that("task detection works with mlr3spatiotempcv tasks", { 5 | pkg = "mlr3spatiotempcv" 6 | library(pkg, character.only = TRUE) # FIXME: replace with requireNamespace() 7 | task = tsk("ecuador") 8 | learner = lrn("classif.rpart") 9 | 10 | filter = flt("importance", learner = learner) 11 | expect_filter(filter$calculate(task)) 12 | 13 | 
filter = flt("variance") 14 | expect_filter(filter$calculate(task)) 15 | 16 | filter = flt("mim") 17 | expect_filter(filter$calculate(task)) 18 | }) 19 | -------------------------------------------------------------------------------- /tests/testthat/test_mlr_filters.R: -------------------------------------------------------------------------------- 1 | test_that("mlr_filters", { 2 | expect_dictionary(mlr_filters, min_items = 1) 3 | }) 4 | -------------------------------------------------------------------------------- /tests/testthat/test_partial_scoring.R: -------------------------------------------------------------------------------- 1 | skip_if_not_installed("praznik") 2 | 3 | test_that("praznik 'nfeat' argument works correctly", { 4 | task = mlr_tasks$get("mtcars") 5 | filters = mlr_filters$mget(as.data.table(mlr_filters)[map_lgl(packages, 6 | is.element, 7 | el = "praznik"), key]) 8 | nfeat = 3 9 | 10 | for (f in filters) { 11 | f$calculate(task, nfeat = nfeat) 12 | expect_equal(sum(!is.na(f$scores)), nfeat) 13 | expect_filter(f, task = task) 14 | } 15 | }) 16 | --------------------------------------------------------------------------------