├── .Rbuildignore
├── .editorconfig
├── .github
└── workflows
│ ├── dev-cmd-check.yml
│ ├── pkgdown.yml
│ └── r-cmd-check.yml
├── .gitignore
├── .ignore
├── .lintr
├── DESCRIPTION
├── LICENSE
├── NAMESPACE
├── NEWS.md
├── R
├── Filter.R
├── FilterAUC.R
├── FilterAnova.R
├── FilterBoruta.R
├── FilterCMIM.R
├── FilterCarScore.R
├── FilterCarSurvScore.R
├── FilterCorrelation.R
├── FilterDISR.R
├── FilterFindCorrelation.R
├── FilterImportance.R
├── FilterInformationGain.R
├── FilterJMI.R
├── FilterJMIM.R
├── FilterKruskalTest.R
├── FilterLearner.R
├── FilterMIM.R
├── FilterMRMR.R
├── FilterNJMIM.R
├── FilterPerformance.R
├── FilterPermutation.R
├── FilterRelief.R
├── FilterSelectedFeatures.R
├── FilterUnivariateCox.R
├── FilterVariance.R
├── bibentries.R
├── flt.R
├── helper.R
├── mlr_filters.R
├── reexports.R
└── zzz.R
├── README.Rmd
├── README.md
├── man-roxygen
├── details_praznik.R
└── seealso_filter.R
├── man
├── Filter.Rd
├── figures
│ ├── logo.png
│ └── logo_navbar.png
├── flt.Rd
├── mlr3filters-package.Rd
├── mlr_filters.Rd
├── mlr_filters_anova.Rd
├── mlr_filters_auc.Rd
├── mlr_filters_boruta.Rd
├── mlr_filters_carscore.Rd
├── mlr_filters_carsurvscore.Rd
├── mlr_filters_cmim.Rd
├── mlr_filters_correlation.Rd
├── mlr_filters_disr.Rd
├── mlr_filters_find_correlation.Rd
├── mlr_filters_importance.Rd
├── mlr_filters_information_gain.Rd
├── mlr_filters_jmi.Rd
├── mlr_filters_jmim.Rd
├── mlr_filters_kruskal_test.Rd
├── mlr_filters_mim.Rd
├── mlr_filters_mrmr.Rd
├── mlr_filters_njmim.Rd
├── mlr_filters_performance.Rd
├── mlr_filters_permutation.Rd
├── mlr_filters_relief.Rd
├── mlr_filters_selected_features.Rd
├── mlr_filters_univariate_cox.Rd
├── mlr_filters_variance.Rd
└── reexports.Rd
├── mlr3filters.Rproj
├── pkgdown
├── _pkgdown.yml
└── favicon
│ ├── apple-touch-icon-120x120.png
│ ├── apple-touch-icon-152x152.png
│ ├── apple-touch-icon-180x180.png
│ ├── apple-touch-icon-60x60.png
│ ├── apple-touch-icon-76x76.png
│ ├── apple-touch-icon.png
│ ├── favicon-16x16.png
│ ├── favicon-32x32.png
│ └── favicon.ico
└── tests
├── testthat.R
└── testthat
├── helper.R
├── setup.R
├── teardown.R
├── test_FilterCorrelation.R
├── test_FilterFindCorrelation.R
├── test_FilterImportance.R
├── test_FilterInformationGain.R
├── test_FilterKruskalTest.R
├── test_FilterPerformance.R
├── test_FilterPermutation.R
├── test_FilterRelief.R
├── test_FilterSelectedFeatures.R
├── test_FilterUnivariateCox.R
├── test_filter.R
├── test_filter_boruta.R
├── test_filter_classif.R
├── test_filter_generic.R
├── test_filter_regr.R
├── test_filter_surv.R
├── test_mlr3spatiotempcv.R
├── test_mlr_filters.R
└── test_partial_scoring.R
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^README\.Rmd$
2 | ^LICENSE$
3 | ^\.github$
4 | ^.*\.Rproj$
5 | ^\.Rproj\.user$
6 | ^\.editorconfig$
7 | ^\.ignore$
8 | ^docs$
9 | ^pkgdown$
10 | ^man-roxygen$
11 | ^cran-comments.md
12 | ^\.ccache$
13 | ^codemeta\.json$
14 | ^revdep$
15 | ^\.vscode$
16 | ^\.lintr$
17 | ^cran-comments\.md$
18 | ^CRAN-SUBMISSION$
19 |
--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
1 | # See http://editorconfig.org
2 | root = true
3 |
4 | [*]
5 | charset = utf-8
6 | end_of_line = lf
7 | insert_final_newline = true
8 | indent_style = space
9 | trim_trailing_whitespace = true
10 |
11 | [*.{r,R,md,Rmd}]
12 | indent_size = 2
13 |
14 | [*.{c,h}]
15 | indent_size = 4
16 |
17 | [*.{cpp,hpp}]
18 | indent_size = 4
19 |
20 | [{NEWS.md,DESCRIPTION,LICENSE}]
21 | max_line_length = 80
22 |
--------------------------------------------------------------------------------
/.github/workflows/dev-cmd-check.yml:
--------------------------------------------------------------------------------
1 | # dev cmd check workflow of the mlr3 ecosystem v0.1.0
2 | # https://github.com/mlr-org/actions
3 | on:
4 | workflow_dispatch:
5 | push:
6 | branches:
7 | - main
8 | pull_request:
9 | branches:
10 | - main
11 |
12 | name: dev-check
13 |
14 | jobs:
15 | check-package:
16 | runs-on: ${{ matrix.config.os }}
17 |
18 | name: ${{ matrix.config.dev-package }}
19 |
20 | env:
21 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
22 |
23 | strategy:
24 | fail-fast: false
25 | matrix:
26 | config:
27 | - {os: ubuntu-latest, r: 'release', dev-package: "mlr-org/paradox', 'mlr-org/mlr3learners', 'mlr-org/mlr3pipelines"}
28 |
29 | steps:
30 | - uses: actions/checkout@v3
31 |
32 | - uses: r-lib/actions/setup-r@v2
33 | with:
34 | r-version: ${{ matrix.config.r }}
35 |
36 | - uses: r-lib/actions/setup-r-dependencies@v2
37 | with:
38 | extra-packages: any::rcmdcheck
39 | needs: check
40 |
41 | - name: Install dev versions
42 | run: pak::pkg_install(c('${{ matrix.config.dev-package }}'))
43 | shell: Rscript {0}
44 |
45 | - uses: r-lib/actions/check-r-package@v2
46 |
--------------------------------------------------------------------------------
/.github/workflows/pkgdown.yml:
--------------------------------------------------------------------------------
1 | # pkgdown workflow of the mlr3 ecosystem v0.1.0
2 | # https://github.com/mlr-org/actions
3 | on:
4 | push:
5 | branches:
6 | - main
7 | pull_request:
8 | branches:
9 | - main
10 | release:
11 | types:
12 | - published
13 | workflow_dispatch:
14 |
15 | name: pkgdown
16 |
17 | jobs:
18 | pkgdown:
19 | runs-on: ubuntu-latest
20 |
21 | concurrency:
22 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
23 | env:
24 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
25 | steps:
26 | - uses: actions/checkout@v3
27 |
28 | - uses: r-lib/actions/setup-pandoc@v2
29 |
30 | - uses: r-lib/actions/setup-r@v2
31 |
32 | - uses: r-lib/actions/setup-r-dependencies@v2
33 | with:
34 | extra-packages: any::pkgdown, local::.
35 | needs: website
36 |
37 | - name: Install template
38 | run: pak::pkg_install("mlr-org/mlr3pkgdowntemplate")
39 | shell: Rscript {0}
40 |
41 | - name: Build site
42 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
43 | shell: Rscript {0}
44 |
45 | - name: Deploy
46 | if: github.event_name != 'pull_request'
47 | uses: JamesIves/github-pages-deploy-action@v4.4.1
48 | with:
49 | clean: false
50 | branch: gh-pages
51 | folder: docs
52 |
--------------------------------------------------------------------------------
/.github/workflows/r-cmd-check.yml:
--------------------------------------------------------------------------------
1 | # r cmd check workflow of the mlr3 ecosystem v0.1.0
2 | # https://github.com/mlr-org/actions
3 | on:
4 | workflow_dispatch:
5 | push:
6 | branches:
7 | - main
8 | pull_request:
9 | branches:
10 | - main
11 |
12 | name: r-cmd-check
13 |
14 | jobs:
15 | r-cmd-check:
16 | runs-on: ${{ matrix.config.os }}
17 |
18 | name: ${{ matrix.config.os }} (${{ matrix.config.r }})
19 |
20 | env:
21 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
22 |
23 | strategy:
24 | fail-fast: false
25 | matrix:
26 | config:
27 | - {os: ubuntu-latest, r: 'devel'}
28 | - {os: ubuntu-latest, r: 'release'}
29 |
30 | steps:
31 | - uses: actions/checkout@v3
32 |
33 | - uses: r-lib/actions/setup-r@v2
34 | with:
35 | r-version: ${{ matrix.config.r }}
36 |
37 | - uses: r-lib/actions/setup-r-dependencies@v2
38 | with:
39 | extra-packages: any::rcmdcheck
40 | needs: check
41 |
42 | - uses: r-lib/actions/check-r-package@v2
43 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # File created using '.gitignore Generator' for Visual Studio Code: https://bit.ly/vscode-gig
2 | # Created by https://www.toptal.com/developers/gitignore/api/windows,visualstudiocode,r,macos,linux
3 | # Edit at https://www.toptal.com/developers/gitignore?templates=windows,visualstudiocode,r,macos,linux
4 |
5 | ### Linux ###
6 | *~
7 |
8 | # temporary files which can be created if a process still has a handle open of a deleted file
9 | .fuse_hidden*
10 |
11 | # KDE directory preferences
12 | .directory
13 |
14 | # Linux trash folder which might appear on any partition or disk
15 | .Trash-*
16 |
17 | # .nfs files are created when an open file is removed but is still being accessed
18 | .nfs*
19 |
20 | ### macOS ###
21 | # General
22 | .DS_Store
23 | .AppleDouble
24 | .LSOverride
25 |
26 | # Icon must end with two \r
27 | Icon
28 |
29 |
30 | # Thumbnails
31 | ._*
32 |
33 | # Files that might appear in the root of a volume
34 | .DocumentRevisions-V100
35 | .fseventsd
36 | .Spotlight-V100
37 | .TemporaryItems
38 | .Trashes
39 | .VolumeIcon.icns
40 | .com.apple.timemachine.donotpresent
41 |
42 | # Directories potentially created on remote AFP share
43 | .AppleDB
44 | .AppleDesktop
45 | Network Trash Folder
46 | Temporary Items
47 | .apdisk
48 |
49 | ### macOS Patch ###
50 | # iCloud generated files
51 | *.icloud
52 |
53 | ### R ###
54 | # History files
55 | .Rhistory
56 | .Rapp.history
57 |
58 | # Session Data files
59 | .RData
60 | .RDataTmp
61 |
62 | # User-specific files
63 | .Ruserdata
64 |
65 | # Example code in package build process
66 | *-Ex.R
67 |
68 | # Output files from R CMD build
69 | /*.tar.gz
70 |
71 | # Output files from R CMD check
72 | /*.Rcheck/
73 |
74 | # RStudio files
75 | .Rproj.user/
76 |
77 | # produced vignettes
78 | vignettes/*.html
79 | vignettes/*.pdf
80 |
81 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
82 | .httr-oauth
83 |
84 | # knitr and R markdown default cache directories
85 | *_cache/
86 | /cache/
87 |
88 | # Temporary files created by R markdown
89 | *.utf8.md
90 | *.knit.md
91 |
92 | # R Environment Variables
93 | .Renviron
94 |
95 | # pkgdown site
96 | docs/
97 |
98 | # translation temp files
99 | po/*~
100 |
101 | # RStudio Connect folder
102 | rsconnect/
103 |
104 | ### R.Bookdown Stack ###
105 | # R package: bookdown caching files
106 | /*_files/
107 |
108 | ### VisualStudioCode ###
109 | .vscode/*
110 | !.vscode/settings.json
111 | !.vscode/tasks.json
112 | !.vscode/launch.json
113 | !.vscode/extensions.json
114 | !.vscode/*.code-snippets
115 |
116 | # Local History for Visual Studio Code
117 | .history/
118 |
119 | # Built Visual Studio Code Extensions
120 | *.vsix
121 |
122 | ### VisualStudioCode Patch ###
123 | # Ignore all local history of files
124 | .history
125 | .ionide
126 |
127 | ### Windows ###
128 | # Windows thumbnail cache files
129 | Thumbs.db
130 | Thumbs.db:encryptable
131 | ehthumbs.db
132 | ehthumbs_vista.db
133 |
134 | # Dump file
135 | *.stackdump
136 |
137 | # Folder config file
138 | [Dd]esktop.ini
139 |
140 | # Recycle Bin used on file shares
141 | $RECYCLE.BIN/
142 |
143 | # Windows Installer files
144 | *.cab
145 | *.msi
146 | *.msix
147 | *.msm
148 | *.msp
149 |
150 | # Windows shortcuts
151 | *.lnk
152 |
153 | # End of https://www.toptal.com/developers/gitignore/api/windows,visualstudiocode,r,macos,linux
154 |
155 | # Custom rules (everything added below won't be overriden by 'Generate .gitignore File' if you use 'Update' option)
156 |
157 | # R
158 | .Rprofile
159 | README.html
160 | src/*.o
161 | src/*.so
162 | src/*.dll
163 |
164 | # CRAN
165 | cran-comments.md
166 | CRAN-RELEASE
167 | CRAN-SUBMISSION
168 |
169 | # pkgdown
170 | docs/
171 |
172 | # renv
173 | renv/
174 | renv.lock
175 |
176 | # vscode
177 | .vscode
178 |
179 | # revdep
180 | revdep/
181 |
182 | # misc
183 | Meta/
184 | attic/
185 | inst/docd
186 |
--------------------------------------------------------------------------------
/.ignore:
--------------------------------------------------------------------------------
1 | man/
2 | docs/
3 | attic/
4 | pkgdown/
5 |
--------------------------------------------------------------------------------
/.lintr:
--------------------------------------------------------------------------------
1 | linters: linters_with_defaults(
2 | # lintr defaults: https://github.com/jimhester/lintr#available-linters
3 | # the following setup changes/removes certain linters
4 | assignment_linter = NULL, # do not force using <- for assignments
5 | object_name_linter = object_name_linter(c("snake_case", "CamelCase")), # only allow snake case and camel case object names
6 | cyclocomp_linter = NULL, # do not check function complexity
7 | commented_code_linter = NULL, # allow code in comments
8 | line_length_linter = line_length_linter(120)
9 | )
10 |
11 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: mlr3filters
2 | Title: Filter Based Feature Selection for 'mlr3'
3 | Version: 0.8.1.9000
4 | Authors@R: c(
5 | person("Marc", "Becker", , "marcbecker@posteo.de", role = c("cre", "aut"),
6 | comment = c(ORCID = "0000-0002-8115-0400")),
7 | person("Patrick", "Schratz", , "patrick.schratz@gmail.com", role = "aut",
8 | comment = c(ORCID = "0000-0003-0748-6624")),
9 | person("Michel", "Lang", , "michellang@gmail.com", role = "aut",
10 | comment = c(ORCID = "0000-0001-9754-0393")),
11 | person("Bernd", "Bischl", , "bernd_bischl@gmx.net", role = "aut",
12 | comment = c(ORCID = "0000-0001-6002-6980")),
13 | person("Martin", "Binder", , "mlr.developer@mb706.com", role = "aut"),
14 | person("John", "Zobolas", , "bblodfon@gmail.com", role = "aut",
15 | comment = c(ORCID = "0000-0002-3609-8674"))
16 | )
17 | Description: Extends 'mlr3' with filter methods for feature selection.
18 | Besides standalone filter methods, built-in methods of any
19 | machine-learning algorithm are supported. Partial scoring of
20 | multivariate filter methods is supported.
21 | License: LGPL-3
22 | URL: https://mlr3filters.mlr-org.com,
23 | https://github.com/mlr-org/mlr3filters
24 | BugReports: https://github.com/mlr-org/mlr3filters/issues
25 | Depends:
26 | R (>= 3.1.0)
27 | Imports:
28 | backports,
29 | checkmate,
30 | data.table,
31 | mlr3 (>= 0.12.0),
32 | mlr3misc,
33 | paradox,
34 | R6
35 | Suggests:
36 | Boruta,
37 | care,
38 | caret,
39 | carSurv,
40 | FSelectorRcpp,
41 | knitr,
42 | lgr,
43 | mlr3learners,
44 | mlr3measures,
45 | mlr3pipelines,
46 | praznik,
47 | rpart,
48 | survival,
49 | testthat (>= 3.0.0),
50 | withr
51 | Config/testthat/edition: 3
52 | Encoding: UTF-8
53 | NeedsCompilation: no
54 | Roxygen: list(markdown = TRUE, r6 = TRUE)
55 | RoxygenNote: 7.3.2
56 | Collate:
57 | 'Filter.R'
58 | 'mlr_filters.R'
59 | 'FilterAUC.R'
60 | 'FilterAnova.R'
61 | 'FilterBoruta.R'
62 | 'FilterCMIM.R'
63 | 'FilterCarScore.R'
64 | 'FilterCarSurvScore.R'
65 | 'FilterCorrelation.R'
66 | 'FilterDISR.R'
67 | 'FilterFindCorrelation.R'
68 | 'FilterLearner.R'
69 | 'FilterImportance.R'
70 | 'FilterInformationGain.R'
71 | 'FilterJMI.R'
72 | 'FilterJMIM.R'
73 | 'FilterKruskalTest.R'
74 | 'FilterMIM.R'
75 | 'FilterMRMR.R'
76 | 'FilterNJMIM.R'
77 | 'FilterPerformance.R'
78 | 'FilterPermutation.R'
79 | 'FilterRelief.R'
80 | 'FilterSelectedFeatures.R'
81 | 'FilterUnivariateCox.R'
82 | 'FilterVariance.R'
83 | 'bibentries.R'
84 | 'flt.R'
85 | 'helper.R'
86 | 'reexports.R'
87 | 'zzz.R'
88 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | S3method(as.data.table,DictionaryFilter)
4 | S3method(as.data.table,Filter)
5 | export(Filter)
6 | export(FilterAUC)
7 | export(FilterAnova)
8 | export(FilterBoruta)
9 | export(FilterCMIM)
10 | export(FilterCarScore)
11 | export(FilterCarSurvScore)
12 | export(FilterCorrelation)
13 | export(FilterDISR)
14 | export(FilterFindCorrelation)
15 | export(FilterImportance)
16 | export(FilterInformationGain)
17 | export(FilterJMI)
18 | export(FilterJMIM)
19 | export(FilterKruskalTest)
20 | export(FilterMIM)
21 | export(FilterMRMR)
22 | export(FilterNJMIM)
23 | export(FilterPerformance)
24 | export(FilterPermutation)
25 | export(FilterRelief)
26 | export(FilterSelectedFeatures)
27 | export(FilterUnivariateCox)
28 | export(FilterVariance)
29 | export(as.data.table)
30 | export(flt)
31 | export(flts)
32 | export(mlr_filters)
33 | import(checkmate)
34 | import(data.table)
35 | import(mlr3)
36 | import(mlr3misc)
37 | import(paradox)
38 | importFrom(R6,R6Class)
39 | importFrom(data.table,as.data.table)
40 | importFrom(stats,aov)
41 | importFrom(stats,kruskal.test)
42 | importFrom(stats,runif)
43 | importFrom(stats,var)
44 | importFrom(utils,bibentry)
45 | importFrom(utils,head)
46 |
--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
1 | # mlr3filters (development version)
2 |
3 | # mlr3filters 0.8.1
4 |
5 | * compatibility: mlr3 0.22.0
6 |
7 | # mlr3filters 0.8.0
8 |
9 | * Added `FilterBoruta`
10 | * Fixed issue with `FilterPerformance` where the arg `measure` wasn't passed on
11 | * Added `FilterUnivariateCox` (thanks to @bblodfon)
12 | * Parameter value `na.rm` is properly initialized to `TRUE` (thanks to @bblodfon)
13 | * Bugfix: property `missings` is now set correctly for `FilterFindCorrelation`
14 | * Bugfix: `$hash` now works for `Filter`s
15 |
16 | # mlr3filters 0.7.1
17 |
18 | * Tagged multiple filters as being able to gracefully handle missing values.
19 | * Added more supported feature types to FilterCarScore.
20 | * Improved documentation.
21 |
22 | # mlr3filters 0.7.0
23 |
24 | * Features are now checked for missing values to improve error messages (#140)
25 | * Removed deprecated functions
26 | * Use featureless learner in defaults (#124)
27 | * Field `task_type` of class `Filter` has been renamed to `task_types`.
28 |
29 | # mlr3filters 0.6.0
30 |
31 | * Add `FilterCarSurvScore` (#120, @mllg)
32 | * Use featureless learner instead of rpart as default learner for `FilterImportance` and `FilterPerformance` (#124)
33 | * Add documentation for PipeOpFilter
34 | * Add mlr3pipelines examples to help pages (#135, @sebffischer)
35 | * Add `label` arg to `Filter` class (#121, @mllg)
36 |
37 | # mlr3filters 0.5.0
38 |
39 | * Add references to benchmark paper and praznik paper (#104)
40 | * New filter `FilterSelectedFeatures` which makes use of embedded feature selection methods of learners.
41 | See the help page for more details (#102)
42 | * Allow `NA` as task type.
43 | This makes it possible to use other tasks than `"regr"` or `"classif"` for certain filters, e.g. `FilterVariance` (#106)
44 |
45 |
46 | # mlr3filters 0.4.2
47 |
48 | * Fixes an issue where argument `nfeat` was not passed down to {praznik} filters (#97)
49 |
50 |
51 | # mlr3filters 0.4.1
52 |
53 | * Disable threading in praznik filters by default (5f24742e9b92f6a5f828c4f755be3fb53427afdb, @mllg)
54 | Enable by setting hyperparameter `threads` >= 2 or to `0` for auto-detection of available cores (#93, @mllg)
55 | * Document return type of private `.calculate()` (#92, @mllg)
56 | * Allow `NA` in returned vectors.
57 | Features with missing values as well as features with no calculated score are automatically ranked last, in a random order. (#92, @mllg)
58 | * praznik filters now also support `regr` Tasks (#90, @bommert)
59 |
60 |
61 | # mlr3filters 0.4.0
62 |
63 | * Add ReliefF filter (#86)
64 | * Fix praznik scores calculation: praznik filters are not monotone in the selected features due to their iterative fashion. E.g., the first selected feature can have a score of 5, the second selected feature a score of 10. This version replaces the praznik scores by a simple sequence (#87, @mllg)
65 |
66 |
67 | # mlr3filters 0.3.0
68 |
69 | * Add Permutation (#70)
70 | * Add `flts()` (#77)
71 | * Github Actions: set cron job to 4am to avoid potential download issues with R-devel on macOS
72 | * Filters now have a help method `$help()` which opens the respective help page (#68)
73 |
74 |
75 | # mlr3filters 0.2.0
76 |
77 | ## Internal
78 |
79 | * Use `private$.calculate` instead of public "calculate" method for Filters
80 | * switch from Travis to GitHub Actions
81 | * Use Roxygen R6 notation for docs
82 |
83 | ## Enhancements
84 |
85 | * new filter `FilterFindCorrelation` (#62, @mb706)
86 |
87 |
88 | # mlr3filters 0.1.1
89 |
90 | * Replace dependency `Metrics` with `mlr3measures`.
91 |
92 |
93 | # mlr3filters 0.1.0
94 |
95 | * Initial CRAN release.
96 |
--------------------------------------------------------------------------------
/R/FilterAUC.R:
--------------------------------------------------------------------------------
1 | #' @title AUC Filter
2 | #'
3 | #' @name mlr_filters_auc
4 | #'
5 | #' @description
6 | #' Area under the (ROC) Curve filter, analogously to [mlr3measures::auc()] from
7 | #' \CRANpkg{mlr3measures}. Missing values of the features are removed before
8 | #' calculating the AUC. If the AUC is undefined for the input, it is set to 0.5
9 | #' (random classifier). The absolute value of the difference between the AUC and
10 | #' 0.5 is used as final filter value.
11 | #'
12 | #' @references
13 | #' For a benchmark of filter methods:
14 | #'
15 | #' `r format_bib("bommert_2020")`
16 | #'
17 | #' @family Filter
18 | #' @include Filter.R
19 | #' @template seealso_filter
20 | #' @export
21 | #' @examples
22 | #' task = mlr3::tsk("sonar")
23 | #' filter = flt("auc")
24 | #' filter$calculate(task)
25 | #' head(as.data.table(filter), 3)
26 | #'
27 | #' if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart"), quietly = TRUE)) {
28 | #' library("mlr3pipelines")
29 | #' task = mlr3::tsk("spam")
30 | #'
31 | #' # Note: `filter.frac` is selected randomly and should be tuned.
32 | #'
33 | #' graph = po("filter", filter = flt("auc"), filter.frac = 0.5) %>>%
34 | #' po("learner", mlr3::lrn("classif.rpart"))
35 | #'
36 | #' graph$train(task)
37 | #' }
38 | FilterAUC = R6Class("FilterAUC",
39 |   inherit = Filter,
40 |
41 |   public = list(
42 |
43 |     #' @description Create a FilterAUC object.
44 |     initialize = function() {
45 |       super$initialize(
46 |         id = "auc",
47 |         label = "Area Under the ROC Curve Score",
48 |         man = "mlr3filters::mlr_filters_auc",
49 |         packages = "mlr3measures",
50 |         task_types = "classif",
51 |         task_properties = "twoclass",
52 |         feature_types = c("integer", "numeric")
53 |       )
54 |     }
55 |   ),
56 |
57 |   private = list(
58 |     # Scores every feature by |AUC - 0.5|, using the raw feature values as ranking scores.
59 |     .calculate = function(task, nfeat) {
60 |       is_positive = task$truth() == task$positive
61 |       feature_cols = task$data(cols = task$feature_names)
62 |       abs(map_dbl(feature_cols, function(values) {
63 |         observed = !is.na(values) # missing feature values are dropped per feature
64 |         auc(is_positive[observed], values[observed])
65 |       }) - 0.5)
66 |     }
67 |   )
68 | )
69 |
70 | #' @include mlr_filters.R
71 | mlr_filters$add("auc", FilterAUC)
72 |
73 | # Rank-based (Mann-Whitney) AUC of the numeric scores `prob` against the logical vector `truth`.
74 | # Returns 0.5 (random classifier) if `truth` contains only one class, as the AUC is undefined then.
75 | auc = function(truth, prob) {
76 |   # as.numeric(): integer products such as n_pos * n_neg overflow to NA once the counts exceed ~46k
77 |   n_pos = as.numeric(sum(truth))
78 |   n_neg = length(truth) - n_pos
79 |   if (n_pos == 0 || n_neg == 0) return(0.5) # nocov
80 |   r = rank(prob, ties.method = "average")
81 |   (sum(r[truth]) - n_pos * (n_pos + 1) / 2) / (n_pos * n_neg)
82 | }
83 |
--------------------------------------------------------------------------------
/R/FilterAnova.R:
--------------------------------------------------------------------------------
1 | #' @title ANOVA F-Test Filter
2 | #'
3 | #' @name mlr_filters_anova
4 | #'
5 | #' @description ANOVA F-Test filter calling [stats::aov()]. Note that this is
6 | #' equivalent to a \eqn{t}-test for binary classification.
7 | #'
8 | #' The filter value is `-log10(p)` where `p` is the \eqn{p}-value. This
9 | #' transformation is necessary to ensure numerical stability for very small
10 | #' \eqn{p}-values.
11 | #'
12 | #' @references
13 | #' For a benchmark of filter methods:
14 | #'
15 | #' `r format_bib("bommert_2020")`
16 | #'
17 | #' @family Filter
18 | #' @include Filter.R
19 | #' @importFrom stats aov
20 | #' @template seealso_filter
21 | #' @export
22 | #' @examples
23 | #' task = mlr3::tsk("iris")
24 | #' filter = flt("anova")
25 | #' filter$calculate(task)
26 | #' head(as.data.table(filter), 3)
27 | #'
28 | #' # transform to p-value
29 | #' 10^(-filter$scores)
30 | #'
31 | #' if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart"), quietly = TRUE)) {
32 | #' library("mlr3pipelines")
33 | #' task = mlr3::tsk("spam")
34 | #'
35 | #' # Note: `filter.frac` is selected randomly and should be tuned.
36 | #'
37 | #' graph = po("filter", filter = flt("anova"), filter.frac = 0.5) %>>%
38 | #' po("learner", mlr3::lrn("classif.rpart"))
39 | #'
40 | #' graph$train(task)
41 | #' }
42 | FilterAnova = R6Class("FilterAnova",
43 |   inherit = Filter,
44 |
45 |   public = list(
46 |
47 |     #' @description Create a FilterAnova object.
48 |     initialize = function() {
49 |       super$initialize(
50 |         id = "anova",
51 |         label = "ANOVA F-Test",
52 |         man = "mlr3filters::mlr_filters_anova",
53 |         packages = "stats",
54 |         task_types = "classif",
55 |         feature_types = c("integer", "numeric")
56 |       )
57 |     }
58 |   ),
59 |
60 |   private = list(
61 |     # Fits one aov() model per feature (formula built from feature and target); score is -log10(p).
62 |     .calculate = function(task, nfeat) {
63 |       feature_ids = task$feature_names
64 |       target_id = task$target_names
65 |       dt = task$data()
66 |       pvals = map_dbl(feature_ids, function(id) {
67 |         summary(aov(formulate(id, target_id), data = dt))[[1L]][1L, "Pr(>F)"]
68 |       })
69 |       set_names(-log10(pvals), feature_ids)
70 |     }
71 |   )
72 | )
73 |
74 | #' @include mlr_filters.R
75 | mlr_filters$add("anova", FilterAnova)
76 |
--------------------------------------------------------------------------------
/R/FilterBoruta.R:
--------------------------------------------------------------------------------
1 | #' @title Boruta Filter
2 | #'
3 | #' @name mlr_filters_boruta
4 | #'
5 | #' @description
6 | #' Filter using the Boruta algorithm for feature selection.
7 | #' If `keep = "tentative"`, confirmed and tentative features are returned.
8 | #' Note that there is no ordering in the selected features.
9 | #' Selected features get a score of 1, deselected features get a score of 0.
10 | #' The order of selected features is random.
11 | #' In combination with \CRANpkg{mlr3pipelines}, only the filter criterion `cutoff` makes sense.
12 | #'
13 | #' @section Initial parameter values:
14 | #' - `num.threads`:
15 | #' - Actual default: `NULL`, triggering auto-detection of the number of CPUs.
16 | #' - Adjusted value: 1.
17 | #' - Reason for change: Conflicting with parallelization via \CRANpkg{future}.
18 | #'
19 | #' @references
20 | #' `r format_bib("kursa_2010")`
21 | #'
22 | #' @family Filter
23 | #' @include Filter.R
24 | #' @template seealso_filter
25 | #' @export
26 | #' @examples
27 | #' \donttest{
28 | #' if (requireNamespace("Boruta")) {
29 | #' task = mlr3::tsk("sonar")
30 | #' filter = flt("boruta")
31 | #' filter$calculate(task)
32 | #' as.data.table(filter)
33 | #' }
34 | #' }
35 |
36 | FilterBoruta = R6Class("FilterBoruta",
37 |   inherit = Filter,
38 |
39 |   public = list(
40 |
41 |     #' @description
42 |     #' Creates a new instance of this [R6][R6::R6Class] class.
43 |     initialize = function() {
44 |       # every parameter except `keep` is forwarded to Boruta::Boruta() in `.calculate()`
45 |       param_set = ps(
46 |         pValue = p_dbl(default = 0.01),
47 |         mcAdj = p_lgl(default = TRUE),
48 |         maxRuns = p_int(lower = 1, default = 100),
49 |         doTrace = p_int(lower = 0, upper = 4, default = 0),
50 |         holdHistory = p_lgl(default = TRUE),
51 |         getImp = p_uty(),
52 |         keep = p_fct(levels = c("confirmed", "tentative"), default = "confirmed"),
53 |         num.threads = p_int(lower = 1, default = 1)
54 |       )
55 |       # initial values: single-threaded, and only confirmed features count as selected
56 |       param_set$set_values(keep = "confirmed", num.threads = 1)
57 |
58 |       super$initialize(
59 |         id = "boruta",
60 |         task_types = c("regr", "classif"),
61 |         param_set = param_set,
62 |         packages = "Boruta",
63 |         feature_types = c("integer", "numeric"),
64 |         label = "Boruta",
65 |         man = "mlr3filters::mlr_filters_boruta"
66 |       )
67 |     }
68 |   ),
69 |
70 |   private = list(
71 |     .calculate = function(task, nfeat) {
72 |       pv = self$param_set$values
73 |       data = task$data()
74 |       target = task$target_names
75 |       features = task$feature_names
76 |       formula = formulate(target, features)
77 |       keep = pv$keep
78 |       pv$keep = NULL # `keep` is consumed below and must not be forwarded to Boruta::Boruta()
79 |
80 |       res = invoke(Boruta::Boruta, formula = formula, data = data, .args = pv)
81 |       # tentative features only count as selected when `keep` is "tentative"
82 |       selected_features = Boruta::getSelectedAttributes(res, withTentative = (keep == "tentative"))
83 |       # binary score: 1 for selected features, 0 for all others
84 |       set_names(as.numeric(features %in% selected_features), features)
85 |     }
86 |   )
87 | )
88 |
89 |
90 | #' @include mlr_filters.R
91 | mlr_filters$add("boruta", FilterBoruta)
92 |
--------------------------------------------------------------------------------
/R/FilterCMIM.R:
--------------------------------------------------------------------------------
1 | #' @title Minimal Conditional Mutual Information Maximization Filter
2 | #'
3 | #' @name mlr_filters_cmim
4 | #'
5 | #' @description Minimal conditional mutual information maximization filter
6 | #' calling [praznik::CMIM()] from package \CRANpkg{praznik}.
7 | #'
8 | #' This filter supports partial scoring (see [Filter]).
9 | #'
10 | #' @references
11 | #' `r format_bib("kursa_2021")`
12 | #'
13 | #' For a benchmark of filter methods:
14 | #'
15 | #' `r format_bib("bommert_2020")`
16 | #'
17 | #' @template details_praznik
18 | #' @family Filter
19 | #' @include Filter.R
20 | #' @template seealso_filter
21 | #' @export
22 | #' @examples
23 | #' if (requireNamespace("praznik")) {
24 | #' task = mlr3::tsk("iris")
25 | #' filter = flt("cmim")
26 | #' filter$calculate(task, nfeat = 2)
27 | #' as.data.table(filter)
28 | #' }
29 | #'
30 | #' if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart", "praznik"), quietly = TRUE)) {
31 | #' library("mlr3pipelines")
32 | #' task = mlr3::tsk("spam")
33 | #'
34 | #' # Note: `filter.frac` is selected randomly and should be tuned.
35 | #'
36 | #' graph = po("filter", filter = flt("cmim"), filter.frac = 0.5) %>>%
37 | #' po("learner", mlr3::lrn("classif.rpart"))
38 | #'
39 | #' graph$train(task)
40 | #' }
41 | FilterCMIM = R6Class("FilterCMIM",
42 |   inherit = Filter,
43 |
44 |   public = list(
45 |
46 |     #' @description Create a FilterCMIM object.
47 |     initialize = function() {
48 |       # `threads` is initialized to 1 although the parameter's declared default is 0
49 |       praznik_params = ps(threads = p_int(lower = 0L, default = 0L, tags = "threads"))
50 |       praznik_params$values = list(threads = 1L)
51 |
52 |       super$initialize(
53 |         id = "cmim",
54 |         label = "Minimal Conditional Mutual Information Maximization",
55 |         man = "mlr3filters::mlr_filters_cmim",
56 |         packages = "praznik",
57 |         param_set = praznik_params,
58 |         task_types = c("classif", "regr"),
59 |         feature_types = c("integer", "numeric", "factor", "ordered")
60 |       )
61 |     }
62 |   ),
63 |
64 |   private = list(
65 |     # Delegates to `call_praznik()` with praznik::CMIM as the scoring function.
66 |     .calculate = function(task, nfeat) {
67 |       call_praznik(self, task, praznik::CMIM, nfeat)
68 |     }
69 |   )
70 | )
71 |
72 | #' @include mlr_filters.R
73 | mlr_filters$add("cmim", FilterCMIM)
74 |
--------------------------------------------------------------------------------
/R/FilterCarScore.R:
--------------------------------------------------------------------------------
1 | #' @title Correlation-Adjusted Marginal Correlation Score Filter
2 | #'
3 | #' @name mlr_filters_carscore
4 | #'
5 | #' @description Calculates the Correlation-Adjusted (marginal) coRrelation scores
6 | #' (short CAR scores) implemented in [care::carscore()] in package
7 | #' \CRANpkg{care}. The CAR scores for a set of features are defined as the
8 | #' correlations between the target and the decorrelated features. The filter
9 | #' returns the absolute value of the calculated scores.
10 | #'
11 | #' Argument `verbose` defaults to `FALSE`.
12 | #'
13 | #' @family Filter
14 | #' @include Filter.R
15 | #' @template seealso_filter
16 | #' @export
17 | #' @examples
18 | #' if (requireNamespace("care")) {
19 | #' task = mlr3::tsk("mtcars")
20 | #' filter = flt("carscore")
21 | #' filter$calculate(task)
22 | #' head(as.data.table(filter), 3)
23 | #'
24 | #' ## changing the filter settings
25 | #' filter = flt("carscore")
26 | #' filter$param_set$values = list("diagonal" = TRUE)
27 | #' filter$calculate(task)
28 | #' head(as.data.table(filter), 3)
29 | #' }
30 | #'
31 | #' if (mlr3misc::require_namespaces(c("mlr3pipelines", "care", "rpart"), quietly = TRUE)) {
32 | #' library("mlr3pipelines")
33 | #' task = mlr3::tsk("mtcars")
34 | #'
35 | #' # Note: `filter.frac` is selected randomly and should be tuned.
36 | #'
37 | #' graph = po("filter", filter = flt("carscore"), filter.frac = 0.5) %>>%
38 | #' po("learner", mlr3::lrn("regr.rpart"))
39 | #'
40 | #' graph$train(task)
41 | #' }
42 | FilterCarScore = R6Class("FilterCarScore",
43 |   inherit = Filter,
44 |
45 |   public = list(
46 |     #' @description Create a FilterCarScore object.
47 |     initialize = function() {
48 |       # `verbose` is declared with default TRUE but is explicitly set to
49 |       # FALSE for this filter right after the parameter set is built.
50 |       params = ps(
51 |         lambda = p_dbl(lower = 0, upper = 1, default = NO_DEF),
52 |         diagonal = p_lgl(default = FALSE),
53 |         verbose = p_lgl(default = TRUE)
54 |       )
55 |       params$values = list(verbose = FALSE)
56 |
57 |       super$initialize(
58 |         id = "carscore",
59 |         label = "Correlation-Adjusted coRrelation Score",
60 |         man = "mlr3filters::mlr_filters_carscore",
61 |         packages = "care",
62 |         task_types = "regr",
63 |         feature_types = c("logical", "integer", "numeric"),
64 |         param_set = params
65 |       )
66 |     }
67 |   ),
68 |
69 |   private = list(
70 |     # Calls care::carscore() on the numeric feature matrix and the target,
71 |     # forwarding the configured parameter values, and returns the absolute
72 |     # scores under the names delivered by care::carscore().
73 |     .calculate = function(task, nfeat) {
74 |       y = task$truth()
75 |       x = as_numeric_matrix(task$data(cols = task$feature_names))
76 |
77 |       car = invoke(care::carscore, Xtrain = x, Ytrain = y, .args = self$param_set$values)
78 |       set_names(abs(car), names(car))
79 |     }
80 |   )
81 | )
82 |
83 | #' @include mlr_filters.R
84 | mlr_filters$add("carscore", FilterCarScore)
85 |
--------------------------------------------------------------------------------
/R/FilterCarSurvScore.R:
--------------------------------------------------------------------------------
1 | #' @title Correlation-Adjusted Survival Score Filter
2 | #'
3 | #' @name mlr_filters_carsurvscore
4 | #'
5 | #' @description Calculates CARS scores for right-censored survival tasks.
6 | #' Calls the implementation in [carSurv::carSurvScore()] in package
7 | #' \CRANpkg{carSurv}.
8 | #'
9 | #' @references
10 | #' `r format_bib("bommert_2021")`
11 | #'
12 | #' @family Filter
13 | #' @include Filter.R
14 | #' @template seealso_filter
15 | #' @export
16 | FilterCarSurvScore = R6Class("FilterCarSurvScore",
17 |   inherit = Filter,
18 |
19 |   public = list(
20 |     #' @description Create a FilterCarSurvScore object.
21 |     initialize = function() {
22 |       # Named `param_set` rather than `ps` so the local does not shadow the
23 |       # `ps()` constructor.
24 |       param_set = ps(
25 |         maxIPCweight = p_int(lower = 0, default = 10),
26 |         denom = p_fct(c("1/n", "sum_w"), default = "1/n")
27 |       )
28 |
29 |       super$initialize(
30 |         id = "surv.carsurvscore",
31 |         label = "Correlation-Adjusted coRrelation Survival Score",
32 |         man = "mlr3filters::mlr_filters_carsurvscore",
33 |         packages = c("carSurv", "mlr3proba"),
34 |         task_types = "surv",
35 |         feature_types = c("integer", "numeric"),
36 |         param_set = param_set
37 |       )
38 |     }
39 |   ),
40 |
41 |   private = list(
42 |     # Passes the first column of the task truth as `obsTime` and the second
43 |     # as `obsEvent` to carSurv::carSurvScore(), then returns the absolute
44 |     # scores named by the columns of the feature matrix.
45 |     .calculate = function(task, nfeat) {
46 |       truth = task$truth()
47 |       features = as.matrix(task$data(cols = task$feature_names))
48 |
49 |       raw = invoke(carSurv::carSurvScore,
50 |         obsTime = truth[, 1L],
51 |         obsEvent = truth[, 2L],
52 |         X = features,
53 |         .args = self$param_set$values
54 |       )
55 |
56 |       set_names(abs(raw), colnames(features))
57 |     }
58 |   )
59 | )
60 |
61 | #' @include mlr_filters.R
62 | mlr_filters$add("carsurvscore", FilterCarSurvScore)
63 |
--------------------------------------------------------------------------------
/R/FilterCorrelation.R:
--------------------------------------------------------------------------------
1 | #' @title Correlation Filter
2 | #'
3 | #' @name mlr_filters_correlation
4 | #'
5 | #' @description
6 | #' Simple correlation filter calling [stats::cor()].
7 | #' The filter score is the absolute value of the correlation.
8 | #'
9 | #' @note
10 | #' This filter, in its default settings, can handle missing values in the features.
11 | #' However, the resulting filter scores may be misleading or at least difficult to compare
12 | #' if some features have a large proportion of missing values.
13 | #'
14 | #' If a feature has no non-missing value, the resulting score will be `NA`.
15 | #' Missing scores appear in a random, non-deterministic order at the end of the vector of scores.
16 | #'
17 | #' @references
18 | #' For a benchmark of filter methods:
19 | #'
20 | #' `r format_bib("bommert_2020")`
21 | #'
22 | #' @family Filter
23 | #' @include Filter.R
24 | #' @template seealso_filter
25 | #' @export
26 | #' @examples
27 | #' ## Pearson (default)
28 | #' task = mlr3::tsk("mtcars")
29 | #' filter = flt("correlation")
30 | #' filter$calculate(task)
31 | #' as.data.table(filter)
32 | #'
33 | #' ## Spearman
34 | #' filter = FilterCorrelation$new()
35 | #' filter$param_set$values = list("method" = "spearman")
36 | #' filter$calculate(task)
37 | #' as.data.table(filter)
38 | #' if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart"), quietly = TRUE)) {
39 | #' library("mlr3pipelines")
40 | #' task = mlr3::tsk("mtcars")
41 | #'
42 | #' # Note: `filter.frac` is selected randomly and should be tuned.
43 | #'
44 | #' graph = po("filter", filter = flt("correlation"), filter.frac = 0.5) %>>%
45 | #' po("learner", mlr3::lrn("regr.rpart"))
46 | #'
47 | #' graph$train(task)
48 | #' }
49 | FilterCorrelation = R6Class("FilterCorrelation",
50 |   inherit = Filter,
51 |
52 |   public = list(
53 |
54 |     #' @description Create a FilterCorrelation object.
55 |     initialize = function() {
56 |       # Both parameters are forwarded to stats::cor() by `.calculate`.
57 |       use_levels = c("everything", "all.obs", "complete.obs", "na.or.complete", "pairwise.complete.obs")
58 |       method_levels = c("pearson", "kendall", "spearman")
59 |       params = ps(
60 |         use = p_fct(use_levels, default = "everything"),
61 |         method = p_fct(method_levels, default = "pearson")
62 |       )
63 |
64 |       super$initialize(
65 |         id = "correlation",
66 |         label = "Correlation",
67 |         man = "mlr3filters::mlr_filters_correlation",
68 |         packages = "stats",
69 |         task_types = "regr",
70 |         feature_types = c("integer", "numeric"),
71 |         param_set = params
72 |       )
73 |     }
74 |   ),
75 |
76 |   private = list(
77 |     # Correlates every feature column with the target via stats::cor() and
78 |     # returns the absolute value of the first result column, named by feature.
79 |     .calculate = function(task, nfeat) {
80 |       feats = task$feature_names
81 |       cor_mat = invoke(stats::cor,
82 |         x = as.matrix(task$data(cols = feats)),
83 |         y = as.matrix(task$truth()),
84 |         .args = self$param_set$values)
85 |       set_names(abs(cor_mat[, 1L]), feats)
86 |     },
87 |
88 |     # Always reports the "missings" property.
89 |     .get_properties = function() {
90 |       "missings"
91 |     }
92 |
93 |   )
94 | )
95 |
96 | #' @include mlr_filters.R
97 | mlr_filters$add("correlation", FilterCorrelation)
98 |
--------------------------------------------------------------------------------
/R/FilterDISR.R:
--------------------------------------------------------------------------------
1 | #' @title Double Input Symmetrical Relevance Filter
2 | #'
3 | #' @name mlr_filters_disr
4 | #'
5 | #' @description Double input symmetrical relevance filter calling
6 | #' [praznik::DISR()] from package \CRANpkg{praznik}.
7 | #'
8 | #' This filter supports partial scoring (see [Filter]).
9 | #'
10 | #' @references
11 | #' `r format_bib("kursa_2021")`
12 | #'
13 | #' For a benchmark of filter methods:
14 | #'
15 | #' `r format_bib("bommert_2020")`
16 | #'
17 | #' @template details_praznik
18 | #' @family Filter
19 | #' @include Filter.R
20 | #' @template seealso_filter
21 | #' @export
22 | #' @examples
23 | #' if (requireNamespace("praznik")) {
24 | #' task = mlr3::tsk("iris")
25 | #' filter = flt("disr")
26 | #' filter$calculate(task)
27 | #' as.data.table(filter)
28 | #' }
29 | #'
30 | #' if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart", "praznik"), quietly = TRUE)) {
31 | #' library("mlr3pipelines")
32 | #' task = mlr3::tsk("spam")
33 | #'
34 | #' # Note: `filter.frac` is selected randomly and should be tuned.
35 | #'
36 | #' graph = po("filter", filter = flt("disr"), filter.frac = 0.5) %>>%
37 | #' po("learner", mlr3::lrn("classif.rpart"))
38 | #'
39 | #' graph$train(task)
40 | #' }
41 | FilterDISR = R6Class("FilterDISR",
42 |   inherit = Filter,
43 |
44 |   public = list(
45 |
46 |     #' @description Create a FilterDISR object.
47 |     initialize = function() {
48 |       # `threads` is declared with default 0L but explicitly set to 1L here.
49 |       params = ps(threads = p_int(lower = 0L, default = 0L, tags = "threads"))
50 |       params$values = list(threads = 1L)
51 |
52 |       super$initialize(
53 |         id = "disr",
54 |         label = "Double Input Symmetrical Relevance",
55 |         man = "mlr3filters::mlr_filters_disr",
56 |         packages = "praznik",
57 |         task_types = c("classif", "regr"),
58 |         feature_types = c("integer", "numeric", "factor", "ordered"),
59 |         param_set = params
60 |       )
61 |     }
62 |   ),
63 |
64 |   private = list(
65 |     # Delegates scoring to the `call_praznik()` helper with praznik::DISR.
66 |     .calculate = function(task, nfeat) {
67 |       call_praznik(self, task, praznik::DISR, nfeat)
68 |     }
69 |   )
70 | )
71 |
72 | #' @include mlr_filters.R
73 | mlr_filters$add("disr", FilterDISR)
74 |
--------------------------------------------------------------------------------
/R/FilterFindCorrelation.R:
--------------------------------------------------------------------------------
1 | #' @title Correlation Filter
2 | #'
3 | #' @name mlr_filters_find_correlation
4 | #'
5 | #' @description
6 | #' Simple filter emulating `caret::findCorrelation(exact = FALSE)`.
7 | #'
8 | #' This gives each feature a score between 0 and 1 that is *one minus* the
9 | #' cutoff value for which it is excluded when using [caret::findCorrelation()].
10 | #' The negative is used because [caret::findCorrelation()] excludes everything
11 | #' *above* a cutoff, while filters exclude everything below a cutoff.
12 | #' Here the filter scores are shifted by +1 to get positive values, to align
13 | #' with the way other filters work.
14 | #'
15 | #' Subsequently `caret::findCorrelation(cutoff = 0.9)` lists the same features
16 | #' that are excluded with `FilterFindCorrelation` at score 0.1 (= 1 - 0.9).
17 | #'
18 | #' @family Filter
19 | #' @include Filter.R
20 | #' @template seealso_filter
21 | #' @export
22 | #' @examples
23 | #' # Pearson (default)
24 | #' task = mlr3::tsk("mtcars")
25 | #' filter = flt("find_correlation")
26 | #' filter$calculate(task)
27 | #' as.data.table(filter)
28 | #'
29 | #' ## Spearman
30 | #' filter = flt("find_correlation", method = "spearman")
31 | #' filter$calculate(task)
32 | #' as.data.table(filter)
33 | #'
34 | #' if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart"), quietly = TRUE)) {
35 | #' library("mlr3pipelines")
36 | #' task = mlr3::tsk("spam")
37 | #'
38 | #' # Note: `filter.frac` is selected randomly and should be tuned.
39 | #'
40 | #' graph = po("filter", filter = flt("find_correlation"), filter.frac = 0.5) %>>%
41 | #' po("learner", mlr3::lrn("classif.rpart"))
42 | #'
43 | #' graph$train(task)
44 | #' }
45 | FilterFindCorrelation = R6Class("FilterFindCorrelation",
46 | inherit = Filter,
47 |
48 | public = list(
49 |
50 | #' @description Create a FilterFindCorrelation object.
51 | initialize = function() {
52 | param_set = ps(
53 | use = p_fct(c("everything", "all.obs", "complete.obs", "na.or.complete", "pairwise.complete.obs"), default = "everything"),
54 | method = p_fct(levels = c("pearson", "kendall", "spearman"), default = "pearson")
55 | )
56 |
57 | super$initialize(
58 | id = "find_correlation",
59 | task_types = NA_character_, # `.calculate` only reads the feature columns, never the target
60 | param_set = param_set,
61 | feature_types = c("integer", "numeric"),
62 | packages = "stats",
63 | label = "Correlation-based Score",
64 | man = "mlr3filters::mlr_filters_find_correlation"
65 | )
66 | }
67 | ),
68 |
69 | private = list(
70 | .calculate = function(task, nfeat) {
71 |
72 | fn = task$feature_names
73 | pv = self$param_set$values
74 | cm = invoke(stats::cor, # feature-by-feature correlation matrix; `use`/`method` come from pv
75 | x = task$data(cols = fn),
76 | .args = pv)
77 | cm = abs(cm) # only the magnitude of the correlation is used from here on
78 | # A feature is removed as soon as it is the one with the higher average
79 | # correlation in a pair (note: ties are broken by removing the /later/ feature first).
80 | avg_cor = colMeans(cm)
81 | # decreasing = TRUE to emulate tie breaking
82 | avg_cor_order = order(avg_cor, decreasing = TRUE)
83 | cm = cm[avg_cor_order, avg_cor_order, drop = FALSE]
84 | # Rows / Columns of cm are now ordered by correlation mean, highest first.
85 | # A feature i is excluded as soon as a lower-average-correlation feature
86 | # has correlation with i > cutoff. This means the cutoff at which i is
87 | # excluded is the max of the correlation with all lower-avg-cor features.
88 | # Therefore we look for the highest feature correlation col-wise in the
89 | # lower triangle of the ordered cm.
90 |
91 | # The feature with the lowest average correlation is never removed by caret,
92 | # so its cutoff is 0 (its column is all zero after the masking below).
93 | cm[upper.tri(cm, diag = TRUE)] = 0
94 | # The column-wise maxima carry the correct names and cutoff values, but filters keep
95 | # high scores while the cutoff removes high values, so return 1 - cutoff (stays non-negative).
96 | 1 - apply(cm, 2, max)
97 | },
98 | .get_properties = function() {
99 | use = self$param_set$values$use %??% "everything" # fall back to the declared default when unset
100 | if (use %in% c("complete.obs", "pairwise.complete.obs")) { # NOTE(review): "na.or.complete" is not listed here; confirm this is intended
101 | "missings"
102 | } else {
103 | character(0)
104 | }
105 | }
106 | )
107 | )
108 |
109 | #' @include mlr_filters.R
110 | mlr_filters$add("find_correlation", FilterFindCorrelation) # adds the class to `mlr_filters` under the key "find_correlation"
111 |
--------------------------------------------------------------------------------
/R/FilterImportance.R:
--------------------------------------------------------------------------------
1 | #' @title Filter for Embedded Feature Selection via Variable Importance
2 | #'
3 | #' @name mlr_filters_importance
4 | #'
5 | #' @description Variable Importance filter using embedded feature selection of
6 | #' machine learning algorithms. Takes a [mlr3::Learner] which is capable of
7 | #' extracting the variable importance (property "importance"), fits the model
8 | #' and extracts the importance values to use as filter scores.
9 | #'
10 | #' @family Filter
11 | #' @include FilterLearner.R
12 | #' @template seealso_filter
13 | #' @export
14 | #' @examples
15 | #' if (requireNamespace("rpart")) {
16 | #' task = mlr3::tsk("iris")
17 | #' learner = mlr3::lrn("classif.rpart")
18 | #' filter = flt("importance", learner = learner)
19 | #' filter$calculate(task)
20 | #' as.data.table(filter)
21 | #' }
22 | #'
23 | #' if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart", "mlr3learners"), quietly = TRUE)) {
24 | #' library("mlr3learners")
25 | #' library("mlr3pipelines")
26 | #' task = mlr3::tsk("sonar")
27 | #'
28 | #' learner = mlr3::lrn("classif.rpart")
29 | #'
30 | #' # Note: `filter.frac` is selected randomly and should be tuned.
31 | #'
32 | #' graph = po("filter", filter = flt("importance", learner = learner), filter.frac = 0.5) %>>%
33 | #' po("learner", mlr3::lrn("classif.log_reg"))
34 | #'
35 | #' graph$train(task)
36 | #' }
37 | FilterImportance = R6Class("FilterImportance",
38 |   inherit = FilterLearner,
39 |
40 |   public = list(
41 |
42 |     #' @field learner ([mlr3::Learner])\cr
43 |     #'  Learner to extract the importance values from.
44 |     learner = NULL,
45 |
46 |     #' @description Create a FilterImportance object.
47 |     #' @param learner ([mlr3::Learner])\cr
48 |     #'  Learner to extract the importance values from.
49 |     initialize = function(learner = mlr3::lrn("classif.featureless")) {
50 |       # Convert to a cloned learner and require the "importance" property
51 |       # before storing it on the object.
52 |       learner = assert_learner(as_learner(learner, clone = TRUE),
53 |         properties = "importance")
54 |       self$learner = learner
55 |
56 |       # Filter metadata and the parameter set are taken from the learner.
57 |       super$initialize(
58 |         id = "importance",
59 |         label = "Importance Score",
60 |         man = "mlr3filters::mlr_filters_importance",
61 |         packages = learner$packages,
62 |         task_types = learner$task_type,
63 |         feature_types = learner$feature_types,
64 |         param_set = learner$param_set
65 |       )
66 |     }
67 |   ),
68 |
69 |
70 |   private = list(
71 |     # Trains a deep clone of the stored learner on the task and returns the
72 |     # importance values of its base learner.
73 |     .calculate = function(task, nfeat) {
74 |       fitted = self$learner$clone(deep = TRUE)$train(task = task)
75 |       fitted$base_learner()$importance()
76 |     },
77 |
78 |     # Reports "missings" only if the stored learner lists that property.
79 |     .get_properties = function() {
80 |       intersect("missings", self$learner$properties)
81 |     }
82 |   )
83 | )
84 |
85 | #' @include mlr_filters.R
86 | mlr_filters$add("importance", FilterImportance)
87 |
--------------------------------------------------------------------------------
/R/FilterInformationGain.R:
--------------------------------------------------------------------------------
1 |
2 | #' @title Information Gain Filter
3 | #'
4 | #' @name mlr_filters_information_gain
5 | #'
6 | #' @description Information gain filter calling
7 | #' [FSelectorRcpp::information_gain()] in package \CRANpkg{FSelectorRcpp}. Set
8 | #' parameter `"type"` to `"gainratio"` to calculate the gain ratio, or set to
9 | #' `"symuncert"` to calculate the symmetrical uncertainty (see
10 | #' [FSelectorRcpp::information_gain()]). Default is `"infogain"`.
11 | #'
12 | #' Argument `equal` defaults to `FALSE` for classification tasks, and to
13 | #' `TRUE` for regression tasks.
14 | #'
15 | #' @family Filter
16 | #' @include Filter.R
17 | #' @template seealso_filter
18 | #' @export
19 | #' @examples
20 | #' if (requireNamespace("FSelectorRcpp")) {
21 | #' ## InfoGain (default)
22 | #' task = mlr3::tsk("sonar")
23 | #' filter = flt("information_gain")
24 | #' filter$calculate(task)
25 | #' head(filter$scores, 3)
26 | #' as.data.table(filter)
27 | #'
28 | #' ## GainRatio
29 | #'
30 | #' filterGR = flt("information_gain")
31 | #' filterGR$param_set$values = list("type" = "gainratio")
32 | #' filterGR$calculate(task)
33 | #' head(as.data.table(filterGR), 3)
34 | #'
35 | #' }
36 | #'
37 | #' if (mlr3misc::require_namespaces(c("mlr3pipelines", "FSelectorRcpp", "rpart"), quietly = TRUE)) {
38 | #' library("mlr3pipelines")
39 | #' task = mlr3::tsk("spam")
40 | #'
41 | #' # Note: `filter.frac` is selected randomly and should be tuned.
42 | #'
43 | #' graph = po("filter", filter = flt("information_gain"), filter.frac = 0.5) %>>%
44 | #' po("learner", mlr3::lrn("classif.rpart"))
45 | #'
46 | #' graph$train(task)
47 | #'
48 | #' }
49 | FilterInformationGain = R6Class("FilterInformationGain",
50 |   inherit = Filter,
51 |
52 |   public = list(
53 |
54 |     #' @description Create a FilterInformationGain object.
55 |     initialize = function() {
56 |       param_set = ps(
57 |         type = p_fct(c("infogain", "gainratio", "symuncert"), default = "infogain"),
58 |         equal = p_lgl(default = FALSE),
59 |         discIntegers = p_lgl(default = TRUE),
60 |         threads = p_int(lower = 0L, default = 0L, tags = "threads")
61 |       )
62 |
63 |       super$initialize(
64 |         id = "information_gain",
65 |         task_types = c("classif", "regr"),
66 |         param_set = param_set,
67 |         feature_types = c("integer", "numeric", "factor", "ordered"),
68 |         packages = "FSelectorRcpp",
69 |         label = "Information Gain",
70 |         man = "mlr3filters::mlr_filters_information_gain"
71 |       )
72 |     }
73 |   ),
74 |
75 |   private = list(
76 |     # Scores all features with FSelectorRcpp::information_gain() and returns
77 |     # the `importance` column named by the `attributes` column of its result.
78 |     # Unset parameters are filled in first: `type` falls back to "infogain",
79 |     # `equal` falls back to TRUE for regression tasks and FALSE otherwise.
80 |     .calculate = function(task, nfeat) {
81 |       pv = self$param_set$values
82 |       pv$type = pv$type %??% "infogain"
83 |       # `%??%` binds tighter than `==`, so the fallback must be parenthesised.
84 |       # Without the parentheses a user-supplied `equal` would itself be
85 |       # compared against "regr" and therefore always end up FALSE.
86 |       pv$equal = pv$equal %??% (task$task_type == "regr")
87 |
88 |       x = setDF(task$data(cols = task$feature_names))
89 |       y = task$truth()
90 |       scores = invoke(FSelectorRcpp::information_gain, x = x, y = y, .args = pv)
91 |       set_names(scores$importance, scores$attributes)
92 |     },
93 |
94 |     # Always reports the "missings" property.
95 |     .get_properties = function() {
96 |       "missings"
97 |     }
98 |   )
99 | )
100 |
101 | #' @include mlr_filters.R
102 | mlr_filters$add("information_gain", FilterInformationGain)
103 |
--------------------------------------------------------------------------------
/R/FilterJMI.R:
--------------------------------------------------------------------------------
1 |
2 | #' @title Joint Mutual Information Filter
3 | #'
4 | #' @name mlr_filters_jmi
5 | #'
6 | #' @description Joint mutual information filter calling [praznik::JMI()] in
7 | #' package \CRANpkg{praznik}.
8 | #'
9 | #' This filter supports partial scoring (see [Filter]).
10 | #'
11 | #' @references
12 | #' `r format_bib("kursa_2021")`
13 | #'
14 | #' For a benchmark of filter methods:
15 | #'
16 | #' `r format_bib("bommert_2020")`
17 | #'
18 | #' @template details_praznik
19 | #' @family Filter
20 | #' @include Filter.R
21 | #' @template seealso_filter
22 | #' @export
23 | #' @examples
24 | #' if (requireNamespace("praznik")) {
25 | #' task = mlr3::tsk("iris")
26 | #' filter = flt("jmi")
27 | #' filter$calculate(task, nfeat = 2)
28 | #' as.data.table(filter)
29 | #' }
30 | #'
31 | #' if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart", "praznik"), quietly = TRUE)) {
32 | #' library("mlr3pipelines")
33 | #' task = mlr3::tsk("spam")
34 | #'
35 | #' # Note: `filter.frac` is selected randomly and should be tuned.
36 | #'
37 | #' graph = po("filter", filter = flt("jmi"), filter.frac = 0.5) %>>%
38 | #' po("learner", mlr3::lrn("classif.rpart"))
39 | #'
40 | #' graph$train(task)
41 | #' }
42 | FilterJMI = R6Class("FilterJMI",
43 |   inherit = Filter,
44 |
45 |   public = list(
46 |
47 |     #' @description Create a FilterJMI object.
48 |     initialize = function() {
49 |       # `threads` is declared with default 0L but explicitly set to 1L here.
50 |       params = ps(threads = p_int(lower = 0L, default = 0L, tags = "threads"))
51 |       params$values = list(threads = 1L)
52 |
53 |       super$initialize(
54 |         id = "jmi",
55 |         label = "Joint Mutual Information",
56 |         man = "mlr3filters::mlr_filters_jmi",
57 |         packages = "praznik",
58 |         task_types = c("classif", "regr"),
59 |         feature_types = c("integer", "numeric", "factor", "ordered"),
60 |         param_set = params
61 |       )
62 |     }
63 |   ),
64 |
65 |   private = list(
66 |     # Delegates scoring to the `call_praznik()` helper with praznik::JMI.
67 |     .calculate = function(task, nfeat) {
68 |       call_praznik(self, task, praznik::JMI, nfeat)
69 |     }
70 |   )
71 | )
72 |
73 | #' @include mlr_filters.R
74 | mlr_filters$add("jmi", FilterJMI)
75 |
--------------------------------------------------------------------------------
/R/FilterJMIM.R:
--------------------------------------------------------------------------------
1 | #' @title Minimal Joint Mutual Information Maximization Filter
2 | #'
3 | #' @name mlr_filters_jmim
4 | #'
5 | #' @description Minimal joint mutual information maximization filter calling
6 | #' [praznik::JMIM()] in package \CRANpkg{praznik}.
7 | #'
8 | #' This filter supports partial scoring (see [Filter]).
9 | #'
10 | #' @references
11 | #' `r format_bib("kursa_2021")`
12 | #'
13 | #' For a benchmark of filter methods:
14 | #'
15 | #' `r format_bib("bommert_2020")`
16 | #'
17 | #' @template details_praznik
18 | #' @family Filter
19 | #' @include Filter.R
20 | #' @template seealso_filter
21 | #' @export
22 | #' @examples
23 | #' if (requireNamespace("praznik")) {
24 | #' task = mlr3::tsk("iris")
25 | #' filter = flt("jmim")
26 | #' filter$calculate(task, nfeat = 2)
27 | #' as.data.table(filter)
28 | #' }
29 | #'
30 | #' if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart", "praznik"), quietly = TRUE)) {
31 | #' library("mlr3pipelines")
32 | #' task = mlr3::tsk("spam")
33 | #'
34 | #' # Note: `filter.frac` is selected randomly and should be tuned.
35 | #'
36 | #' graph = po("filter", filter = flt("jmim"), filter.frac = 0.5) %>>%
37 | #' po("learner", mlr3::lrn("classif.rpart"))
38 | #'
39 | #' graph$train(task)
40 | #' }
41 | FilterJMIM = R6Class("FilterJMIM",
42 |   inherit = Filter,
43 |
44 |   public = list(
45 |
46 |     #' @description Create a FilterJMIM object.
47 |     initialize = function() {
48 |       # `threads` is declared with default 0L but explicitly set to 1L here.
49 |       params = ps(threads = p_int(lower = 0L, default = 0L, tags = "threads"))
50 |       params$values = list(threads = 1L)
51 |
52 |       super$initialize(
53 |         id = "jmim",
54 |         label = "Minimal Joint Mutual Information Maximization",
55 |         man = "mlr3filters::mlr_filters_jmim",
56 |         packages = "praznik",
57 |         task_types = c("classif", "regr"),
58 |         feature_types = c("integer", "numeric", "factor", "ordered"),
59 |         param_set = params
60 |       )
61 |     }
62 |   ),
63 |
64 |   private = list(
65 |     # Delegates scoring to the `call_praznik()` helper with praznik::JMIM.
66 |     .calculate = function(task, nfeat) {
67 |       call_praznik(self, task, praznik::JMIM, nfeat)
68 |     }
69 |   )
70 | )
71 |
72 | #' @include mlr_filters.R
73 | mlr_filters$add("jmim", FilterJMIM)
74 |
--------------------------------------------------------------------------------
/R/FilterKruskalTest.R:
--------------------------------------------------------------------------------
1 | #' @title Kruskal-Wallis Test Filter
2 | #'
3 | #' @name mlr_filters_kruskal_test
4 | #'
5 | #' @description Kruskal-Wallis rank sum test filter calling [stats::kruskal.test()].
6 | #'
7 | #' The filter value is `-log10(p)` where `p` is the \eqn{p}-value. This
8 | #' transformation is necessary to ensure numerical stability for very small
9 | #' \eqn{p}-values.
10 | #'
11 | #' @note
12 | #' This filter, in its default settings, can handle missing values in the features.
13 | #' However, the resulting filter scores may be misleading or at least difficult to compare
14 | #' if some features have a large proportion of missing values.
15 | #'
16 | #' If a feature has not at least one non-missing observation per label, the resulting score will be NA.
17 | #' Missing scores appear in a random, non-deterministic order at the end of the vector of scores.
18 | #'
19 | #'
20 | #' @references
21 | #' For a benchmark of filter methods:
22 | #'
23 | #' `r format_bib("bommert_2020")`
24 | #'
25 | #' @family Filter
26 | #' @include Filter.R
27 | #' @importFrom stats kruskal.test
28 | #' @template seealso_filter
29 | #' @export
30 | #' @examples
31 | #' task = mlr3::tsk("iris")
32 | #' filter = flt("kruskal_test")
33 | #' filter$calculate(task)
34 | #' as.data.table(filter)
35 | #'
36 | #' # transform to p-value
37 | #' 10^(-filter$scores)
38 | #'
39 | #' if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart"), quietly = TRUE)) {
40 | #' library("mlr3pipelines")
41 | #' task = mlr3::tsk("spam")
42 | #'
43 | #' # Note: `filter.frac` is selected randomly and should be tuned.
44 | #'
45 | #' graph = po("filter", filter = flt("kruskal_test"), filter.frac = 0.5) %>>%
46 | #' po("learner", mlr3::lrn("classif.rpart"))
47 | #'
48 | #' graph$train(task)
49 | #' }
50 | FilterKruskalTest = R6Class("FilterKruskalTest",
51 |   inherit = Filter,
52 |
53 |   public = list(
54 |
55 |     #' @description Create a FilterKruskalTest object.
56 |     initialize = function() {
57 |       params = ps(
58 |         na.action = p_fct(c("na.omit", "na.fail", "na.exclude"), default = "na.omit")
59 |       )
60 |
61 |       super$initialize(
62 |         id = "kruskal_test",
63 |         label = "Kruskal-Wallis Test",
64 |         man = "mlr3filters::mlr_filters_kruskal_test",
65 |         packages = "stats",
66 |         task_types = "classif",
67 |         feature_types = c("integer", "numeric"),
68 |         param_set = params
69 |       )
70 |     }
71 |   ),
72 |
73 |   private = list(
74 |     # Returns -log10 of the Kruskal-Wallis p-value for every feature column.
75 |     # A feature scores NA when, after dropping its missing values, at least
76 |     # one level of the target has no observation left.
77 |     .calculate = function(task, nfeat) {
78 |       na_action = self$param_set$values$na.action %??% "na.omit"
79 |
80 |       columns = task$data(cols = task$feature_names)
81 |       labels = task$truth()
82 |
83 |       p_value = function(x) {
84 |         counts = table(labels[!is.na(x)])
85 |         if (any(counts == 0L)) {
86 |           return(NA_real_)
87 |         }
88 |         kruskal.test(x = x, g = labels, na.action = na_action)$p.value
89 |       }
90 |
91 |       -log10(map_dbl(columns, p_value))
92 |     },
93 |
94 |     # Reports "missings" unless `na.action` is set to "na.fail".
95 |     .get_properties = function() {
96 |       na_action = self$param_set$values$na.action %??% "na.omit"
97 |       if (na_action %in% c("na.omit", "na.exclude")) "missings" else character()
98 |     }
99 |   )
100 | )
101 |
102 | #' @include mlr_filters.R
103 | mlr_filters$add("kruskal_test", FilterKruskalTest)
104 |
--------------------------------------------------------------------------------
/R/FilterLearner.R:
--------------------------------------------------------------------------------
1 | #' @include Filter.R
2 | FilterLearner = R6Class("FilterLearner",
3 |   inherit = Filter,
4 |
5 |   active = list(
6 |     #' @field hash (`character(1)`)\cr
7 |     #' Hash (unique identifier) for this object.
8 |     hash = function(rhs) {
9 |       # Read-only binding: the hash covers class, id, parameter values and
10 |       # the hash of the wrapped learner.
11 |       assert_ro_binding(rhs)
12 |       calculate_hash(
13 |         class(self),
14 |         self$id,
15 |         self$param_set$values,
16 |         self$learner$hash
17 |       )
18 |     },
19 |
20 |     #' @field phash (`character(1)`)\cr
21 |     #' Hash (unique identifier) for this partial object, excluding some components
22 |     #' which are varied systematically during tuning (parameter values) or feature
23 |     #' selection (feature names).
24 |     phash = function(rhs) {
25 |       # Same as `hash`, but without the parameter values.
26 |       assert_ro_binding(rhs)
27 |       calculate_hash(
28 |         class(self),
29 |         self$id,
30 |         self$learner$hash
31 |       )
32 |     }
33 |   )
34 | )
35 |
--------------------------------------------------------------------------------
/R/FilterMIM.R:
--------------------------------------------------------------------------------
1 | #' @title Mutual Information Maximization Filter
2 | #'
3 | #' @name mlr_filters_mim
4 | #'
5 | #' @description Mutual information maximization filter calling
6 | #' [praznik::MIM()] in package \CRANpkg{praznik}.
7 | #'
8 | #' This filter supports partial scoring (see [Filter]).
9 | #'
10 | #' @references
11 | #' `r format_bib("kursa_2021")`
12 | #'
13 | #' For a benchmark of filter methods:
14 | #'
15 | #' `r format_bib("bommert_2020")`
16 | #'
17 | #' @template details_praznik
18 | #' @family Filter
19 | #' @include Filter.R
20 | #' @template seealso_filter
21 | #' @export
22 | #' @examples
23 | #' if (requireNamespace("praznik")) {
24 | #' task = mlr3::tsk("iris")
25 | #' filter = flt("mim")
26 | #' filter$calculate(task, nfeat = 2)
27 | #' as.data.table(filter)
28 | #' }
29 | #'
30 | #' if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart", "praznik"), quietly = TRUE)) {
31 | #' library("mlr3pipelines")
32 | #' task = mlr3::tsk("spam")
33 | #'
34 | #' # Note: `filter.frac` is selected randomly and should be tuned.
35 | #'
36 | #' graph = po("filter", filter = flt("mim"), filter.frac = 0.5) %>>%
37 | #' po("learner", mlr3::lrn("classif.rpart"))
38 | #'
39 | #' graph$train(task)
40 | #' }
41 | FilterMIM = R6Class("FilterMIM",
42 |   inherit = Filter,
43 |
44 |   public = list(
45 |
46 |     #' @description Create a FilterMIM object.
47 |     initialize = function() {
48 |       # `threads` is declared with default 0L but explicitly set to 1L here.
49 |       params = ps(threads = p_int(lower = 0L, default = 0L, tags = "threads"))
50 |       params$values = list(threads = 1L)
51 |
52 |       super$initialize(
53 |         id = "mim",
54 |         label = "Mutual Information Maximization",
55 |         man = "mlr3filters::mlr_filters_mim",
56 |         packages = "praznik",
57 |         task_types = c("classif", "regr"),
58 |         feature_types = c("integer", "numeric", "factor", "ordered"),
59 |         param_set = params
60 |       )
61 |     }
62 |   ),
63 |
64 |   private = list(
65 |     # Delegates scoring to the `call_praznik()` helper with praznik::MIM.
66 |     .calculate = function(task, nfeat) {
67 |       call_praznik(self, task, praznik::MIM, nfeat)
68 |     }
69 |   )
70 | )
71 |
72 | #' @include mlr_filters.R
73 | mlr_filters$add("mim", FilterMIM)
74 |
--------------------------------------------------------------------------------
/R/FilterMRMR.R:
--------------------------------------------------------------------------------
1 | #' @title Minimum Redundancy Maximal Relevancy Filter
2 | #'
3 | #' @name mlr_filters_mrmr
4 | #'
5 | #' @description Minimum redundancy maximal relevancy filter calling
6 | #' [praznik::MRMR()] in package \CRANpkg{praznik}.
7 | #'
8 | #' This filter supports partial scoring (see [Filter]).
9 | #'
10 | #' @references
11 | #' `r format_bib("kursa_2021")`
12 | #'
13 | #' For a benchmark of filter methods:
14 | #'
15 | #' `r format_bib("bommert_2020")`
16 | #'
17 | #' @template details_praznik
18 | #' @family Filter
19 | #' @include Filter.R
20 | #' @template seealso_filter
21 | #' @export
22 | #' @examples
23 | #' if (requireNamespace("praznik")) {
24 | #' task = mlr3::tsk("iris")
25 | #' filter = flt("mrmr")
26 | #' filter$calculate(task, nfeat = 2)
27 | #' as.data.table(filter)
28 | #' }
29 | #'
30 | #' if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart", "praznik"), quietly = TRUE)) {
31 | #' library("mlr3pipelines")
32 | #' task = mlr3::tsk("spam")
33 | #'
34 | #' # Note: `filter.frac` is selected randomly and should be tuned.
35 | #'
36 | #' graph = po("filter", filter = flt("mrmr"), filter.frac = 0.5) %>>%
37 | #' po("learner", mlr3::lrn("classif.rpart"))
38 | #'
39 | #' graph$train(task)
40 | #' }
41 | FilterMRMR = R6Class("FilterMRMR",
42 |   inherit = Filter,
43 |
44 |   public = list(
45 |
46 |     #' @description Create a FilterMRMR object.
47 |     initialize = function() {
48 |       # `threads` is declared with default 0L but explicitly set to 1L here.
49 |       params = ps(threads = p_int(lower = 0L, default = 0L, tags = "threads"))
50 |       params$values = list(threads = 1L)
51 |
52 |       super$initialize(
53 |         id = "mrmr",
54 |         label = "Minimum Redundancy Maximal Relevancy",
55 |         man = "mlr3filters::mlr_filters_mrmr",
56 |         packages = "praznik",
57 |         task_types = c("classif", "regr"),
58 |         feature_types = c("integer", "numeric", "factor", "ordered"),
59 |         param_set = params
60 |       )
61 |     }
62 |   ),
63 |
64 |   private = list(
65 |     # Delegates scoring to the `call_praznik()` helper with praznik::MRMR.
66 |     .calculate = function(task, nfeat) {
67 |       call_praznik(self, task, praznik::MRMR, nfeat)
68 |     }
69 |   )
70 | )
71 |
72 | #' @include mlr_filters.R
73 | mlr_filters$add("mrmr", FilterMRMR)
74 |
--------------------------------------------------------------------------------
/R/FilterNJMIM.R:
--------------------------------------------------------------------------------
1 | #' @title Minimal Normalised Joint Mutual Information Maximization Filter
2 | #'
3 | #' @name mlr_filters_njmim
4 | #'
5 | #' @description Minimal normalised joint mutual information maximization filter
6 | #' calling [praznik::NJMIM()] from package \CRANpkg{praznik}.
7 | #'
8 | #' This filter supports partial scoring (see [Filter]).
9 | #'
10 | #' @references
11 | #' `r format_bib("kursa_2021")`
12 | #'
13 | #' For a benchmark of filter methods:
14 | #'
15 | #' `r format_bib("bommert_2020")`
16 | #'
17 | #' @template details_praznik
18 | #' @family Filter
19 | #' @include Filter.R
20 | #' @template seealso_filter
21 | #' @export
22 | #' @examples
23 | #' if (requireNamespace("praznik")) {
24 | #' task = mlr3::tsk("iris")
25 | #' filter = flt("njmim")
26 | #' filter$calculate(task, nfeat = 2)
27 | #' as.data.table(filter)
28 | #' }
29 | #'
30 | #' if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart", "praznik"), quietly = TRUE)) {
31 | #' library("mlr3pipelines")
32 | #' task = mlr3::tsk("spam")
33 | #'
34 | #' # Note: `filter.frac` is selected randomly and should be tuned.
35 | #'
36 | #' graph = po("filter", filter = flt("njmim"), filter.frac = 0.5) %>>%
37 | #' po("learner", mlr3::lrn("classif.rpart"))
38 | #'
39 | #' graph$train(task)
40 | #' }
41 | FilterNJMIM = R6Class("FilterNJMIM",
42 | inherit = Filter,
43 |
44 | public = list(
45 |
46 | #' @description Create a FilterNJMIM object. Hyperparameter `threads` is initialized to `1`.
47 | initialize = function() {
48 | param_set = ps(
49 | threads = p_int(lower = 0L, default = 0L, tags = "threads")
50 | )
51 | param_set$values = list(threads = 1L) # start single-threaded; `0` would auto-detect the number of cores
52 | super$initialize(
53 | id = "njmim",
54 | task_types = c("classif", "regr"),
55 | param_set = param_set,
56 | packages = "praznik",
57 | feature_types = c("integer", "numeric", "factor", "ordered"),
58 | label = "Minimal Normalised Joint Mutual Information Maximization",
59 | man = "mlr3filters::mlr_filters_njmim"
60 | )
61 | }
62 | ),
63 |
64 | private = list(
65 | .calculate = function(task, nfeat) {
66 | call_praznik(self, task, praznik::NJMIM, nfeat) # shared helper: runs the selector with `k = nfeat` and converts the selection order to scores
67 | }
68 | )
69 | )
70 |
71 | #' @include mlr_filters.R
72 | mlr_filters$add("njmim", FilterNJMIM)
73 |
--------------------------------------------------------------------------------
/R/FilterPerformance.R:
--------------------------------------------------------------------------------
1 | #' @title Predictive Performance Filter
2 | #'
3 | #' @name mlr_filters_performance
4 | #'
5 | #' @description Filter which uses the predictive performance of a
6 | #' [mlr3::Learner] as filter score. Performs a [mlr3::resample()] for each
7 | #' feature separately. The filter score is the aggregated performance of the
8 | #' [mlr3::Measure], or the negated aggregated performance if the measure has
9 | #' to be minimized.
10 | #'
11 | #' @family Filter
12 | #' @include FilterLearner.R
13 | #' @template seealso_filter
14 | #' @export
15 | #' @examples
16 | #' if (requireNamespace("rpart")) {
17 | #' task = mlr3::tsk("iris")
18 | #' learner = mlr3::lrn("classif.rpart")
19 | #' filter = flt("performance", learner = learner)
20 | #' filter$calculate(task)
21 | #' as.data.table(filter)
22 | #' }
23 | #'
24 | #' if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart"), quietly = TRUE)) {
25 | #' library("mlr3pipelines")
26 | #' task = mlr3::tsk("iris")
27 | #' l = lrn("classif.rpart")
28 | #'
29 | #' # Note: `filter.frac` is selected randomly and should be tuned.
30 | #'
31 | #' graph = po("filter", filter = flt("performance", learner = l), filter.frac = 0.5) %>>%
32 | #' po("learner", mlr3::lrn("classif.rpart"))
33 | #'
34 | #' graph$train(task)
35 | #' }
36 | FilterPerformance = R6Class("FilterPerformance",
37 | inherit = FilterLearner,
38 |
39 | public = list(
40 |
41 | #' @field learner ([mlr3::Learner])\cr Clone of the learner passed at construction; resampled once per feature.
42 | learner = NULL,
43 | #' @field resampling ([mlr3::Resampling])\cr Resampling strategy used to evaluate each single-feature task.
44 | resampling = NULL,
45 | #' @field measure ([mlr3::Measure])\cr Measure whose aggregated value becomes the filter score.
46 | measure = NULL,
47 |
48 | #' @description Create a FilterPerformance object.
49 | #' @param learner ([mlr3::Learner])\cr
50 | #' [mlr3::Learner] to use for model fitting.
51 | #' @param resampling ([mlr3::Resampling])\cr
52 | #' [mlr3::Resampling] to be used within resampling.
53 | #' @param measure ([mlr3::Measure])\cr
54 | #' [mlr3::Measure] to be used for evaluating the performance.
55 | initialize = function(learner = mlr3::lrn("classif.featureless"),
56 | resampling = mlr3::rsmp("holdout"), measure = NULL) {
57 |
58 | self$learner = learner = assert_learner(as_learner(learner, clone = TRUE))
59 | self$resampling = assert_resampling(as_resampling(resampling))
60 | self$measure = assert_measure(as_measure(measure,
61 | task_type = learner$task_type), learner = learner)
62 | packages = unique(c(self$learner$packages, self$measure$packages))
63 |
64 | super$initialize(
65 | id = "performance",
66 | task_types = learner$task_type,
67 | param_set = learner$param_set,
68 | feature_types = learner$feature_types,
69 | packages = packages,
70 | label = "Predictive Performance",
71 | man = "mlr3filters::mlr_filters_performance"
72 | )
73 | }
74 | ),
75 |
76 | private = list(
77 | .calculate = function(task, nfeat) {
78 | task = task$clone() # work on a copy: the feature role is overwritten for every feature below
79 | fn = task$feature_names
80 |
81 | perf = map_dbl(fn, function(x) {
82 | task$col_roles$feature = x # restrict the task to this single feature
83 | resample(task, self$learner, self$resampling, clone = character())$
84 | aggregate(measures = self$measure)
85 | })
86 |
87 | if (self$measure$minimize) {
88 | perf = -perf # negate so that a larger score always means a better feature
89 | }
90 |
91 | set_names(perf, fn)
92 | },
93 |
94 | .get_properties = function() {
95 | intersect("missings", self$learner$properties) # supports missings only if the learner does
96 | }
97 | )
98 | )
99 |
100 | #' @include mlr_filters.R
101 | mlr_filters$add("performance", FilterPerformance)
102 |
--------------------------------------------------------------------------------
/R/FilterRelief.R:
--------------------------------------------------------------------------------
1 | #' @title RELIEF Filter
2 | #'
3 | #' @name mlr_filters_relief
4 | #'
5 | #' @description RELIEF filter calling
6 | #' [FSelectorRcpp::relief()] in package \CRANpkg{FSelectorRcpp}.
7 | #'
8 | #' @note
9 | #' This filter can handle missing values in the features.
10 | #' However, the resulting filter scores may be misleading or at least difficult to compare
11 | #' if some features have a large proportion of missing values.
12 | #'
13 | #' If a feature has no non-missing observation, the resulting score will be (close to) 0.
14 | #'
15 | #' @family Filter
16 | #' @include Filter.R
17 | #' @template seealso_filter
18 | #' @export
19 | #' @examples
20 | #' if (requireNamespace("FSelectorRcpp")) {
21 | #' ## Relief (default)
22 | #' task = mlr3::tsk("iris")
23 | #' filter = flt("relief")
24 | #' filter$calculate(task)
25 | #' head(filter$scores, 3)
26 | #' as.data.table(filter)
27 | #' }
28 | #'
29 | #' if (mlr3misc::require_namespaces(c("mlr3pipelines", "FSelectorRcpp", "rpart"), quietly = TRUE)) {
30 | #' library("mlr3pipelines")
31 | #' task = mlr3::tsk("iris")
32 | #'
33 | #' # Note: `filter.frac` is selected randomly and should be tuned.
34 | #'
35 | #' graph = po("filter", filter = flt("relief"), filter.frac = 0.5) %>>%
36 | #' po("learner", mlr3::lrn("classif.rpart"))
37 | #'
38 | #' graph$train(task)
39 | #' }
40 | FilterRelief = R6Class("FilterRelief",
41 | inherit = Filter,
42 |
43 | public = list(
44 |
45 | #' @description Create a FilterRelief object.
46 | initialize = function() {
47 | param_set = ps(
48 | neighboursCount = p_int(lower = 1L, default = 5L),
49 | sampleSize = p_int(lower = 1L, default = 10L)
50 | )
51 |
52 | super$initialize(
53 | id = "relief",
54 | task_types = c("classif", "regr"),
55 | param_set = param_set,
56 | feature_types = c("integer", "numeric", "factor", "ordered"),
57 | packages = "FSelectorRcpp",
58 | label = "RELIEF",
59 | man = "mlr3filters::mlr_filters_relief"
60 | )
61 | }
62 | ),
63 |
64 | private = list(
65 | .get_properties = function() {
66 | "missings"
67 | },
68 |
69 | .calculate = function(task, nfeat) {
70 | pv = self$param_set$values # only explicitly set hyperparameters are forwarded
71 |
72 | x = setDF(task$data(cols = task$feature_names)) # features are handed to relief() as a plain data.frame
73 | y = task$truth()
74 | scores = invoke(FSelectorRcpp::relief, x = x, y = y, .args = pv)
75 | set_names(scores$importance, scores$attributes) # importance values named by their attribute
76 | }
77 | )
78 | )
79 |
80 | #' @include mlr_filters.R
81 | mlr_filters$add("relief", FilterRelief)
82 |
--------------------------------------------------------------------------------
/R/FilterSelectedFeatures.R:
--------------------------------------------------------------------------------
1 | #' @title Filter for Embedded Feature Selection
2 | #'
3 | #' @name mlr_filters_selected_features
4 | #'
5 | #' @description
6 | #' Filter using embedded feature selection of machine learning algorithms.
7 | #' Takes a [mlr3::Learner] which is capable of extracting the selected features
8 | #' (property "selected_features"), fits the model and extracts the selected
9 | #' features.
10 | #'
11 | #' Note that contrary to [mlr_filters_importance], there is no ordering in
12 | #' the selected features. Selected features get a score of 1, deselected
13 | #' features get a score of 0. The order of selected features is random and
14 | #' different from the order in the learner. In combination with
15 | #' \CRANpkg{mlr3pipelines}, only the filter criterion `cutoff` makes sense.
16 | #'
17 | #' @family Filter
18 | #' @include Filter.R
19 | #' @template seealso_filter
20 | #' @export
21 | #' @examples
22 | #' if (requireNamespace("rpart")) {
23 | #' task = mlr3::tsk("iris")
24 | #' learner = mlr3::lrn("classif.rpart")
25 | #' filter = flt("selected_features", learner = learner)
26 | #' filter$calculate(task)
27 | #' as.data.table(filter)
28 | #' }
29 | #'
30 | #' if (mlr3misc::require_namespaces(c("mlr3pipelines", "mlr3learners", "rpart"), quietly = TRUE)) {
31 | #' library("mlr3pipelines")
32 | #' library("mlr3learners")
33 | #' task = mlr3::tsk("sonar")
34 | #'
35 | #' filter = flt("selected_features", learner = lrn("classif.rpart"))
36 | #'
37 | #' # Note: All filter scores are either 0 or 1, i.e. setting `filter.cutoff = 0.5` means that
38 | #' # we select all "selected features".
39 | #'
40 | #' graph = po("filter", filter = filter, filter.cutoff = 0.5) %>>%
41 | #' po("learner", mlr3::lrn("classif.log_reg"))
42 | #'
43 | #' graph$train(task)
44 | #' }
45 | FilterSelectedFeatures = R6Class("FilterSelectedFeatures",
46 | inherit = FilterLearner,
47 |
48 | public = list(
49 |
50 | #' @field learner ([mlr3::Learner])\cr
51 | #' Learner to extract the selected features from.
52 | learner = NULL,
53 |
54 | #' @description Create a FilterSelectedFeatures object.
55 | #' @param learner ([mlr3::Learner])\cr
56 | #' Learner to extract the selected features from.
57 | initialize = function(learner = mlr3::lrn("classif.featureless")) {
58 | self$learner = learner = assert_learner(as_learner(learner, clone = TRUE),
59 | properties = "selected_features")
60 |
61 | super$initialize(
62 | id = "selected_features",
63 | task_types = learner$task_type,
64 | feature_types = learner$feature_types,
65 | packages = learner$packages,
66 | param_set = learner$param_set,
67 | label = "Embedded Feature Selection",
68 | man = "mlr3filters::mlr_filters_selected_features"
69 | )
70 | }
71 | ),
72 |
73 | private = list(
74 | .calculate = function(task, nfeat) {
75 | learner = self$learner$clone(deep = TRUE) # train a copy so the stored learner stays untrained
76 | learner = learner$train(task = task)
77 | score = named_vector(task$feature_names, init = 0) # every feature starts as "not selected"
78 | replace(score, names(score) %in% learner$selected_features(), 1) # selected features get score 1
79 | },
80 |
81 | .get_properties = function() {
82 | intersect("missings", self$learner$properties) # supports missings only if the learner does
83 | }
84 | )
85 | )
86 |
87 | #' @include mlr_filters.R
88 | mlr_filters$add("selected_features", FilterSelectedFeatures)
89 |
--------------------------------------------------------------------------------
/R/FilterUnivariateCox.R:
--------------------------------------------------------------------------------
1 | #' @title Univariate Cox Survival Filter
2 | #'
3 | #' @name mlr_filters_univariate_cox
4 | #'
5 | #' @description Calculates scores for assessing the relationship between
6 | #' individual features and the time-to-event outcome (right-censored survival
7 | #' data) using a univariate Cox proportional hazards model.
8 | #' The goal is to determine which features have a statistically significant
9 | #' association with the event of interest, typically in the context of clinical
10 | #' or biomedical research.
11 | #'
12 | #' This filter fits a [Cox Proportional Hazards][survival::coxph()] model using
13 | #' each feature independently and extracts the \eqn{p}-value that quantifies the
14 | #' significance of the feature's impact on survival. The filter value is
15 | #' `-log10(p)` where `p` is the \eqn{p}-value. This transformation is necessary
16 | #' to ensure numerical stability for very small \eqn{p}-values. Also higher
17 | #' values denote more important features. The filter works only for numeric
18 | #' features so please ensure that factor variables are properly encoded, e.g.
19 | #' using [PipeOpEncode][mlr3pipelines::PipeOpEncode].
20 | #'
21 | #' @family Filter
22 | #' @include Filter.R
23 | #' @template seealso_filter
24 | #' @export
25 | #' @examples
26 | #'
27 | #' filter = flt("univariate_cox")
28 | #' filter
29 | #'
30 | FilterUnivariateCox = R6Class("FilterUnivariateCox",
31 | inherit = Filter,
32 | public = list(
33 | #' @description Create a FilterUnivariateCox object.
34 | initialize = function() {
35 | super$initialize(
36 | id = "surv.univariate_cox", # NOTE(review): differs from the dictionary key "univariate_cox" -- confirm this is intended
37 | packages = "survival",
38 | param_set = ps(),
39 | feature_types = c("integer", "numeric", "logical"),
40 | task_types = "surv",
41 | label = "Univariate Cox Survival Score",
42 | man = "mlr3filters::mlr_filters_univariate_cox"
43 | )
44 | }
45 | ),
46 |
47 | private = list(
48 | .calculate = function(task, nfeat) {
49 | features = task$feature_names
50 | targets = task$data(cols = task$target_names) # target columns are fetched once and reused for every model
51 |
52 | scores = map_dbl(features, function(feature) {
53 | model = invoke(
54 | survival::coxph,
55 | formula = task$formula(rhs = feature), # univariate model: only this feature on the right-hand side
56 | data = cbind(task$data(cols = feature), targets)
57 | )
58 | pval = summary(model)$coefficients[, "Pr(>|z|)"] # p-value column of the coefficient table
59 | -log10(pval) # smaller p-values => larger scores
60 | })
61 |
62 | set_names(scores, features)
63 | }
64 | )
65 | )
66 |
67 | #' @include mlr_filters.R
68 | mlr_filters$add("univariate_cox", FilterUnivariateCox)
69 |
--------------------------------------------------------------------------------
/R/FilterVariance.R:
--------------------------------------------------------------------------------
1 | #' @title Variance Filter
2 | #'
3 | #' @name mlr_filters_variance
4 | #'
5 | #' @description Variance filter calling `stats::var()`.
6 | #'
7 | #' Argument `na.rm` defaults to `TRUE` here.
8 | #'
9 | #' @references
10 | #' For a benchmark of filter methods:
11 | #'
12 | #' `r format_bib("bommert_2020")`
13 | #'
14 | #' @family Filter
15 | #' @include Filter.R
16 | #' @importFrom stats var
17 | #' @template seealso_filter
18 | #' @export
19 | #' @examples
20 | #' task = mlr3::tsk("mtcars")
21 | #' filter = flt("variance")
22 | #' filter$calculate(task)
23 | #' head(filter$scores, 3)
24 | #' as.data.table(filter)
25 | #'
26 | #' if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart"), quietly = TRUE)) {
27 | #' library("mlr3pipelines")
28 | #' task = mlr3::tsk("spam")
29 | #'
30 | #' # Note: `filter.frac` is selected randomly and should be tuned.
31 | #'
32 | #' graph = po("filter", filter = flt("variance"), filter.frac = 0.5) %>>%
33 | #' po("learner", mlr3::lrn("classif.rpart"))
34 | #'
35 | #' graph$train(task)
36 | #' }
37 | FilterVariance = R6Class("FilterVariance",
38 | inherit = Filter,
39 |
40 | public = list(
41 |
42 | #' @description Create a FilterVariance object. Hyperparameter `na.rm` is initialized to `TRUE`.
43 | initialize = function() {
44 | param_set = ps(
45 | na.rm = p_lgl(default = TRUE)
46 | )
47 | param_set$values = list(na.rm = TRUE)
48 |
49 | super$initialize(
50 | id = "variance",
51 | task_types = NA_character_,
52 | param_set = param_set,
53 | packages = "stats",
54 | feature_types = c("integer", "numeric"),
55 | label = "Variance",
56 | man = "mlr3filters::mlr_filters_variance"
57 | )
58 | }
59 | ),
60 |
61 | private = list(
62 | .calculate = function(task, nfeat) {
63 | na_rm = self$param_set$values$na.rm %??% TRUE # an unset `na.rm` falls back to TRUE
64 | map_dbl(task$data(cols = task$feature_names), var, na.rm = na_rm) # one variance per feature column
65 | },
66 |
67 | .get_properties = function() {
68 | if (isTRUE(self$param_set$values$na.rm %??% TRUE)) "missings" else character() # same fallback as `.calculate`, so both agree when `na.rm` is unset
69 | }
70 | )
71 | )
72 |
73 | #' @include mlr_filters.R
74 | mlr_filters$add("variance", FilterVariance)
75 |
--------------------------------------------------------------------------------
/R/bibentries.R:
--------------------------------------------------------------------------------
1 | #' @importFrom utils bibentry
2 | bibentries = c(
3 | bommert_2020 = bibentry("article",
4 | doi = "10.1016/j.csda.2019.106839",
5 | year = "2020",
6 | month = "3",
7 | publisher = "Elsevier {BV}",
8 | volume = "143",
9 | pages = "106839",
10 | author = "Andrea Bommert and Xudong Sun and Bernd Bischl and J\u00f6rg Rahnenf\u00fchrer and Michel Lang",
11 | title = "Benchmark for filter methods for feature selection in high-dimensional classification data",
12 | journal = "Computational Statistics & Data Analysis"
13 | ),
14 |
15 | kursa_2021 = bibentry("article",
16 | doi = "10.1016/j.softx.2021.100819",
17 | year = "2021",
18 | month = "12",
19 | publisher = "Elsevier {BV}",
20 | volume = "16",
21 | pages = "100819",
22 | author = "Miron B. Kursa",
23 | title = "Praznik: High performance information-based feature selection",
24 | journal = "{SoftwareX}"
25 | ),
26 |
27 | bommert_2021 = bibentry("article",
28 | doi = "10.1093/bib/bbab354",
29 | year = "2021",
30 | month = "9",
31 | publisher = "Oxford University Press ({OUP})",
32 | volume = "23",
33 | number = "1",
34 | author = "Andrea Bommert and Thomas Welchowski and Matthias Schmid and J\u00f6rg Rahnenf\u00fchrer",
35 | title = "Benchmark of filter methods for feature selection in high-dimensional gene expression survival data",
36 | journal = "Briefings in Bioinformatics"
37 | ),
38 |
39 | kursa_2010 = bibentry("article",
40 | title = "Feature Selection with the Boruta Package",
41 | volume = "36",
42 | number = "11",
43 | journal = "Journal of Statistical Software",
44 | author = "Miron B. Kursa and Witold R. Rudnicki",
45 | year = "2010",
46 | pages = "1-13")
47 | )
48 |
--------------------------------------------------------------------------------
/R/flt.R:
--------------------------------------------------------------------------------
1 | #' @title Syntactic Sugar for Filter Construction
2 | #'
3 | #' @description
4 | #' These functions complement [mlr_filters] with functions in the spirit of [mlr3::mlr_sugar].
5 | #'
6 | #' @inheritParams mlr3::mlr_sugar
7 | #' @return [Filter].
8 | #' @export
9 | #' @examples
10 | #' flt("correlation", method = "kendall")
11 | #' flts(c("mrmr", "jmim"))
12 | flt = function(.key, ...) {
13 | dictionary_sugar_get(mlr_filters, .key, ...) # retrieve one entry of `mlr_filters` by key, forwarding `...`
14 | }
15 |
16 | #' @rdname flt
17 | #' @export
18 | flts = function(.keys, ...) {
19 | dictionary_sugar_mget(mlr_filters, .keys, ...) # retrieve several entries of `mlr_filters` by key, forwarding `...`
20 | }
21 |
--------------------------------------------------------------------------------
/R/helper.R:
--------------------------------------------------------------------------------
1 | call_praznik = function(self, task, fun, nfeat) {
2 | # run the praznik selector `fun`, asking for `nfeat` features
3 | features = task$data(cols = task$feature_names)
4 | args = self$param_set$get_values()
5 | selection = invoke(fun, X = features, Y = task$truth(), k = nfeat, .args = args)$selection
6 | n_selected = length(selection)
7 |
8 | # scores are evenly spaced from 1 (first element of `selection`) down to 0 (last element)
9 | set_names(seq(from = 1, to = 0, length.out = n_selected), names(selection))
10 | }
11 |
12 | catn = function(..., file = "") { # write `...` pasted together and joined by newlines, plus a final newline
13 | cat(paste0(paste0(..., collapse = "\n"), "\n"), file = file)
14 | }
15 |
16 | as_numeric_matrix = function(x) {
17 | # coerce `x` to a matrix
18 | mat = as.matrix(x)
19 | # only logical storage is promoted to double; every other type passes through unchanged
20 | if (is.logical(mat)) storage.mode(mat) = "double"
21 | mat
22 | }
23 |
24 | test_matching_task_type = function(task_type, object, class) {
25 | # look up the `class` column of the task-type table for the given type name(s)
26 | lookup = function(types) {
27 | tab = mlr_reflections$task_types
28 | tab[[class]][tab[["type"]] %chin% types]
29 | }
30 | # nothing to check, or the task types are literally equal
31 | if (is.null(task_type) || object$task_type == task_type) {
32 | return(TRUE)
33 | }
34 | # the object may inherit from the class registered for the requested type
35 | cl_requested = lookup(task_type)
36 | if (inherits(object, cl_requested)) {
37 | return(TRUE)
38 | }
39 | # otherwise both task types must map to the same registered class
40 | cl_requested == lookup(object$task_type)
41 | }
42 |
--------------------------------------------------------------------------------
/R/mlr_filters.R:
--------------------------------------------------------------------------------
1 | #' @title Dictionary of Filters
2 | #'
3 | #' @format [R6::R6Class] object
4 | #' @description
5 | #' A simple [mlr3misc::Dictionary] storing objects of class [Filter].
6 | #' Each Filter has an associated help page, see `mlr_filters_[id]`.
7 | #'
8 | #' This dictionary can get populated with additional filters by add-on packages.
9 | #'
10 | #' For a more convenient way to retrieve and construct filters, see [flt()].
11 | #' @section Usage:
12 | #'
13 | #' See [mlr3misc::Dictionary].
14 | #'
15 | #' @family Dictionary
16 | #' @family Filter
17 | #' @export
18 | #' @examples
19 | #' mlr_filters$keys()
20 | #' as.data.table(mlr_filters)
21 | #' mlr_filters$get("mim")
22 | #' flt("anova")
23 | mlr_filters = DictionaryFilter = R6Class("DictionaryFilter",
24 | inherit = mlr3misc::Dictionary,
25 | cloneable = FALSE,
26 | )$new() # both names are bound to the single instantiated dictionary object, not to the class generator
27 |
28 |
29 | #' @export
30 | as.data.table.DictionaryFilter = function(x, ..., objects = FALSE) {
31 | assert_flag(objects)
32 | # one row per registered filter; vector-valued fields are wrapped as list columns
33 | describe = function(key) {
34 | flt_obj = x$get(key)
35 | row = list(key = key, label = flt_obj$label, task_types = list(flt_obj$task_types),
36 | task_properties = list(flt_obj$task_properties), params = list(flt_obj$param_set$ids()),
37 | feature_types = list(flt_obj$feature_types), packages = list(flt_obj$packages))
38 | # the filter object itself is only attached when `objects = TRUE`
39 | insert_named(row, if (objects) list(object = list(flt_obj)))
40 | }
41 | setkeyv(map_dtr(x$keys(), describe), "key")[]
42 | }
43 |
--------------------------------------------------------------------------------
/R/reexports.R:
--------------------------------------------------------------------------------
1 | #' @importFrom data.table as.data.table
2 | #' @export
3 | data.table::as.data.table
4 |
--------------------------------------------------------------------------------
/R/zzz.R:
--------------------------------------------------------------------------------
1 | #' @import data.table
2 | #' @import checkmate
3 | #' @import paradox
4 | #' @import mlr3misc
5 | #' @import mlr3
6 | #' @importFrom R6 R6Class
7 | #' @importFrom utils head
8 | "_PACKAGE"
9 |
10 | .onLoad = function(libname, pkgname) {
11 | # nolint
12 | # nocov start
13 | backports::import(pkgname) # make the backports shims available in this package's namespace at load time
14 | } # nocov end
15 |
16 | leanify_package()
17 |
--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | output: github_document
3 | ---
4 |
5 | # mlr3filters
6 |
7 | Package website: [release](https://mlr3filters.mlr-org.com/) | [dev](https://mlr3filters.mlr-org.com/dev/)
8 |
9 | {mlr3filters} adds feature selection filters to [mlr3](https://mlr3.mlr-org.com).
10 | The implemented filters can be used stand-alone, or as part of a machine learning pipeline in combination with
11 | [mlr3pipelines](https://mlr3pipelines.mlr-org.com) and the [filter operator](https://mlr3pipelines.mlr-org.com/reference/mlr_pipeops_filter.html).
12 |
13 | Wrapper methods for feature selection are implemented in [mlr3fselect](https://mlr3fselect.mlr-org.com).
14 | Learners which support the extraction of feature importance scores can be combined with a filter from this package for embedded feature selection.
15 |
16 |
17 | [](https://github.com/mlr-org/mlr3filters/actions/workflows/r-cmd-check.yml)
18 | [](https://cran.r-project.org/package=mlr3filters)
19 | [](https://stackoverflow.com/questions/tagged/mlr3)
20 | [](https://lmmisld-lmu-stats-slds.srv.mwn.de/mlr_invite/)
21 |
22 |
23 | ## Installation
24 |
25 | CRAN version
26 |
27 | ```{r eval = FALSE}
28 | install.packages("mlr3filters")
29 | ```
30 |
31 | Development version
32 |
33 | ```{r, eval = FALSE}
34 | remotes::install_github("mlr-org/mlr3filters")
35 | ```
36 |
37 | ## Filters
38 |
39 | ### Filter Example
40 |
41 | ```{r}
42 | set.seed(1)
43 | library("mlr3")
44 | library("mlr3filters")
45 |
46 | task = tsk("sonar")
47 | filter = flt("auc")
48 | head(as.data.table(filter$calculate(task)))
49 | ```
50 |
51 | ### Implemented Filters
52 |
53 | ```{r echo = FALSE, message=FALSE}
54 | library("mlr3misc")
55 | library("mlr3filters")
56 | library("data.table")
57 |
58 | link_cran = function(pkg) {
59 | mlr3misc::map(pkg, function(.x) {
60 | mlr3misc::map_chr(.x, function(.y) {
61 | if (unlist(.y) %in% getOption("defaultPackages")) {
62 | .y
63 | } else {
64 | sprintf("[%1$s](https://cran.r-project.org/package=%1$s)", .y)
65 | }
66 | })
67 | })
68 | }
69 |
70 | tab = as.data.table(mlr_filters)[, !c("params", "task_properties")]
71 | tab[, task_types := sapply(task_types, function(x) if (is_scalar_na(x)) "Universal" else paste(capitalize(x), collapse = " & "))]
72 | tab[, feature_types := sapply(feature_types, function(x) paste(capitalize(x), collapse = ", "))]
73 | tab[, packages := sapply(packages, function(x) paste(link_cran(x), collapse = ", "))]
74 |
75 | # manually change the task type for specific filters
76 | learner_based = c("performance", "permutation", "importance", "selected_features")
77 | tab[key %in% learner_based, task_types := "Universal"]
78 | tab[key %in% learner_based, packages := ""]
79 |
80 |
81 | setnames(tab,
82 | old = c("key", "task_types", "feature_types", "packages"),
83 | new = c("Name", "Task Types", "Feature Types", "Package")
84 | )
85 |
86 | knitr::kable(tab, format = "markdown")
87 | ```
88 |
89 | ### Variable Importance Filters
90 |
91 | The following learners allow the extraction of variable importance and therefore are supported by `FilterImportance`:
92 |
93 | ```{r echo=FALSE, warning=FALSE}
94 | library("mlr3learners")
95 | tab = as.data.table(mlr_learners)
96 | tab[sapply(properties, is.element, el = "importance"), key]
97 | ```
98 |
99 | If your learner is not listed here but capable of extracting variable importance from the fitted model, the reason is most likely that it is not yet integrated in the package [mlr3learners](https://github.com/mlr-org/mlr3learners) or the [extra learner extension](https://github.com/mlr-org/mlr3extralearners).
100 | Please open an issue so we can add your package.
101 |
102 | Some learners need to have their variable importance measure "activated" during learner creation.
103 | For example, to use the "impurity" measure of Random Forest via the {ranger} package:
104 |
105 | ```{r}
106 | task = tsk("iris")
107 | lrn = lrn("classif.ranger", seed = 42)
108 | lrn$param_set$values = list(importance = "impurity")
109 |
110 | filter = flt("importance", learner = lrn)
111 | filter$calculate(task)
112 | head(as.data.table(filter), 3)
113 | ```
114 |
115 | ### Performance Filter
116 |
117 | `FilterPerformance` is a univariate filter method which calls `resample()` with every predictor variable in the dataset and ranks the final outcome using the supplied measure.
118 | Any learner can be passed to this filter with `classif.featureless` being the default.
119 | Of course, also regression learners can be passed if the task is of type "regr".
120 |
121 |
122 | ### Filter-based Feature Selection
123 |
124 | In many cases filtering is only one step in the modeling pipeline.
125 | To select features based on filter values, one can use [`PipeOpFilter`](https://mlr3pipelines.mlr-org.com/reference/mlr_pipeops_filter.html) from [mlr3pipelines](https://github.com/mlr-org/mlr3pipelines).
126 |
127 | ```{r, results='hide'}
128 | library(mlr3pipelines)
129 | task = tsk("spam")
130 |
131 | # the `filter.frac` should be tuned
132 | graph = po("filter", filter = flt("auc"), filter.frac = 0.5) %>>%
133 | po("learner", lrn("classif.rpart"))
134 |
135 | learner = as_learner(graph)
136 | rr = resample(task, learner, rsmp("holdout"))
137 | ```
138 |
--------------------------------------------------------------------------------
/man-roxygen/details_praznik.R:
--------------------------------------------------------------------------------
1 | #' @details
2 | #' As the scores calculated by the \CRANpkg{praznik} package are not monotone due
3 | #' to the greedy forward fashion, the returned scores simply reflect the selection order:
4 | #' `1`, `(k-2)/(k-1)`, ..., `1/(k-1)`, `0` where `k` is the number of selected features.
5 | #'
6 | #' Threading is disabled by default (hyperparameter `threads` is set to 1).
7 | #' Set to a number `>= 2` to enable threading, or to `0` for auto-detecting the number
8 | #' of available cores.
9 |
--------------------------------------------------------------------------------
/man-roxygen/seealso_filter.R:
--------------------------------------------------------------------------------
1 | #' @seealso
2 | #' * [PipeOpFilter][mlr3pipelines::PipeOpFilter] for filter-based feature selection.
3 | #' * [Dictionary][mlr3misc::Dictionary] of [Filters][Filter]: [mlr_filters]
4 |
--------------------------------------------------------------------------------
/man/figures/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlr-org/mlr3filters/3019b3338ec91007833271edb1318fc04f1a7d54/man/figures/logo.png
--------------------------------------------------------------------------------
/man/figures/logo_navbar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlr-org/mlr3filters/3019b3338ec91007833271edb1318fc04f1a7d54/man/figures/logo_navbar.png
--------------------------------------------------------------------------------
/man/flt.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/flt.R
3 | \name{flt}
4 | \alias{flt}
5 | \alias{flts}
6 | \title{Syntactic Sugar for Filter Construction}
7 | \usage{
8 | flt(.key, ...)
9 |
10 | flts(.keys, ...)
11 | }
12 | \arguments{
13 | \item{.key}{(\code{character(1)})\cr
14 | Key passed to the respective \link[mlr3misc:Dictionary]{dictionary} to retrieve the object.}
15 |
16 | \item{...}{(any)\cr
17 | Additional arguments.}
18 |
19 | \item{.keys}{(\code{character()})\cr
20 | Keys passed to the respective \link[mlr3misc:Dictionary]{dictionary} to retrieve multiple objects.}
21 | }
22 | \value{
23 | \link{Filter}.
24 | }
25 | \description{
26 | These functions complement \link{mlr_filters} with functions in the spirit of \link[mlr3:mlr_sugar]{mlr3::mlr_sugar}.
27 | }
28 | \examples{
29 | flt("correlation", method = "kendall")
30 | flts(c("mrmr", "jmim"))
31 | }
32 |
--------------------------------------------------------------------------------
/man/mlr3filters-package.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/zzz.R
3 | \docType{package}
4 | \name{mlr3filters-package}
5 | \alias{mlr3filters}
6 | \alias{mlr3filters-package}
7 | \title{mlr3filters: Filter Based Feature Selection for 'mlr3'}
8 | \description{
9 | \if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}}
10 |
11 | Extends 'mlr3' with filter methods for feature selection. Besides standalone filter methods built-in methods of any machine-learning algorithm are supported. Partial scoring of multivariate filter methods is supported.
12 | }
13 | \seealso{
14 | Useful links:
15 | \itemize{
16 | \item \url{https://mlr3filters.mlr-org.com}
17 | \item \url{https://github.com/mlr-org/mlr3filters}
18 | \item Report bugs at \url{https://github.com/mlr-org/mlr3filters/issues}
19 | }
20 |
21 | }
22 | \author{
23 | \strong{Maintainer}: Marc Becker \email{marcbecker@posteo.de} (\href{https://orcid.org/0000-0002-8115-0400}{ORCID})
24 |
25 | Authors:
26 | \itemize{
27 | \item Patrick Schratz \email{patrick.schratz@gmail.com} (\href{https://orcid.org/0000-0003-0748-6624}{ORCID})
28 | \item Michel Lang \email{michellang@gmail.com} (\href{https://orcid.org/0000-0001-9754-0393}{ORCID})
29 | \item Bernd Bischl \email{bernd_bischl@gmx.net} (\href{https://orcid.org/0000-0001-6002-6980}{ORCID})
30 | \item Martin Binder \email{mlr.developer@mb706.com}
31 | \item John Zobolas \email{bblodfon@gmail.com} (\href{https://orcid.org/0000-0002-3609-8674}{ORCID})
32 | }
33 |
34 | }
35 |
--------------------------------------------------------------------------------
/man/mlr_filters.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/mlr_filters.R
3 | \docType{data}
4 | \name{mlr_filters}
5 | \alias{mlr_filters}
6 | \title{Dictionary of Filters}
7 | \format{
8 | \link[R6:R6Class]{R6::R6Class} object
9 | }
10 | \usage{
11 | mlr_filters
12 | }
13 | \description{
14 | A simple \link[mlr3misc:Dictionary]{mlr3misc::Dictionary} storing objects of class \link{Filter}.
15 | Each Filter has an associated help page, see \code{mlr_filters_[id]}.
16 |
17 | This dictionary can get populated with additional filters by add-on packages.
18 |
19 | For a more convenient way to retrieve and construct filters, see \code{\link[=flt]{flt()}}.
20 | }
21 | \section{Usage}{
22 |
23 |
24 | See \link[mlr3misc:Dictionary]{mlr3misc::Dictionary}.
25 | }
26 |
27 | \examples{
28 | mlr_filters$keys()
29 | as.data.table(mlr_filters)
30 | mlr_filters$get("mim")
31 | flt("anova")
32 | }
33 | \seealso{
34 | Other Filter:
35 | \code{\link{Filter}},
36 | \code{\link{mlr_filters_anova}},
37 | \code{\link{mlr_filters_auc}},
38 | \code{\link{mlr_filters_boruta}},
39 | \code{\link{mlr_filters_carscore}},
40 | \code{\link{mlr_filters_carsurvscore}},
41 | \code{\link{mlr_filters_cmim}},
42 | \code{\link{mlr_filters_correlation}},
43 | \code{\link{mlr_filters_disr}},
44 | \code{\link{mlr_filters_find_correlation}},
45 | \code{\link{mlr_filters_importance}},
46 | \code{\link{mlr_filters_information_gain}},
47 | \code{\link{mlr_filters_jmi}},
48 | \code{\link{mlr_filters_jmim}},
49 | \code{\link{mlr_filters_kruskal_test}},
50 | \code{\link{mlr_filters_mim}},
51 | \code{\link{mlr_filters_mrmr}},
52 | \code{\link{mlr_filters_njmim}},
53 | \code{\link{mlr_filters_performance}},
54 | \code{\link{mlr_filters_permutation}},
55 | \code{\link{mlr_filters_relief}},
56 | \code{\link{mlr_filters_selected_features}},
57 | \code{\link{mlr_filters_univariate_cox}},
58 | \code{\link{mlr_filters_variance}}
59 | }
60 | \concept{Dictionary}
61 | \concept{Filter}
62 | \keyword{datasets}
63 |
--------------------------------------------------------------------------------
/man/mlr_filters_anova.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/FilterAnova.R
3 | \name{mlr_filters_anova}
4 | \alias{mlr_filters_anova}
5 | \alias{FilterAnova}
6 | \title{ANOVA F-Test Filter}
7 | \description{
8 | ANOVA F-Test filter calling \code{\link[stats:aov]{stats::aov()}}. Note that this is
9 | equivalent to a \eqn{t}-test for binary classification.
10 |
11 | The filter value is \code{-log10(p)} where \code{p} is the \eqn{p}-value. This
12 | transformation is necessary to ensure numerical stability for very small
13 | \eqn{p}-values.
14 | }
15 | \examples{
16 | task = mlr3::tsk("iris")
17 | filter = flt("anova")
18 | filter$calculate(task)
19 | head(as.data.table(filter), 3)
20 |
21 | # transform to p-value
22 | 10^(-filter$scores)
23 |
24 | if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart"), quietly = TRUE)) {
25 | library("mlr3pipelines")
26 | task = mlr3::tsk("spam")
27 |
28 | # Note: the value of `filter.frac` is chosen arbitrarily here and should be tuned.
29 |
30 | graph = po("filter", filter = flt("anova"), filter.frac = 0.5) \%>>\%
31 | po("learner", mlr3::lrn("classif.rpart"))
32 |
33 | graph$train(task)
34 | }
35 | }
36 | \references{
37 | For a benchmark of filter methods:
38 |
39 | Bommert A, Sun X, Bischl B, Rahnenführer J, Lang M (2020).
40 | \dQuote{Benchmark for filter methods for feature selection in high-dimensional classification data.}
41 | \emph{Computational Statistics & Data Analysis}, \bold{143}, 106839.
42 | \doi{10.1016/j.csda.2019.106839}.
43 | }
44 | \seealso{
45 | \itemize{
46 | \item \link[mlr3pipelines:mlr_pipeops_filter]{PipeOpFilter} for filter-based feature selection.
47 | \item \link[mlr3misc:Dictionary]{Dictionary} of \link[=Filter]{Filters}: \link{mlr_filters}
48 | }
49 |
50 | Other Filter:
51 | \code{\link{Filter}},
52 | \code{\link{mlr_filters}},
53 | \code{\link{mlr_filters_auc}},
54 | \code{\link{mlr_filters_boruta}},
55 | \code{\link{mlr_filters_carscore}},
56 | \code{\link{mlr_filters_carsurvscore}},
57 | \code{\link{mlr_filters_cmim}},
58 | \code{\link{mlr_filters_correlation}},
59 | \code{\link{mlr_filters_disr}},
60 | \code{\link{mlr_filters_find_correlation}},
61 | \code{\link{mlr_filters_importance}},
62 | \code{\link{mlr_filters_information_gain}},
63 | \code{\link{mlr_filters_jmi}},
64 | \code{\link{mlr_filters_jmim}},
65 | \code{\link{mlr_filters_kruskal_test}},
66 | \code{\link{mlr_filters_mim}},
67 | \code{\link{mlr_filters_mrmr}},
68 | \code{\link{mlr_filters_njmim}},
69 | \code{\link{mlr_filters_performance}},
70 | \code{\link{mlr_filters_permutation}},
71 | \code{\link{mlr_filters_relief}},
72 | \code{\link{mlr_filters_selected_features}},
73 | \code{\link{mlr_filters_univariate_cox}},
74 | \code{\link{mlr_filters_variance}}
75 | }
76 | \concept{Filter}
77 | \section{Super class}{
78 | \code{\link[mlr3filters:Filter]{mlr3filters::Filter}} -> \code{FilterAnova}
79 | }
80 | \section{Methods}{
81 | \subsection{Public methods}{
82 | \itemize{
83 | \item \href{#method-FilterAnova-new}{\code{FilterAnova$new()}}
84 | \item \href{#method-FilterAnova-clone}{\code{FilterAnova$clone()}}
85 | }
86 | }
87 | \if{html}{\out{
88 | Inherited methods
89 |
90 |
95 | mlr3filters::Filter$calculate()
mlr3filters::Filter$format()
mlr3filters::Filter$help()
mlr3filters::Filter$print()
mlr3filters::Filter$calculate()
mlr3filters::Filter$format()
mlr3filters::Filter$help()
mlr3filters::Filter$print()
mlr3filters::Filter$calculate()
mlr3filters::Filter$format()
mlr3filters::Filter$help()
mlr3filters::Filter$print()
mlr3filters::Filter$calculate()
mlr3filters::Filter$format()
mlr3filters::Filter$help()
mlr3filters::Filter$print()
mlr3filters::Filter$calculate()
mlr3filters::Filter$format()
mlr3filters::Filter$help()
mlr3filters::Filter$print()
mlr3filters::Filter$calculate()
mlr3filters::Filter$format()
mlr3filters::Filter$help()
mlr3filters::Filter$print()
mlr3filters::Filter$calculate()
mlr3filters::Filter$format()
mlr3filters::Filter$help()
mlr3filters::Filter$print()
mlr3filters::Filter$calculate()
mlr3filters::Filter$format()
mlr3filters::Filter$help()
mlr3filters::Filter$print()
mlr3filters::Filter$calculate()
mlr3filters::Filter$format()
mlr3filters::Filter$help()
mlr3filters::Filter$print()
mlr3filters::Filter$calculate()
mlr3filters::Filter$format()
mlr3filters::Filter$help()
mlr3filters::Filter$print()
mlr3filters::Filter$calculate()
mlr3filters::Filter$format()
mlr3filters::Filter$help()
mlr3filters::Filter$print()
mlr3filters::Filter$calculate()
mlr3filters::Filter$format()
mlr3filters::Filter$help()
mlr3filters::Filter$print()
mlr3filters::Filter$calculate()
mlr3filters::Filter$format()
mlr3filters::Filter$help()
mlr3filters::Filter$print()
mlr3filters::Filter$calculate()
mlr3filters::Filter$format()
mlr3filters::Filter$help()
mlr3filters::Filter$print()
mlr3filters::Filter$calculate()
mlr3filters::Filter$format()
mlr3filters::Filter$help()
mlr3filters::Filter$print()
mlr3filters::Filter$calculate()
mlr3filters::Filter$format()
mlr3filters::Filter$help()
mlr3filters::Filter$print()
mlr3filters::Filter$calculate()
mlr3filters::Filter$format()
mlr3filters::Filter$help()
mlr3filters::Filter$print()
mlr3filters::Filter$calculate()
mlr3filters::Filter$format()
mlr3filters::Filter$help()
mlr3filters::Filter$print()
mlr3filters::Filter$calculate()
mlr3filters::Filter$format()
mlr3filters::Filter$help()
mlr3filters::Filter$print()
mlr3filters::Filter$calculate()
mlr3filters::Filter$format()
mlr3filters::Filter$help()
mlr3filters::Filter$print()