├── .Rbuildignore
├── .editorconfig
├── .github
    └── workflows
    │   ├── pkgdown.yml
    │   └── r-cmd-check.yml
├── .gitignore
├── .ignore
├── .lintr
├── CRAN-SUBMISSION
├── DESCRIPTION
├── LICENSE
├── NAMESPACE
├── NEWS.md
├── R
    ├── MeasureFairness.R
    ├── MeasureFairnessComposite.R
    ├── MeasureFairnessConstraint.R
    ├── MeasurePositiveProbability.R
    ├── MeasureSubgroup.R
    ├── PipeOpEOd.R
    ├── PipeOpExplicitPta.R
    ├── PipeOpReweighing.R
    ├── aaa.R
    ├── adult.R
    ├── assertions.R
    ├── bibentries.R
    ├── compare_metrics.R
    ├── compas.R
    ├── compute_metrics.R
    ├── fairness_accuracy_tradeoff.R
    ├── fairness_prediction_density.R
    ├── fairness_tensor.R
    ├── helpers.R
    ├── learner_fairml_classif_fairfgrrm.R
    ├── learner_fairml_classif_fairzlrm.R
    ├── learner_fairml_regr_fairfrrm.R
    ├── learner_fairml_regr_fairnclm.R
    ├── learner_fairml_regr_fairzlm.R
    ├── measure_operations.R
    ├── mlr_learners_fairness.R
    ├── mlr_measures_fairness.R
    ├── reports.R
    └── zzz.R
├── README.Rmd
├── README.md
├── attic
    ├── PipeOpEOd.R
    ├── compare.R
    ├── create_learners.R
    ├── make_reports.R
    ├── rfc_bias_mitigation.md
    ├── rfc_metrics.md
    ├── rfc_template.md
    ├── rfc_visualization.md
    └── tests_edge_cases.R
├── cran-comments.md
├── data-raw
    ├── adult-test-raw.csv
    ├── adult-train-raw.csv
    ├── adult.R
    ├── compas-scores-two-years.csv
    └── compas.R
├── data
    ├── adult_test.rda
    ├── adult_train.rda
    └── compas.rda
├── inst
    ├── references.bib
    └── rmarkdown
    │   └── templates
    │       ├── datasheets
    │           ├── resources
    │           │   └── template.html
    │           ├── skeleton
    │           │   ├── LICENSE
    │           │   ├── references.bib
    │           │   ├── skeleton.Rmd
    │           │   └── style.css
    │           └── template.yaml
    │       ├── fairness_report
    │           ├── skeleton
    │           │   ├── LICENSE
    │           │   ├── references.bib
    │           │   └── skeleton.Rmd
    │           └── template.yaml
    │       └── modelcards
    │           ├── skeleton
    │               ├── LICENSE
    │               ├── references.bib
    │               ├── skeleton.Rmd
    │               └── style.css
    │           └── template.yaml
├── man-roxygen
    ├── class_learner.R
    ├── example.R
    ├── field_base_measure.R
    ├── field_operation.R
    ├── intersect.R
    ├── param_base_measure.R
    ├── pta.R
    └── seealso_learner.R
├── man
    ├── MeasureFairness.Rd
    ├── MeasureFairnessComposite.Rd
    ├── MeasureFairnessConstraint.Rd
    ├── MeasureSubgroup.Rd
    ├── adult.Rd
    ├── compas.Rd
    ├── compute_metrics.Rd
    ├── fairness_accuracy_tradeoff.Rd
    ├── fairness_compare_metrics.Rd
    ├── fairness_prediction_density.Rd
    ├── fairness_tensor.Rd
    ├── figures
    │   ├── scale_mlr3.png
    │   └── unnamed-chunk-6-1.png
    ├── groupdiff_tau.Rd
    ├── groupwise_metrics.Rd
    ├── mlr3fairness-package.Rd
    ├── mlr_learners_classif.fairfgrrm.Rd
    ├── mlr_learners_classif.fairzlrm.Rd
    ├── mlr_learners_fairness.Rd
    ├── mlr_learners_regr.fairfrrm.Rd
    ├── mlr_learners_regr.fairnclm.Rd
    ├── mlr_learners_regr.fairzlm.Rd
    ├── mlr_measures_fairness.Rd
    ├── mlr_measures_positive_probability.Rd
    ├── mlr_pipeops_equalized_odds.Rd
    ├── mlr_pipeops_explicit_pta.Rd
    ├── mlr_pipeops_reweighing.Rd
    ├── report_datasheet.Rd
    ├── report_fairness.Rd
    ├── report_modelcard.Rd
    └── task_summary.Rd
├── mlr3fairness.Rproj
├── paper
    ├── .gitignore
    ├── RJournal.sty
    ├── RJwrapper.bbl
    ├── Rlogo.pdf
    ├── mlr3fairness.Rmd
    ├── mlr3fairness.bib
    ├── mlr3fairness.html
    └── presentations
    │   ├── dagstat_2022.Rmd
    │   ├── dagstat_2022.html
    │   ├── images
    │       ├── center.Rmd
    │       ├── center.png
    │       ├── center.svg
    │       └── qr.png
    │   ├── references.bib
    │   └── style.css
├── pkgdown
    ├── _pkgdown.yml
    └── favicon
    │   ├── apple-touch-icon-120x120.png
    │   ├── apple-touch-icon-152x152.png
    │   ├── apple-touch-icon-180x180.png
    │   ├── apple-touch-icon-60x60.png
    │   ├── apple-touch-icon-76x76.png
    │   ├── apple-touch-icon.png
    │   ├── favicon-16x16.png
    │   ├── favicon-32x32.png
    │   └── favicon.ico
├── tests
    ├── testthat.R
    └── testthat
    │   ├── helper_data.R
    │   ├── helper_learner_tests.R
    │   ├── helper_test.R
    │   ├── test_datasets.R
    │   ├── test_fairness_tensor.R
    │   ├── test_learners_fairml.R
    │   ├── test_learners_fairml_ptas.R
    │   ├── test_measure_subgroup.R
    │   ├── test_measures.R
    │   ├── test_measures_operations.R
    │   ├── test_pipeop_eod.R
    │   ├── test_pipeop_explicit_pta.R
    │   ├── test_pipeop_reweighing.R
    │   ├── test_report_modelcard_datasheet.R
    │   ├── test_use_modelcard_datasheet.R
    │   ├── test_visualizations.R
    │   └── test_write_files.R
└── vignettes
    ├── .gitignore
    ├── debiasing-vignette.Rmd
    ├── measures-vignette.Rmd
    ├── reports-vignette.Rmd
    └── visualization-vignette.Rmd


/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^LICENSE$
 2 | ^.ignore$
 3 | ^.editorconfig$
 4 | ^.gitignore$
 5 | ^.git$
 6 | ^.github$
 7 | ^.*\.Rproj$
 8 | ^\.Rproj\.user$
 9 | ^man-roxygen$
10 | ^attic$
11 | ^pkgdown$
12 | ^inst/extdata/.+\.R$
13 | ^README\.Rmd$
14 | ^revdep$
15 | ^paper$
16 | ^README.html$
17 | ^\.pre-commit-config\.yaml$
18 | ^\.ccache$
19 | ^\.github$
20 | ^\.lintr$
21 | ^tic\.R$
22 | ^docs$
23 | ^\.vscode$
24 | ^codemeta\.json$
25 | ^data-raw$
26 | ^doc$
27 | ^Meta$
28 | ^README_files$
29 | ^figure$
30 | ^rmarkdown$
31 | ^documentation$
32 | ^cran-comments\.md$
33 | ^CRAN-SUBMISSION$
34 | ^'NULL'$


--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
 1 | # See http://editorconfig.org
 2 | root = true
 3 | 
 4 | [*]
 5 | charset = utf-8
 6 | end_of_line = lf
 7 | insert_final_newline = true
 8 | indent_style = space
 9 | trim_trailing_whitespace = true
10 | 
11 | [*.{r,R,md,Rmd}]
12 | indent_size = 2
13 | 
14 | [*.{c,h}]
15 | indent_size = 4
16 | 
17 | [*.{cpp,hpp}]
18 | indent_size = 4
19 | 
20 | [{NEWS.md,DESCRIPTION,LICENSE}]
21 | max_line_length = 80
22 | 


--------------------------------------------------------------------------------
/.github/workflows/pkgdown.yml:
--------------------------------------------------------------------------------
 1 | # pkgdown workflow of the mlr3 ecosystem v0.1.0
 2 | # https://github.com/mlr-org/actions
 3 | on:
 4 |   push:
 5 |     branches:
 6 |       - main
 7 |   pull_request:
 8 |     branches:
 9 |       - main
10 |   release:
11 |     types:
12 |       - published
13 |   workflow_dispatch:
14 | 
15 | name: pkgdown
16 | 
17 | jobs:
18 |   pkgdown:
19 |     runs-on: ubuntu-latest
20 | 
21 |     concurrency:
22 |       group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
23 |     env:
24 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
25 |     steps:
26 |       - uses: actions/checkout@v3
27 | 
28 |       - uses: r-lib/actions/setup-pandoc@v2
29 | 
30 |       - uses: r-lib/actions/setup-r@v2
31 | 
32 |       - uses: r-lib/actions/setup-r-dependencies@v2
33 |         with:
34 |           extra-packages: any::pkgdown, local::.
35 |           needs: website
36 | 
37 |       - name: Install template
38 |         run: pak::pkg_install("mlr-org/mlr3pkgdowntemplate")
39 |         shell: Rscript {0}
40 | 
41 |       - name: Build site
42 |         run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
43 |         shell: Rscript {0}
44 | 
45 |       - name: Deploy
46 |         if: github.event_name != 'pull_request'
47 |         uses: JamesIves/github-pages-deploy-action@v4.4.1
48 |         with:
49 |           clean: false
50 |           branch: gh-pages
51 |           folder: docs
52 | 


--------------------------------------------------------------------------------
/.github/workflows/r-cmd-check.yml:
--------------------------------------------------------------------------------
 1 | # r cmd check workflow of the mlr3 ecosystem v0.3.1
 2 | # https://github.com/mlr-org/actions
 3 | on:
 4 |   workflow_dispatch:
 5 |     inputs:
 6 |       debug_enabled:
 7 |         type: boolean
 8 |         description: 'Run the build with tmate debugging enabled'
 9 |         required: false
10 |         default: false
11 |   push:
12 |     branches:
13 |       - main
14 |   pull_request:
15 |     branches:
16 |       - main
17 | 
18 | name: r-cmd-check
19 | 
20 | jobs:
21 |   r-cmd-check:
22 |     runs-on: ${{ matrix.config.os }}
23 | 
24 |     name: ${{ matrix.config.os }} (${{ matrix.config.r }})
25 | 
26 |     env:
27 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
28 | 
29 |     strategy:
30 |       fail-fast: false
31 |       matrix:
32 |         config:
33 |           - {os: ubuntu-latest,   r: 'devel'}
34 |           - {os: ubuntu-latest,   r: 'release'}
35 | 
36 |     steps:
37 |       - uses: actions/checkout@v3
38 | 
39 |       - uses: r-lib/actions/setup-pandoc@v2
40 | 
41 |       - uses: r-lib/actions/setup-r@v2
42 |         with:
43 |           r-version: ${{ matrix.config.r }}
44 | 
45 |       - uses: r-lib/actions/setup-r-dependencies@v2
46 |         with:
47 |           extra-packages: any::rcmdcheck
48 |           needs: check
49 | 
50 |       - uses: mxschmitt/action-tmate@v3
51 |         if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }}
52 |         with:
53 |           limit-access-to-actor: true
54 | 
55 |       - uses: r-lib/actions/check-r-package@v2
56 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # History files
  2 | .Rhistory
  3 | .Rapp.history
  4 | 
  5 | # Session Data files
  6 | .RData
  7 | 
  8 | # User-specific files
  9 | .Ruserdata
 10 | 
 11 | # Example code in package build process
 12 | *-Ex.R
 13 | 
 14 | # Output files from R CMD build
 15 | /*.tar.gz
 16 | 
 17 | # Output files from R CMD check
 18 | /*.Rcheck/
 19 | 
 20 | # RStudio files
 21 | .Rproj.user/
 22 | 
 23 | # produced vignettes
 24 | vignettes/*.html
 25 | vignettes/*.pdf
 26 | 
 27 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
 28 | .httr-oauth
 29 | 
 30 | # knitr and R markdown default cache directories
 31 | *_cache/
 32 | /cache/
 33 | 
 34 | # Temporary files created by R markdown
 35 | *.utf8.md
 36 | *.knit.md
 37 | 
 38 | # R Environment Variables
 39 | .Renviron
 40 | 
 41 | # pkgdown site
 42 | docs/
 43 | 
 44 | # General
 45 | .DS_Store
 46 | .AppleDouble
 47 | .LSOverride
 48 | 
 49 | # Icon must end with two \r
 50 | Icon
 51 | 
 52 | 
 53 | 
 54 | # Thumbnails
 55 | ._*
 56 | 
 57 | # Files that might appear in the root of a volume
 58 | .DocumentRevisions-V100
 59 | .fseventsd
 60 | .Spotlight-V100
 61 | .TemporaryItems
 62 | .Trashes
 63 | .VolumeIcon.icns
 64 | .com.apple.timemachine.donotpresent
 65 | 
 66 | # Directories potentially created on remote AFP share
 67 | .AppleDB
 68 | .AppleDesktop
 69 | Network Trash Folder
 70 | Temporary Items
 71 | .apdisk
 72 | 
 73 | # Windows thumbnail cache files
 74 | Thumbs.db
 75 | Thumbs.db:encryptable
 76 | ehthumbs.db
 77 | ehthumbs_vista.db
 78 | 
 79 | # Dump file
 80 | *.stackdump
 81 | 
 82 | # Folder config file
 83 | [Dd]esktop.ini
 84 | 
 85 | # Recycle Bin used on file shares
 86 | $RECYCLE.BIN/
 87 | 
 88 | # Windows Installer files
 89 | *.cab
 90 | *.msi
 91 | *.msix
 92 | *.msm
 93 | *.msp
 94 | 
 95 | # Windows shortcuts
 96 | *.lnk
 97 | 
 98 | .vscode/*
 99 | !.vscode/settings.json
100 | !.vscode/tasks.json
101 | !.vscode/launch.json
102 | !.vscode/extensions.json
103 | *.code-workspace
104 | .vscode
105 | /doc/
106 | /Meta/
107 | 
108 | # Ignore local symlinks
109 | rmarkdown
110 | !inst/rmarkdown
111 | inst/doc
112 | 
113 | documentation
114 | 
115 | paper/submission
116 | paper/submission.zip
117 | 


--------------------------------------------------------------------------------
/.ignore:
--------------------------------------------------------------------------------
1 | man/
2 | attic/
3 | pkgdown/
4 | revdep/
5 | 


--------------------------------------------------------------------------------
/.lintr:
--------------------------------------------------------------------------------
 1 | linters: linters_with_defaults(
 2 |     # lintr defaults: https://github.com/jimhester/lintr#available-linters
 3 |     # the following setup changes/removes certain linters
 4 |     assignment_linter = NULL, # do not force using <- for assignments
 5 |     object_name_linter = object_name_linter(c("snake_case", "CamelCase")), # only allow snake case and camel case object names
 6 |     cyclocomp_linter = NULL, # do not check function complexity
 7 |     commented_code_linter = NULL, # allow code in comments
 8 |     line_length_linter = line_length_linter(120)
 9 |     )
10 | 
11 | 


--------------------------------------------------------------------------------
/CRAN-SUBMISSION:
--------------------------------------------------------------------------------
1 | Version: 0.3.0
2 | Date: 2022-05-10 11:41:20 UTC
3 | SHA: 7f715c247a256d3a09c175fe541b8aac120744c5
4 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: mlr3fairness
 2 | Type: Package
 3 | Title: Fairness Auditing and Debiasing for 'mlr3'
 4 | Version: 0.3.2
 5 | Authors@R:
 6 |     c(
 7 |       person(given = "Florian",
 8 |              family = "Pfisterer",
 9 |              role = c("cre", "aut"),
10 |              email = "pfistererf@googlemail.com",
11 |              comment = c(ORCID = "0000-0001-8867-762X")),
12 |       person(given = "Wei",
13 |                family = "Siyi",
14 |                role = "aut",
15 |                email = "weisiyi2@gmail.com"),
16 |       person(given = "Michel",
17 |              family = "Lang",
18 |              role = "aut",
19 |              email = "michellang@gmail.com",
20 |              comment = c(ORCID = "0000-0001-9754-0393"))
21 |     )
22 | Description:
23 |     Integrates fairness auditing and bias mitigation methods for the 'mlr3' ecosystem.
24 |     This includes fairness metrics, reporting tools, visualizations and bias mitigation techniques such as
25 |     "Reweighing" described in 'Kamiran, Calders' (2012) <doi:10.1007/s10115-011-0463-8>  and
26 |     "Equalized Odds" described in 'Hardt et al.' (2016) <https://papers.nips.cc/paper/2016/file/9d2682367c3935defcb1f9e247a97c0d-Paper.pdf>.
27 |     Integration with 'mlr3' allows for auditing of ML models as well as convenient joint tuning of
28 |     machine learning algorithms and debiasing methods.
29 | URL: https://mlr3fairness.mlr-org.com, https://github.com/mlr-org/mlr3fairness
30 | BugReports: https://github.com/mlr-org/mlr3fairness/issues
31 | License: LGPL-3
32 | Encoding: UTF-8
33 | LazyData: true
34 | Depends:
35 |     R (>= 3.4.0)
36 | Imports:
37 |     checkmate,
38 |     R6 (>= 2.4.1),
39 |     data.table (>= 1.13.6),
40 |     paradox,
41 |     mlr3 (>= 0.13.0),
42 |     mlr3measures,
43 |     mlr3misc,
44 |     mlr3pipelines,
45 |     mlr3learners,
46 |     rlang,
47 |     ggplot2
48 | Suggests:
49 |     testthat (>= 3.1.0),
50 |     CVXR,
51 |     patchwork,
52 |     rpart,
53 |     ranger,
54 |     mlr3viz,
55 |     linprog,
56 |     rmarkdown,
57 |     knitr,
58 |     posterdown,
59 |     kableExtra,
60 |     fairml,
61 |     iml
62 | RoxygenNote: 7.3.1
63 | Config/testthat/edition: 3
64 | Config/testthat/parallel: false
65 | Roxygen: list(markdown = TRUE)
66 | VignetteBuilder: knitr
67 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | S3method(compare_metrics,BenchmarkResult)
 4 | S3method(compare_metrics,PredictionClassif)
 5 | S3method(compare_metrics,ResampleResult)
 6 | S3method(fairness_accuracy_tradeoff,BenchmarkResult)
 7 | S3method(fairness_accuracy_tradeoff,PredictionClassif)
 8 | S3method(fairness_accuracy_tradeoff,ResampleResult)
 9 | S3method(fairness_prediction_density,BenchmarkResult)
10 | S3method(fairness_prediction_density,PredictionClassif)
11 | S3method(fairness_prediction_density,ResampleResult)
12 | S3method(fairness_tensor,PredictionClassif)
13 | S3method(fairness_tensor,ResampleResult)
14 | S3method(fairness_tensor,data.table)
15 | export(LearnerClassifFairfgrrm)
16 | export(LearnerClassifFairzlrm)
17 | export(LearnerRegrFairfrrm)
18 | export(LearnerRegrFairnclm)
19 | export(LearnerRegrFairzlm)
20 | export(MeasureFairness)
21 | export(MeasureFairnessComposite)
22 | export(MeasureFairnessConstraint)
23 | export(MeasurePositiveProbability)
24 | export(MeasureSubgroup)
25 | export(PipeOpEOd)
26 | export(PipeOpExplicitPta)
27 | export(PipeOpReweighingOversampling)
28 | export(PipeOpReweighingWeights)
29 | export(compare_metrics)
30 | export(compute_metrics)
31 | export(fairness_accuracy_tradeoff)
32 | export(fairness_prediction_density)
33 | export(fairness_tensor)
34 | export(groupdiff_absdiff)
35 | export(groupdiff_diff)
36 | export(groupdiff_tau)
37 | export(groupwise_metrics)
38 | export(mlr_learners_fairness)
39 | export(mlr_measures_fairness)
40 | export(report_datasheet)
41 | export(report_fairness)
42 | export(report_modelcard)
43 | export(task_summary)
44 | import(R6)
45 | import(checkmate)
46 | import(data.table)
47 | import(ggplot2)
48 | import(mlr3)
49 | import(mlr3learners)
50 | import(mlr3measures)
51 | import(mlr3misc)
52 | import(mlr3pipelines)
53 | import(paradox)
54 | importFrom(rlang,.data)
55 | importFrom(stats,dist)
56 | importFrom(stats,na.omit)
57 | importFrom(stats,predict)
58 | importFrom(stats,runif)
59 | importFrom(stats,setNames)
60 | importFrom(utils,bibentry)
61 | importFrom(utils,data)
62 | importFrom(utils,getFromNamespace)
63 | 


--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
 1 | # mlr3fairness 0.3.2
 2 | * The `Task` printer now prints the protected attribute.
 3 | * `fairness_tensor` has a new argument `normalize` that controls normalization.
 4 |   Possible values are `"all"` (global), `"group"` (by group) and `"none"` (no normalization).
 5 | * Fix: unloading `learners`, `tasks`, `measures` and `pipeops` now removes them from the dictionary.
 6 | 
 7 | # mlr3fairness 0.3.1
 8 | 
 9 | * Minor update to improve stability of unit tests and vignette building on CRAN.
10 | 
11 | # mlr3fairness 0.3.0
12 | 
13 | * CRAN release version
14 | * Minor improvements for `groupwise_metrics`, can now `intersect` multiple protected attributes.
15 | * New metric, `fairness.pp` that allows for computing predictive parity.
16 | * New metric, `fairness.cv` that allows for computing the Calders-Verwer gap.
17 | * Add `PipeOpExplicitPta` that copies the `pta` column into a separate column.
18 | * Minor bug fixes and improved tests for multiple learners and pipeops.
19 | 
20 | # mlr3fairness 0.2.0
21 | 
22 | * Added 3 types of reports: `report_modelcard`, `report_datasheet`, `report_fairness`
23 | * Added several new fairness metrics
24 | * Added 5 new learners (2 classification 3 regression) from package `fairml`.
25 |   * classification: `classif.fairzlrm` | `classif.fairfgrrm`
26 |   * regression:     `regr.fairnclm` | `regr.fairzlm` | `regr.fairfrrm`.
27 | * Added `MeasureSubgroup` and `groupwise_metrics` that allow for inspecting dis-aggregated fairness metrics. 
28 | 
29 | # mlr3fairness 0.1.0
30 | 
31 | * Added a `NEWS.md` file to track changes to the package.
32 | 
33 | 


--------------------------------------------------------------------------------
/R/MeasureFairness.R:
--------------------------------------------------------------------------------
 1 | #' @title Base Measure for Fairness
 2 | #'
 3 | #' @description
 4 | #' This measure extends [mlr3::Measure()] with statistical group fairness:
 5 | #' A common approach to quantifying a model's fairness is to compute the difference between a
 6 | #' protected and an unprotected group with respect to some performance metric, e.g.
 7 | #' `classification error` ([mlr_measures_classif.ce]) or `false positive rate`
 8 | #' ([mlr_measures_classif.fpr]).
 9 | #' The operation for comparison (e.g., difference or quotient) can be specified using the `operation`
10 | #' parameter, e.g. [groupdiff_absdiff()] or [groupdiff_tau()].
11 | #'
12 | #' Composite measures encompassing multiple fairness metrics can be built using
13 | #' [MeasureFairnessComposite].
14 | #'
15 | #' Some popular predefined measures can be found in the [dictionary][mlr3misc::Dictionary] [mlr_measures].
16 | #'
17 | #' @template pta
18 | #'
19 | #' @seealso [MeasureFairnessComposite]
20 | #' @export
21 | #' @examples
22 | #' library("mlr3")
23 | #' # Create MeasureFairness to measure the Predictive Parity.
24 | #' t = tsk("adult_train")
25 | #' learner = lrn("classif.rpart", cp = .01)
26 | #' learner$train(t)
27 | #' measure = msr("fairness", base_measure = msr("classif.ppv"))
28 | #' predictions = learner$predict(t)
29 | #' predictions$score(measure, task = t)
30 | MeasureFairness = R6::R6Class("MeasureFairness", inherit = Measure,
31 |   public = list(
32 |     #' @template field_base_measure
33 |     base_measure = NULL,
34 | 
35 |     #' @template field_operation
36 |     operation = NULL,
37 | 
38 |     #' @description
39 |     #' Creates a new instance of this [R6][R6::R6Class] class.
40 |     #'
41 |     #' @param id (`character`)\cr
42 |     #'   The measure's id. Set to 'fairness.<base_measure_id>' if omitted.
43 |     #' @template param_base_measure
44 |     #' @param operation (`function`)\cr
45 |     #'   The operation used to compute the difference. A function that returns
46 |     #'   a single value given input: computed metric for each subgroup.
47 |     #'   Defaults to [groupdiff_absdiff].
48 |     #' @param minimize (`logical()`)\cr
49 |     #'   Should the measure be minimized? Defaults to `TRUE`.
50 |     #' @param range (`numeric(2)`)\cr
51 |     #'   Range of the resulting measure. Defaults to `c(-Inf, Inf)`.
52 |     initialize = function(id = NULL, base_measure, operation = groupdiff_absdiff, minimize = TRUE,
53 |       range = c(-Inf, Inf)) {
54 |       self$operation = assert_function(operation)
55 |       self$base_measure = assert_measure(as_measure(base_measure))
56 |       # From here on only the validated/converted `self$base_measure` is used, not the raw argument.
57 |       if (is.null(id)) {
58 |         id = replace_prefix(self$base_measure$id, mlr_reflections$task_types$type, "fairness.")
59 |       }
60 |       super$initialize(
61 |         id = id,
62 |         range = range,
63 |         task_type = self$base_measure$task_type,
64 |         properties = "requires_task",
65 |         minimize = minimize,
66 |         predict_type = self$base_measure$predict_type,
67 |         packages = "mlr3fairness",
68 |         man = "mlr3fairness::mlr_measures_fairness"
69 |       )
70 |     }
71 |   ),
72 | 
73 |   private = list(
74 |     .score = function(prediction, task, ...) {
75 |       assert_pta_task(task) # presumably checks that the task declares a protected attribute ("pta")
76 |       mvals = score_groupwise(prediction, self$base_measure, task, ...) # base measure per subgroup
77 |       invoke(self$operation, mvals) # reduce the group-wise values to a single fairness score
78 |     }
79 |   )
80 | )
81 | 
82 | mlr_measures$add("fairness", MeasureFairness)
83 | 


--------------------------------------------------------------------------------
/R/MeasureFairnessComposite.R:
--------------------------------------------------------------------------------
 1 | #' @title Composite Fairness Measure
 2 | #'
 3 | #' @description
 4 | #' Computes a composite measure from multiple fairness metrics and aggregates them
 5 | #' using `aggfun` (defaulting to [mean()]).
 6 | #'
 7 | #' @template pta
 8 | #'
 9 | #' @export
10 | #' @examples
11 | #' library("mlr3")
12 | #' # Equalized Odds Metric
13 | #' MeasureFairnessComposite$new(measures = msrs(c("fairness.fpr", "fairness.tpr")))
14 | #'
15 | #' # Other metrics e.g. based on negative rates
16 | #' MeasureFairnessComposite$new(measures = msrs(c("fairness.fnr", "fairness.tnr")))
17 | MeasureFairnessComposite = R6::R6Class("MeasureFairnessComposite", inherit = Measure,
18 |   public = list(
19 |     #' @description
20 |     #' Creates a new instance of this [R6][R6::R6Class] class.
21 |     #'
22 |     #' @param id (`character(1)`)\cr
23 |     #'   Id of the measure; `"fairness."` is always prepended. Defaults to the concatenated ids of `measures`.
24 |     #' @param measures (list of [MeasureFairness])\cr
25 |     #'   List of fairness measures to aggregate.
26 |     #' @param aggfun (`function()`)\cr
27 |     #'   Aggregation function used to aggregate results from respective measures. Defaults to `mean`.
28 |     #' @param operation (`function()`)\cr
29 |     #'   Currently not used by this class; each measure in `measures` is scored as it is.
30 |     #'   The argument is kept so that existing calls continue to work.
31 |     #'   Defaults to `groupdiff_absdiff`.
32 |     #'   See `MeasureFairness` for more information.
33 |     #' @param minimize (`logical(1)`)\cr
34 |     #'   Should the measure be minimized? Defaults to `TRUE`.
35 |     #' @param range (`numeric(2)`)\cr
36 |     #'   Range of the resulting measure. Defaults to `c(-Inf, Inf)`.
37 |     initialize = function(id = NULL, measures, aggfun = function(x) mean(x),
38 |       operation = groupdiff_absdiff, minimize = TRUE, range = c(-Inf, Inf)) {
39 |       # NOTE: `operation` is accepted for backward compatibility but never read below.
40 |       private$.measures = assert_measures(as_measures(measures))
41 |       private$.aggfun = assert_function(aggfun)
42 |       assert_true(all(map_chr(private$.measures, "task_type") == private$.measures[[1]]$task_type))
43 | 
44 |       if (is.null(id)) {
45 |         short_ids = replace_prefix(ids(private$.measures), c(mlr_reflections$task_types$type, "fairness"), "")
46 |         id = paste0(short_ids, collapse = "_")
47 |       }
48 | 
49 |       super$initialize(
50 |         id = sprintf("fairness.%s", assert_string(id)),
51 |         range = range,
52 |         properties = "requires_task",
53 |         minimize = minimize,
54 |         predict_type = unique(unlist(map(private$.measures, "predict_type"))),
55 |         task_type = private$.measures[[1]]$task_type,
56 |         packages = "mlr3fairness",
57 |         man = "mlr3fairness::MeasureFairnessComposite"
58 |       )
59 |     }
60 |   ),
61 | 
62 |   private = list(
63 |     .measures = NULL,
64 |     .aggfun = NULL,
65 |     .score = function(prediction, task, ...) {
66 |       private$.aggfun(
67 |         map_dbl(private$.measures, function(m) {
68 |           prediction$score(m, task = task, ...)
69 |         })
70 |       )
71 |     }
72 |   )
73 | )
74 | 


--------------------------------------------------------------------------------
/R/MeasureFairnessConstraint.R:
--------------------------------------------------------------------------------
 1 | #' @title Fairness Constraint Measure
 2 | #'
 3 | #' @description
 4 | #'   This measure allows constructing 'constraint' measures of the following form:\cr
 5 | #'   \deqn{min performance subject to fairness < \epsilon}
 6 | #'
 7 | #' @template pta
 8 | #' @seealso mlr_measures_fairness
 9 | #' @export
10 | #' @examples
11 | #' # Accuracy subject to a fairness constraint on the true positive rate:
12 | #' library("mlr3")
13 | #' t = tsk("adult_train")
14 | #' learner = lrn("classif.rpart", cp = .01)
15 | #' learner$train(t)
16 | #' measure = msr("fairness.constraint", id = "acc_tpr", msr("classif.acc"), msr("fairness.tpr"))
17 | #' predictions = learner$predict(t)
18 | #' predictions$score(measure, task = t)
19 | MeasureFairnessConstraint = R6::R6Class("MeasureFairnessConstraint", inherit = Measure,
20 |   public = list(
21 |     #' @field performance_measure (`Measure()`)\cr
22 |     #' The performance measure to be used.
23 |     performance_measure = NULL,
24 |     #' @field fairness_measure (`Measure()`)\cr
25 |     #' The fairness measure to be used.
26 |     fairness_measure = NULL,
27 |     #' @field epsilon (`numeric`)\cr
28 |     #' Deviation from perfect fairness that is allowed.
29 |     epsilon = NULL,
30 | 
31 |     #' @description
32 |     #' Creates a new instance of this [R6][R6::R6Class] class.
33 |     #' @param id (`character`)\cr
34 |     #'   The measure's id. Set to 'fairness.<performance_id>_<fairness_id>_cstrt' if omitted.
35 |     #' @param performance_measure (`Measure()`)\cr
36 |     #' The measure used to measure performance (e.g. accuracy).
37 |     #' @param fairness_measure (`Measure()`)\cr
38 |     #' The measure used to measure fairness (e.g. equalized odds).
39 |     #' @param epsilon (`numeric`)\cr
40 |     #' Allowed divergence from perfect fairness. Initialized to 0.01.
41 |     #' @param range (`numeric`)\cr
42 |     #' Range of the resulting measure. Defaults to `c(-Inf, Inf)`.
43 |     initialize = function(
44 |       id = NULL, performance_measure, fairness_measure, epsilon = 0.01, range = c(-Inf, Inf)) {
45 |       self$performance_measure = assert_measure(performance_measure)
46 |       self$fairness_measure = assert_measure(fairness_measure)
47 |       assert_true(all(self$performance_measure$task_type == self$fairness_measure$task_type))
48 |       self$epsilon = assert_number(epsilon)
49 | 
50 |       if (is.null(id)) {
51 |         # Strip task-type names and "fairness" from both ids, drop the dots, then join the remainders.
52 |         type_pattern = paste0(c(mlr_reflections$task_types$type, "fairness"), collapse = "|")
53 |         metric_ids = c(self$performance_measure$id, self$fairness_measure$id)
54 |         metrics_short = gsub("\\.", "", gsub(type_pattern, "", metric_ids))
55 |         id = paste0("fairness.", paste0(metrics_short, collapse = "_"), "_cstrt")
56 |       }
57 |       super$initialize(
58 |         id = id,
59 |         range = assert_numeric(range, len = 2),
60 |         properties = "requires_task",
61 |         task_type = self$performance_measure$task_type,
62 |         minimize = assert_flag(self$performance_measure$minimize),
63 |         predict_type = self$performance_measure$predict_type,
64 |         packages = "mlr3fairness",
65 |         man = "mlr3fairness::MeasureFairnessConstraint"
66 |       )
67 |     }
68 |   ),
69 | 
70 |   private = list(
71 |     .score = function(prediction, task, ...) {
72 |       assert_pta_task(task)
73 |       eps = self$epsilon
74 |       if (!self$fairness_measure$minimize) { # nocov start
75 |         stop("Only minimized fairness measures are currently supported!")
76 |       } # nocov end
77 |       fair = self$fairness_measure$score(prediction, task, ...)
78 |       perf = self$performance_measure$score(prediction, task, ...)
79 | 
80 |       assert_number(perf, lower = 0)
81 |       prange = self$performance_measure$range
82 |       # Only minimized fairness measures reach this point, so the optimum is the lower range bound.
83 |       opt_fairness = min(self$fairness_measure$range)
84 | 
85 |       if (is.infinite(opt_fairness)) warning("Fairness measure has infinite range!")
86 |       is_fair = abs(opt_fairness - fair) < eps
87 |       # Branch instead of masking with `is_fair * x`: `0 * Inf` is NaN in R, which turned
88 |       # fair predictions into NaN whenever the performance range is unbounded (e.g. c(0, Inf)).
89 |       if (is.na(is_fair)) return(NA_real_)
90 |       if (is_fair) return(perf)
91 |       # Constraint violated: leave the performance range on its bad side, shifted by the unfairness.
92 |       if (self$minimize) max(prange) + fair else min(prange) - fair
93 |     }
94 |   )
95 | )
96 | 


--------------------------------------------------------------------------------
/R/MeasurePositiveProbability.R:
--------------------------------------------------------------------------------
 1 | #' @title Positive Probability Measure
 2 | #' @name mlr_measures_positive_probability
 3 | #'
 4 | #' @description
 5 | #' Return the probabiliy of a positive prediction, often known as 'Calders-Wevers' gap.
 6 | #' This is defined as count of positive predictions divided by the number of observations.
 7 | #'
 8 | #' @export
 9 | #' @examples
10 | #' library("mlr3")
11 | #' # Create Positive Probability Measure
12 | #' t = tsk("adult_train")
13 | #' learner = lrn("classif.rpart", cp = .01)
14 | #' learner$train(t)
15 | #' measure = msr("classif.pp")
16 | #' predictions = learner$predict(t)
17 | #' predictions$score(measure, task = t)
18 | MeasurePositiveProbability = R6::R6Class("MeasurePositiveProbability",
19 |   inherit = mlr3::Measure,
20 |   public = list(
21 |     #' @description
22 |     #' Initialize a Measure Positive Probability Object
23 |     initialize = function() {
24 |       super$initialize(
25 |         id = "classif.pp",
26 |         predict_type = "response",
27 |         range = c(0, 1),
28 |         minimize = FALSE,
29 |         task_type = "classif",
30 |         properties = "requires_task" # `.score()` reads `task$positive`
31 |       )
32 |     }
33 |   ),
34 |   private = list(
35 |     # Fraction of predicted responses that equal the task's positive class.
36 |     .score = function(prediction, task, ...) {
37 |       mean(prediction$response == task$positive)
38 |     }
39 |   )
40 | )
41 | 
42 | mlr_measures$add("classif.pp", MeasurePositiveProbability)
43 | 


--------------------------------------------------------------------------------
/R/MeasureSubgroup.R:
--------------------------------------------------------------------------------
  1 | #' @title Evaluate a metric on a subgroup
  2 | #'
  3 | #' @description
  4 | #' Allows for calculation of arbitrary [mlr3::Measure()]s on a selected sub-group.
  5 | #'
  6 | #' @seealso [MeasureFairness], [groupwise_metrics]
  7 | #' @export
  8 | #' @examples
  9 | #' library("mlr3")
 10 | #' # Create a MeasureSubgroup that evaluates accuracy on the "Female" subgroup.
 11 | #' t = tsk("adult_train")
 12 | #' learner = lrn("classif.rpart", cp = .01)
 13 | #' learner$train(t)
 14 | #' measure = msr("subgroup", base_measure = msr("classif.acc"), subgroup = "Female")
 15 | #' predictions = learner$predict(t)
 16 | #' predictions$score(measure, task = t)
 17 | MeasureSubgroup = R6::R6Class("MeasureSubgroup", inherit = Measure,
 18 |   public = list(
 19 |     #' @template field_base_measure
 20 |     base_measure = NULL,
 21 | 
 22 |     #' @field subgroup (`character`)|(`integer`)\cr
 23 |     #' Subgroup identifier.
 24 |     subgroup = NULL,
 25 | 
 26 |     #' @field intersect (`logical`)\cr
 27 |     #' Should groups be intersected?
 28 |     intersect = NULL,
 29 | 
 30 |     #' @description
 31 |     #' Creates a new instance of this [R6][R6::R6Class] class.
 32 |     #'
 33 |     #' @param id (`character`)\cr
 34 |     #'   The measure's id. Derived from the base measure's id and `subgroup` if omitted.
 35 |     #' @param base_measure (`Measure()`)\cr
 36 |     #'   The measure to evaluate on the subgroup.
 37 |     #' @param subgroup (`character`)|(`integer`)\cr
 38 |     #' Subgroup identifier. Either value for the protected attribute or position in `task$levels`.
 39 |     #' @param intersect [`logical`] \cr
 40 |     #'  Should multiple pta groups be intersected? Defaults to `TRUE`.
 41 |     #'  Only relevant if more than one `pta` columns are provided.
 42 |     initialize = function(id = NULL, base_measure, subgroup, intersect = TRUE) {
 43 |       self$base_measure = assert_measure(as_measure(base_measure))
 44 |       assert(check_character(subgroup), check_integer(subgroup))
 45 |       self$subgroup = subgroup
 46 |       self$intersect = assert_flag(intersect)
 47 | 
 48 |       if (is.null(id)) {
 49 |         id = replace_prefix(self$base_measure$id, mlr_reflections$task_types$type, "subgroup.")
 50 |         id = paste(id, self$subgroup, sep = "_")
 51 |       }
 52 |       super$initialize(
 53 |         id = id,
 54 |         range = self$base_measure$range,
 55 |         task_type = self$base_measure$task_type,
 56 |         properties = "requires_task",
 57 |         minimize = self$base_measure$minimize,
 58 |         predict_type = self$base_measure$predict_type,
 59 |         packages = "mlr3fairness",
 60 |         man = "mlr_measures_subgroup"
 61 |       )
 62 |     }
 63 |   ),
 64 | 
 65 |   private = list(
 66 |     .score = function(prediction, task, ...) {
 67 |       assert_pta_task(task)
 68 |       groups = get_pta(task, prediction$row_ids, intersect = self$intersect)
 69 |       nms = copy(names(groups))
 70 |       # Build the join table in a local variable: writing it back to `self$subgroup`
 71 |       # would pin the measure to the pta column names of the first task it scored.
 72 |       subgroup_dt = self$subgroup
 73 |       if (!is.data.table(subgroup_dt)) subgroup_dt = as.data.table(setNames(list(subgroup_dt), nms))
 74 |       assert_subset(unlist(subgroup_dt), unlist(map(groups, function(x) {unique(as.character(x))})))
 75 |       groups[, row_ids := prediction$row_ids]
 76 |       # Keep only the predicted rows whose protected attribute value(s) match the subgroup.
 77 |       rws = intersect(prediction$row_ids, groups[subgroup_dt, on = nms]$row_ids)
 78 |       prediction$clone()$filter(rws)$score(self$base_measure, task = task, ...)
 79 |     }
 80 |   )
 81 | )
 82 | 
 83 | #' @title Evaluate a metric on each protected subgroup in a task.
 84 | #'
 85 | #' @description
 86 | #' Instantiates one new measure per protected attribute group in a task.
 87 | #' Each metric is then evaluated only on predictions made for the given specific subgroup.
 88 | #'
 89 | #' @template param_base_measure
 90 | #' @param task [`Task`] \cr
 91 | #'   [mlr3::Task()] to instantiate measures for.
 92 | #' @param intersect [`logical`] \cr
 93 | #'  Should multiple pta groups be intersected? Defaults to `TRUE`.
 94 | #'  Only relevant if more than one `pta` columns are provided.
 95 | #' @seealso [MeasureSubgroup]
 96 | #' @export
 97 | #' @examples
 98 | #'   library("mlr3")
 99 | #'   t = tsk("compas")
100 | #'   l = lrn("classif.rpart")
101 | #'   m = groupwise_metrics(msr("classif.acc"), t)
102 | #'   l$train(t)$predict(t)$score(m, t)
103 | #' @return `list` \cr
104 | #' List of [mlr3::Measure]s.
105 | groupwise_metrics = function(base_measure, task, intersect = TRUE) {
106 |   assert_pta_task(task)
107 |   base_measure = assert_measure(as_measure(base_measure))
108 |   pta_cols = get_pta(task, rows = NULL, intersect = intersect)
109 |   group_values = unlist(map(pta_cols, function(col) as.character(unique(col))))
110 |   map(group_values, function(grp) MeasureSubgroup$new(subgroup = grp, base_measure = base_measure, id = NULL, intersect = intersect))
111 | }
112 | 


--------------------------------------------------------------------------------
/R/PipeOpExplicitPta.R:
--------------------------------------------------------------------------------
 1 | #' @title PipeOpExplicitPta
 2 | #' 
 3 | #' @usage NULL
 4 | #' @name mlr_pipeops_explicit_pta
 5 | #' @format [R6Class] object inheriting from [`PipeOpTaskPreproc`]/[`PipeOp`].
 6 | #'
 7 | #' @description
 8 | #'   Turns the column with column role 'pta' into an explicit separate column prefixed with "..internal_pta_".
 9 | #'   This keeps it from getting changed or adapted by subsequent pipelines that operate on the feature pta.
10 | #' 
11 | #' @section Construction:
12 | #' ```
13 | #' PipeOpExplicitPta$new(id = "explicit_pta", param_vals = list())
14 | #' ```
15 | #' * `id` (`character(1)`).
16 | #' * `param_vals` (`list()`)
17 | #' 
18 | #' @section Input and Output Channels:
19 | #' Input and output channels are inherited from [PipeOpTaskPreproc]. Instead of a [Task][mlr3::Task], a
20 | #' [TaskClassif][mlr3::TaskClassif] is used as input and output during training and prediction.
21 | #'
22 | #' The output during training and prediction is the input [Task][mlr3::Task] with each `pta` column copied
23 | #' to a new column prefixed with `..internal_pta_`; the `pta` column role is moved to the new column(s).
24 | #'
25 | #'
26 | #' @section State:
27 | #' The `$state` is a named `list` with the `$state` elements inherited from [PipeOpTaskPreproc][mlr3pipelines::PipeOpTaskPreproc].
28 | #'
29 | #' @section Parameters:
30 | #' The PipeOp does not have any hyperparameters.
31 | #' 
32 | #' @section Internals:
33 | #' Copies the existing pta column to a new column.
34 | #' 
35 | #' @section Fields:
36 | #' Only fields inherited from [PipeOpTaskPreproc]/[`PipeOp`].
37 | #'
38 | #' @section Methods:
39 | #' Methods inherited from [PipeOpTaskPreproc][mlr3pipelines::PipeOpTaskPreproc]/[PipeOp][mlr3pipelines::PipeOp].
40 | #' 
41 | #' @family PipeOps
42 | #' @seealso https://mlr3book.mlr-org.com/list-pipeops.html
43 | #' @export
44 | #' @examples
45 | #' library("mlr3")
46 | #' library("mlr3pipelines")
47 | #' epta = po("explicit_pta")
48 | #' new = epta$train(list(tsk("adult_train")))
49 | PipeOpExplicitPta = R6Class("PipeOpExplicitPta",
50 |   inherit = PipeOpTaskPreproc,
51 |   public = list(
52 |     #' @description
53 |     #' Creates a new instance of this [R6][R6::R6Class] [PipeOp] class.
54 |     #'
55 |     #' @param id `character` \cr
56 |     #'   Identifier of the PipeOp; defaults to `"explicit_pta"`.
57 |     #' @param param_vals `list` \cr
58 |     #'   Parameter values to set on construction. See `Parameters`.
59 |     initialize = function(id = "explicit_pta", param_vals = list()) {
60 |       super$initialize(id, param_set = ParamSet$new(), param_vals = param_vals)
61 |     }
62 |   ),
63 | 
64 |   private = list(
65 |     # Training and prediction apply the same column transformation.
66 |     .train_task = function(task) private$.transform_task(task),
67 |     .predict_task = function(task) private$.transform_task(task),
68 | 
69 |     # Copies every `pta` column to a "..internal_pta_"-prefixed column and moves
70 |     # the `pta` column role from the original column(s) to the copies.
71 |     .transform_task = function(task) {
72 |       assert_pta_task(task)
73 |       already_explicit = startsWith(task$feature_names, "..internal_pta")
74 |       if (any(already_explicit)) {
75 |         stop("Task already has an explicit ..internal_pta column")
76 |       }
77 |       pta_cols = task$col_roles$pta
78 |       explicit_cols = paste0("..internal_pta_", pta_cols)
79 |       # The primary key is fetched as well, presumably so `cbind()` can match rows.
80 |       pta_data = task$data(cols = c(task$backend$primary_key, pta_cols))
81 |       setnames(pta_data, pta_cols, explicit_cols)
82 |       task$cbind(pta_data)
83 |       task$set_col_roles(explicit_cols, "pta")
84 |       task$set_col_roles(pta_cols, remove_from = "pta")
85 |       return(task)
86 |     }
87 |   )
88 | )
89 | 


--------------------------------------------------------------------------------
/R/aaa.R:
--------------------------------------------------------------------------------
1 | 
2 | learners = list()
3 | pipeops = list()
4 | measures = list()
5 | tasks = list()
6 | 


--------------------------------------------------------------------------------
/R/adult.R:
--------------------------------------------------------------------------------
 1 | #' @title Adult Dataset
 2 | #'
 3 | #' @name adult
 4 | #' @aliases adult_test
 5 | #' @aliases adult_train
 6 | #'
 7 | #' @description
 8 | #' Dataset used to predict whether income exceeds $50K/yr based on census data.
 9 | #' Also known as "Census Income" dataset
10 | #' Train dataset contains 13 features and 30178 observations.
11 | #' Test dataset contains 13 features and 15315 observations.
12 | #' Target column is "target": A binary factor where 1: <=50K and 2: >50K for annual income.
13 | #' The column `"sex"` is set as protected attribute.
14 | #' 
15 | #' @section Derived tasks:
16 | #' * `adult_train`: Original train split for the adult task available at UCI.
17 | #' * `adult_test`: Original test split for the adult task available at UCI.
18 | #' 
19 | #' @section Using Adult - Known Problems:
20 | #' The adult dataset has several known limitations such as its age, limited documentation, and outdated feature encodings (Ding et al., 2021). 
21 | #' Furthermore, the selected threshold (income <=50K) has strong implications on the outcome of analysis, such that 
22 | #' "In many cases, the $50k threshold understates and misrepresents the broader picture" (Ding et al., 2021). 
23 | #' As a result, conclusions w.r.t. real-world implications are severely limited.
24 | #' 
25 | #' We decide to replicate the dataset here, as it is a widely used benchmark dataset and it can still serve this purpose.
26 | #'
27 | #' @section Pre-processing:
28 | #' * `fnlwgt` Remove final weight, which is the number of people the census believes the entry represents
29 | #' * `native-country` Remove Native Country, which is the country of origin for an individual
30 | #' * Rows containing `NA` in workclass and occupation have been removed.
31 | #' * Pre-processing inspired by article: @url https://cseweb.ucsd.edu//classes/sp15/cse190-c/reports/sp15/048.pdf
32 | #'
33 | #' @section Metadata:
34 | #' * (integer) age: The age of the individuals
35 | #' * (factor) workclass: A general term to represent the employment status of an individual
36 | #' * (factor) education: The highest level of education achieved by an individual.
37 | #' * (integer) education_num: the highest level of education achieved in numerical form.
38 | #' * (factor) marital_status: marital status of an individual.
39 | #' * (factor) occupation: the general type of occupation of an individual
40 | #' * (factor) relationship: whether the individual is in a relationship.
41 | #' * (factor) race: Descriptions of an individual’s race
42 | #' * (factor) sex: the biological sex of the individual
43 | #' * (integer) capital-gain: capital gains for an individual
44 | #' * (integer) capital-loss: capital loss for an individual
45 | #' * (integer) hours-per-week: the hours an individual has reported to work per week
46 | #' * (factor) target: whether or not an individual makes more than $50,000 annually
47 | #'
48 | #' @source
49 | #' `r format_bib("dua_2019")`
50 | #' `r format_bib("ding2021retiring")`
51 | #'
52 | #' @docType data
53 | #' @keywords data
54 | #' @examples
55 | #' library("mlr3")
56 | #' data("adult_test", package = "mlr3fairness")
57 | #' data("adult_train", package = "mlr3fairness")
58 | NULL
59 | 
60 | get_adult_task_train = function() { # nocov start
61 |   b = as_data_backend(mlr3fairness::adult_train)
62 |   task = mlr3::TaskClassif$new("adult_train", b, target = "target")
63 |   task$col_roles$pta = "sex"
64 |   b$hash = task$man = "mlr3fairness::mlr_tasks_adult_train"
65 |   task
66 | } # nocov end
67 | 
68 | get_adult_task_test = function() { # nocov start
69 |   b = as_data_backend(mlr3fairness::adult_test)
70 |   task = mlr3::TaskClassif$new("adult_test", b, target = "target")
71 |   task$col_roles$pta = "sex"
72 |   b$hash = task$man = "mlr3fairness::mlr_tasks_adult_test"
73 |   task
74 | } # nocov end
75 | 
76 | 
77 | 


--------------------------------------------------------------------------------
/R/assertions.R:
--------------------------------------------------------------------------------
 1 | #' @title Assertions for mlr3fairness
 2 | #'
 3 | #' @description
 4 | #' Functions intended to be used in packages extending \pkg{mlr3fairness}.
 5 | #' Most assertion functions ensure the right class attribute, and optionally additional properties.
 6 | #'
 7 | #' If an assertion fails, an exception is raised.
 8 | #' Otherwise, the input object is returned invisibly.
 9 | #'
10 | #' @noRd
11 | NULL
12 | 
13 | # Assert task contains a pta column. 
14 | # Single: Has only one pta column.
15 | assert_pta_task = function(task, measure = NULL, single = FALSE) {
16 |   # Validate the object first so the checks below can rely on `$col_roles` and `$id`.
17 |   assert_task(task)
18 |   if (length(task$col_roles$pta) == 0L) {
19 |     stopf("Task '%s' must have a column with role 'pta' (protected attribute) for fairness operations", task$id)
20 |   }
21 |   if (single && length(task$col_roles$pta) > 1L) {
22 |     stopf("The operation only allows for a single column specified as 'pta', but task '%s' has %i.", task$id, length(task$col_roles$pta))
23 |   }
24 |   invisible(task)
25 | }


--------------------------------------------------------------------------------
/R/bibentries.R:
--------------------------------------------------------------------------------
  1 | #' @importFrom utils bibentry
  2 | bibentries = c(
  3 |   dua_2019 = bibentry("misc",
  4 |     author = "Dua, Dheeru and Graff, Casey",
  5 |     year = "2017",
  6 |     title = "UCI Machine Learning Repository",
  7 |     url = "http://archive.ics.uci.edu/ml/",
  8 |     institution = "University of California, Irvine, School of Information and Computer Sciences"
  9 |   ),
 10 | 
 11 |   hardt_2016 = bibentry("inproceedings",
 12 |     author = "Moritz Hardt and Eric Price and Nathan Srebro",
 13 |     title = "Equality of Opportunity in Supervised Learning",
 14 |     url = "https://papers.nips.cc/paper/2016/file/9d2682367c3935defcb1f9e247a97c0d-Paper.pdf",
 15 |     booktitle = "Advances in Neural Information Processing Systems",
 16 |     volume = "29",
 17 |     year = "2016",
 18 |     pages = "3315--3323"
 19 |   ),
 20 | 
 21 |   pleiss_2017 = bibentry("inproceedings",
 22 |     author = "Pleiss, Geoff and Raghavan, Manish and Wu, Felix and Kleinberg, Jon and Weinberger, Kilian Q",
 23 |     booktitle = "Advances in Neural Information Processing Systems",
 24 |     editor = "I. Guyon and U. V. Luxburg and S. Bengio and H. Wallach and R. Fergus and S. Vishwanathan and R. Garnett",
 25 |     publisher = "Curran Associates, Inc.",
 26 |     title = "On Fairness and Calibration",
 27 |     url = "https://proceedings.neurips.cc/paper/2017/file/b8b9c74ac526fffbeb2d39ab038d1cd7-Paper.pdf",
 28 |     volume = "30",
 29 |     year = "2017"
 30 |   ),
 31 | 
 32 |   kamiran12 = bibentry("article",
 33 |     title="Data preprocessing techniques for classification without discrimination",
 34 |     author="Kamiran, Faisal and Calders, Toon",
 35 |     journal="Knowledge and Information Systems",
 36 |     volume="33",
 37 |     number="1",
 38 |     pages="1--33",
 39 |     year="2012",
 40 |     publisher="Springer"
 41 |   ),
 42 | 
 43 |   datasheets = bibentry("article",
 44 |     title="Datasheets for datasets",
 45 |     author="Gebru, Timnit and Morgenstern, Jamie and Vecchione, Briana and Vaughan, Jennifer Wortman and Wallach, Hanna and Daume III, Hal and Crawford, Kate",
 46 |     journal="arXiv preprint arXiv:1803.09010",
 47 |     year="2018"
 48 |   ),
 49 | 
 50 |   modelcards = bibentry("inproceedings",
 51 |     title="Model cards for model reporting",
 52 |     author="Mitchell, Margaret and Wu, Simone and Zaldivar, Andrew and Barnes, Parker and Vasserman, Lucy and Hutchinson, Ben and Spitzer, Elena and Raji, Inioluwa Deborah and Gebru, Timnit",
 53 |     booktitle="Proceedings of the conference on fairness, accountability, and transparency",
 54 |     pages="220--229",
 55 |     year="2019"
 56 |   ),
 57 | 
 58 |   komiyama = bibentry("inproceedings",
 59 |     title="Nonconvex Optimization for Regression with Fairness Constraints",
 60 |     author="Komiyama J, Takeda A, Honda J, Shimao H",
 61 |     booktitle="Proceedings of the 35th International Conference on Machine Learning (ICML), PMLR 80",
 62 |     pages="2737-2746",
 63 |     year="2018"
 64 |   ),
 65 | 
 66 |   zafar19a = bibentry("inproceedings",
 67 |     title="Fairness Constraints: a Flexible Approach for Fair Classification",
 68 |     author="Zafar BJ, Valera I, Gomez-Rodriguez M, Gummadi KP",
 69 |     booktitle="Journal of Machine Learning Research, 30",
 70 |     pages="1-42",
 71 |     year="2019"
 72 |   ),
 73 | 
 74 |   scutari21 = bibentry("article",
 75 |     title="Achieving Fairness with a Simple Ridge Penalty",
 76 |     author="Marco Scutari and Francesca Panero and Manuel Proissl",
 77 |     journal="arXiv preprint arXiv:2105.13817",
 78 |     year="2021"
 79 |   ),
 80 | 
 81 |   saleiro18 = bibentry("article",
 82 |     title="Aequitas: A bias and fairness audit toolkit",
 83 |     author="Saleiro, Pedro and Kuester, Benedict and Hinkson, Loren and London, Jesse and Stevens, Abby and Anisfeld, Ari and Rodolfa, Kit T and Ghani, Rayid",
 84 |     journal="arXiv preprint arXiv:1811.05577",
 85 |     year="2018"
 86 |   ),
 87 | 
 88 |   bao2021s = bibentry("inproceedings",
 89 |     title="It's COMPASlicated: The Messy Relationship between RAI Datasets and Algorithmic Fairness Benchmarks",
 90 |     author="Bao, Michelle and Zhou, Angela and Zottola, Samantha A and Brubach, Brian and Desmarais, Sarah and Horowitz, Aaron Seth and Lum, Kristian and Venkatasubramanian, Suresh",
 91 |     booktitle="Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 1)",
 92 |     year="2021"
 93 |   ),
 94 | 
 95 |   ding2021retiring = bibentry("inproceedings",
 96 |     title="Retiring adult: New datasets for fair machine learning",
 97 |     author="Ding, Frances and Hardt, Moritz and Miller, John and Schmidt, Ludwig",
 98 |     booktitle="Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 1)",
 99 |     year="2021"
100 |   )
101 | 
102 | )
103 | 


--------------------------------------------------------------------------------
/R/compare_metrics.R:
--------------------------------------------------------------------------------
 1 | #' Compare different metrics
 2 | #' 
 3 | #' @rdname fairness_compare_metrics
 4 | #'
 5 | #' @description
 6 | #' Compare learners with respect to one or multiple metrics.
 7 | #' Metrics can be, but are not limited to, fairness metrics.
 8 | #' 
 9 | #' @template pta
10 | #'
11 | #' @param object ([PredictionClassif] | [BenchmarkResult] | [ResampleResult])\cr
12 | #'   The object to create a plot for.
13 | #'   * If provided a ([PredictionClassif]).
14 | #'     Then the visualization will compare the fairness metrics among the binary level from protected field
15 | #'     through bar plots.
16 | #'   * If provided a ([ResampleResult]).
17 | #'     Then the visualization will generate the boxplots for fairness metrics, and compare them among
18 | #'     the binary level from protected field.
19 | #'   * If provided a ([BenchmarkResult]).
20 | #'     Then the visualization will generate the boxplots for fairness metrics, and compare them among
21 | #'     both the binary level from protected field and the models implemented.
22 | #' @param ...
23 | #' The arguments to be passed to methods, such as:
24 | #'   * `measures` (list of [Measure])\cr
25 | #'     The measures that will be evaluated on object, could be a single [Measure] or a list of [Measure]s.
26 | #'     Default measure set to be `msr("fairness.acc")`.
27 | #'   * `task` ([TaskClassif])\cr
28 | #'     The data task that contains the protected column, only required when object is ([PredictionClassif]).
29 | #'
30 | #' @export
31 | #' @return A 'ggplot2' object.
32 | #' @examples
33 | #' library("mlr3")
34 | #' library("mlr3learners")
35 | #'
36 | #' # Setup the Fairness Measures and tasks
37 | #' task = tsk("adult_train")$filter(1:500)
38 | #' learner = lrn("classif.ranger", predict_type = "prob")
39 | #' learner$train(task)
40 | #' predictions = learner$predict(task)
41 | #' design = benchmark_grid(
42 | #'   tasks = task,
43 | #'   learners = lrns(c("classif.ranger", "classif.rpart"),
44 | #'     predict_type = "prob", predict_sets = c("train", "test")),
45 | #'   resamplings = rsmps("cv", folds = 3)
46 | #' )
47 | #'
48 | #' bmr = benchmark(design)
49 | #' fairness_measure = msr("fairness.tpr")
50 | #' fairness_measures = msrs(c("fairness.tpr", "fairness.fnr", "fairness.acc"))
51 | #'
52 | #' # Predictions
53 | #' compare_metrics(predictions, fairness_measure, task)
54 | #' compare_metrics(predictions, fairness_measures, task)
55 | #'
56 | #' # BenchmarkResult and ResamplingResult
57 | #' compare_metrics(bmr, fairness_measure)
58 | #' compare_metrics(bmr, fairness_measures)
59 | compare_metrics = function(object, ...) {
60 |   UseMethod("compare_metrics")
61 | }
62 | 
63 | #' @export
64 | compare_metrics.PredictionClassif = function(object, measures = msr("fairness.acc"), task, ...) { # nolint
65 |   measures = as_measures(measures)
66 |   scores = setDT(as.data.frame(t(object$score(measures, task = task, ...))))
67 |   data = melt(scores[, ids(measures), with = FALSE], measure.vars = names(scores))
68 |   ggplot(data, aes(x = variable, y = value)) +
69 |     geom_bar(stat = "identity") +
70 |     xlab("Metrics") +
71 |     ylab("Value") +
72 |     theme(legend.position = "none") +
73 |     scale_fill_hue(c = 100, l = 60)
74 | }
75 | 
76 | #' @export
77 | compare_metrics.BenchmarkResult = function(object, measures = msr("fairness.acc"), ...) { # nolint
78 |   measures = as_measures(measures)
79 |   scores = object$aggregate(measures, ...)
80 |   data = melt(scores[, c(ids(measures), "learner_id", "task_id"), with = FALSE], id.vars = c("learner_id", "task_id"))
81 |   ggplot(data, aes(x = learner_id, y = value, fill = variable)) +
82 |     geom_bar(stat = "identity", position = "dodge") +
83 |     xlab("Metrics") +
84 |     ylab("Value") +
85 |     scale_fill_hue(name = "Metric", c = 100, l = 60) +
86 |     facet_wrap(~task_id)
87 | }
88 | 
89 | #' @export
90 | compare_metrics.ResampleResult = function(object, measures = msr("fairness.acc"), ...) { # nolint
91 |   # Delegate to the BenchmarkResult method; forward `...` so extra arguments are not dropped.
92 |   compare_metrics(as_benchmark_result(object), measures, ...)
93 | }
94 | 
95 | 
96 | 


--------------------------------------------------------------------------------
/R/compute_metrics.R:
--------------------------------------------------------------------------------
 1 | #' Compute metrics for non-mlr3 predictions.
 2 | #' 
 3 | #' @description 
 4 | #' Allows computing metrics for predictions that do not stem from mlr3, and were 
 5 | #' e.g. being made by models outside of mlr3.
 6 | #' Currently only `classif` and `regr` - style predictions are supported.
 7 | #' 
 8 | #' @template pta
 9 | #' 
10 | #' @param data (`data.table`) \cr The dataset used for predicting.
11 | #' @param target (`character`) \cr The name of the target variable. Must be available in `data`.
12 | #' @param protected_attribute (`character`) \cr The name(s) of the protected attributes(s). Must be available in `data`.
13 | #' @param prediction (`vector`) \cr A vector containing predictions. 
14 | #' @param metrics (`Metric`|`list`) \cr (List of) mlr3 metrics to apply.
15 | #' @export
16 | #' @examples
17 | #' library("mlr3")
18 | #' # Get adult data as a data.table
19 | #' train = tsk("adult_train")$data()
20 | #' mod = rpart::rpart(target ~ ., train)
21 | #' 
22 | #' # Predict on test data
23 | #' test = tsk("adult_test")$data()
24 | #' yhat = predict(mod, test, type = "vector")
25 | #' 
26 | #' # Convert to a factor with the same levels
27 | #' yhat = as.factor(yhat)
28 | #' levels(yhat) = levels(test$target)
29 | #' 
30 | #' compute_metrics(
31 | #'   data = test, 
32 | #'   target = "target",
33 | #'   prediction = yhat,
34 | #'   protected_attribute = "sex",
35 | #'   metrics = msr("fairness.acc")
36 | #' )
37 | compute_metrics = function(data, target, protected_attribute, prediction, metrics = NULL) {
38 |   assert_data_frame(data)
39 |   assert_choice(target, colnames(data))
40 |   # Without the length check a length-1 `prediction` is recycled silently by `data.table()`.
41 |   assert_vector(prediction, len = nrow(data))
42 |   # One or more protected attributes are allowed, as documented for this argument.
43 |   assert_subset(protected_attribute, colnames(data), empty.ok = FALSE)
44 |   is_classif = is.factor(data[[target]])
45 |   # `is.numeric()` yields a single flag; `class(x) %in% ...` has one element per class.
46 |   if (is_classif) {
47 |     t = as_task_classif(data, target = target)
48 |   } else if (is.numeric(data[[target]])) {
49 |     t = as_task_regr(data, target = target)
50 |   } else {
51 |     stop("compute_metrics currently only handles classif (factor) or regr (integer|numeric) targets!")
52 |   }
53 |   t$col_roles$pta = protected_attribute
54 |   if (is_classif) assert_factor(prediction, levels = t$levels(target)[[1]])
55 | 
56 |   df = data.table(
57 |     "row_ids"  = t$row_ids,
58 |     "truth" = t$truth(),
59 |     "response" = prediction
60 |   )
61 |   # Wrap the predictions in the mlr3 prediction class that matches the task type.
62 |   prd = if (is_classif) as_prediction_classif(df) else as_prediction_regr(df)
63 |   prd$score(metrics, task = t)
64 | }


--------------------------------------------------------------------------------
/R/fairness_accuracy_tradeoff.R:
--------------------------------------------------------------------------------
  1 | #' Plot Fairness Accuracy Trade-offs
  2 | #'
  3 | #' @description
  4 | #' Provides visualization wrt. trade-offs between fairness and accuracy metrics across learners and
  5 | #' resampling iterations.
  6 | #' This can assist in gauging the optimal model from a set of options along with estimates of variance
  7 | #' (through individual resampling iterations).
  8 | #'
  9 | #' @template pta
 10 | #'
 11 | #' @param object ([PredictionClassif] | [BenchmarkResult] | [ResampleResult])\cr
 12 | #'   The binary class prediction object that will be evaluated.
 13 | #'   * If provided a [PredictionClassif].
 14 | #'     Then only one point will indicate the accuracy and fairness metrics for the current predictions.
 15 | #'     Requires also passing a [Task].
 16 | #'   * If provided a [ResampleResult].
 17 | #'     Then the plot will compare the accuracy and fairness metrics for the same model,
 18 | #'     but different resampling iterations  as well as the aggregate indicated by a cross.
 19 | #'   * If provided a [BenchmarkResult].
 20 | #'     Then the plot will compare the accuracy and fairness metrics for all models and all resampling iterations.
 21 | #'     Points are colored according to the learner_id and faceted by task_id.
 22 | #'     The aggregated score is indicated by a cross.
 23 | #'
 24 | #' @param ...
 25 | #'   Arguments to be passed to methods. Such as:
 26 | #'   * `fairness_measure` ([Measure])\cr
 27 | #'     The fairness measure that will be evaluated.
 28 | #'     Default measure set to be `msr("fairness.fpr")`
 29 | #'   * `acc_measure` ([Measure])\cr
 30 | #'     The accuracy measure that will be evaluated.
 31 | #'     Default measure set to be [msr("classif.acc")][mlr3::MeasureClassif].
 32 | #'   * `task` ([TaskClassif])\cr
 33 | #'     The data task that contains the protected column, only required when the class of object is ([PredictionClassif])
 34 | #'
 35 | #' @export
 36 | #' @return A 'ggplot2' object.
 37 | #' @examples
 38 | #' library("mlr3")
 39 | #' library("mlr3learners")
 40 | #' library("ggplot2")
 41 | #'
 42 | #' # Setup the Fairness measure and tasks
 43 | #' task = tsk("adult_train")$filter(1:500)
 44 | #' learner = lrn("classif.ranger", predict_type = "prob")
 45 | #' fairness_measure = msr("fairness.tpr")
 46 | #' 
 47 | #' # Example 1 - A single prediction
 48 | #' learner$train(task)
 49 | #' predictions = learner$predict(task)
 50 | #' fairness_accuracy_tradeoff(predictions, fairness_measure, task = task)
 51 | #' 
 52 | #' # Example2 - A benchmark
 53 | #' design = benchmark_grid(
 54 | #'   tasks = task,
 55 | #'   learners = lrns(c("classif.featureless", "classif.rpart"),
 56 | #'     predict_type = "prob", predict_sets = c("train", "test")),
 57 | #'   resamplings = rsmps("cv", folds = 2)
 58 | #' )
 59 | #' bmr = benchmark(design)
 60 | #' fairness_accuracy_tradeoff(bmr, fairness_measure)
 61 | fairness_accuracy_tradeoff = function(object, ...) {
 62 |   UseMethod("fairness_accuracy_tradeoff")
 63 | }
 64 | 
 65 | #' @export
 66 | fairness_accuracy_tradeoff.PredictionClassif = function(object, fairness_measure = msr("fairness.fpr"), acc_measure = msr("classif.acc"), task, ...) { # nolint
 67 |   assert_measure(fairness_measure)
 68 |   assert_measure(acc_measure)
 69 |   data = as.data.frame(t(object$score(list(acc_measure, fairness_measure), task = task)))
 70 | 
 71 |   ggplot(data, aes(x = .data[[acc_measure$id]], y = .data[[fairness_measure$id]])) +
 72 |     geom_point()
 73 | }
 74 | 
 75 | #' @export
 76 | fairness_accuracy_tradeoff.BenchmarkResult = function(object, fairness_measure = msr("fairness.fpr"), acc_measure = msr("classif.acc"), plot_scores = TRUE, ...) { # nolint
 77 |   assert_measure(fairness_measure)
 78 |   assert_measure(acc_measure)
 79 |   assert_flag(plot_scores)
 80 |   data = object$aggregate(list(acc_measure, fairness_measure))
 81 |   data = insert_named(data, list(aggi = 1, agg = "mean"))
 82 | 
 83 |   if (plot_scores) {
 84 |     tmp = object$score(list(acc_measure, fairness_measure))[, "aggi" := 0][, agg := "replication"]
 85 |     data = rbind(data, insert_named(tmp, list("aggi" = 0, agg = "replication")), fill = TRUE)
 86 |   }
 87 | 
 88 | 
 89 |   ggplot(data,
 90 |     aes(x = .data[[acc_measure$id]], y = .data[[fairness_measure$id]], colour = .data[["learner_id"]], size = .data[["aggi"]], alpha = .data[["aggi"]], pch = .data[["agg"]])) +
 91 |     geom_point() +
 92 |     scale_alpha(range = c(0.5, 1)) +
 93 |     scale_size(range = c(3, 6)) +
 94 |     scale_shape_manual(name = "Aggregation", values = c(4, 16)) +
 95 |     guides(size = "none", alpha = "none") +
 96 |     facet_wrap(~task_id)
 97 | }
 98 | 
 99 | #' @export
fairness_accuracy_tradeoff.ResampleResult = function(object, fairness_measure = msr("fairness.fpr"), acc_measure = msr("classif.acc"), ...) { # nolint
  # A ResampleResult is plotted by converting it to a BenchmarkResult and
  # delegating to that method. `...` is forwarded so that additional
  # arguments (e.g. `plot_scores`) are honoured instead of silently dropped.
  fairness_accuracy_tradeoff(
    as_benchmark_result(object),
    fairness_measure = fairness_measure,
    acc_measure = acc_measure,
    ...
  )
}
104 | 


--------------------------------------------------------------------------------
/R/fairness_tensor.R:
--------------------------------------------------------------------------------
 1 | #' @title Compute the Fairness Tensor given a Prediction and a Task
 2 | #'
 3 | #' @description
 4 | #' A fairness tensor is a list of groupwise confusion matrices.
 5 | #' 
 6 | #' @template pta
 7 | #'
 8 | #' @param object ([data.table()] | [PredictionClassif] | [ResampleResult])\cr
 9 | #'   A data.table with columns `truth` and `prediction`,
10 | #'   a [PredictionClassif] or a [ResampleResult].
11 | #' @param normalize (`character`)\cr
12 | #'   How should the fairness tensor be normalized? 
13 | #'   "all" normalizes entries by dividing by dataset size,
14 | #'   "group" normalizes entries by dividing by group size and
15 | #'   "none" does not conduct any normalization at all.
16 | #' @param ... `any`\cr
17 | #'   Currently not used.
18 | #' @return
19 | #'   `list()` of confusion matrix for every group in `"pta"`.
20 | #' @export
21 | #' @examples
22 | #' library("mlr3")
23 | #' task = tsk("compas")
24 | #' prediction = lrn("classif.rpart")$train(task)$predict(task)
25 | #' fairness_tensor(prediction, task = task)
fairness_tensor = function(object, normalize = "all", ...) UseMethod("fairness_tensor") # S3 generic, dispatches on class(object)
29 | 
30 | #' @rdname fairness_tensor
31 | #' @param task ([TaskClassif])\cr
32 | #'   A [TaskClassif]. Needs `col_role` `"pta"` to be set.
33 | #' @export
fairness_tensor.data.table = function(object, normalize = "all", task, ...) { # nolint
  assert_names(colnames(object), must.include = c("truth", "prediction"))

  # Build the columns required by as_prediction_classif(); if the table has
  # no `row_ids` column, rows are numbered consecutively.
  dt = data.table(
    row_ids = object$row_ids %??% seq_len(nrow(object)),
    truth = object$truth,
    response = object$prediction
  )
  prd = as_prediction_classif(dt)

  # Forward `normalize` (previously dropped, so the result was always
  # normalized with "all" regardless of the argument).
  fairness_tensor(prd, normalize = normalize, task = task, ...)
}
45 | 
46 | #' @rdname fairness_tensor
47 | #' @export
fairness_tensor.PredictionClassif = function(object, normalize = "all", task, ...) { # nolint
  assert_pta_task(task)
  assert_choice(normalize, c("all", "group", "none"))

  # Primary key plus protected attribute column(s); groups are formed by the
  # column in second position.
  cols = c(task$backend$primary_key, task$col_roles$pta)
  data = task$data(cols = cols)
  groups = split(data, by = cols[2L], keep.by = FALSE)

  # One confusion matrix per group, computed on a filtered clone so that
  # `object` itself is left untouched.
  tensors = map(groups, function(group) {
    object$clone()$filter(group[[1L]])$confusion
  })

  # NOTE(review): "all" divides by the number of rows in the task, not by the
  # number of predicted rows -- confirm this is intended for partial predictions.
  switch(normalize,
    all = map(tensors, function(cm) cm / nrow(data)),
    group = map(tensors, function(cm) cm / sum(cm)),
    none = tensors
  )
}
66 | 
67 | 
68 | #' @rdname fairness_tensor
69 | #' @export
fairness_tensor.ResampleResult = function(object, normalize = "all", ...) { # nolint
  # Delegate to the PredictionClassif method using the combined prediction
  # and the task stored in the ResampleResult. `normalize` must be passed on
  # explicitly; otherwise the callee falls back to its default ("all").
  fairness_tensor(object$prediction(), normalize = normalize, task = object$task, ...)
}
73 | 


--------------------------------------------------------------------------------
/R/learner_fairml_classif_fairfgrrm.R:
--------------------------------------------------------------------------------
 1 | #' @title Classification Fair Generalized Ridge Regression Learner
 2 | #' @author pfistfl
 3 | #' @name mlr_learners_classif.fairfgrrm
 4 | #' 
 5 | #' @details 
 6 | #' Fair generalized ridge regression model implemented via package `fairml`.
 7 | #' The 'unfairness' parameter is set to 0.05 as a default.
 8 | #'
 9 | #' @template class_learner
10 | #' @templateVar id classif.fairfgrrm
11 | #' @templateVar caller fgrrm
12 | #'
13 | #' @references
14 | #' `r format_bib("scutari21")`
15 | #'
16 | #' @template seealso_learner
17 | #' @template example
18 | #' @export
LearnerClassifFairfgrrm = R6Class("LearnerClassifFairfgrrm",
  inherit = LearnerClassif,

  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      param_set = ps(
        lambda = p_dbl(lower = 0, upper = Inf, tags = "train", default = 0),
        definition = p_fct(levels = c("sp-komiyama", "eo-komiyama"), default = "sp-komiyama", tags = "train"),
        save.auxiliary = p_lgl(default = FALSE, tags = "train"),
        unfairness = p_dbl(lower = 0, upper = 1, tags = "train"),
        family = p_fct(levels = c("gaussian", "binomial"), tags = "train", default = "binomial"),
        intersect = p_lgl(default = TRUE, tags = c("train", "predict"))
      )
      # Initial values set explicitly for this learner.
      param_set$values = list(unfairness = 0.05, intersect = FALSE)

      super$initialize(
        id = "classif.fairfgrrm",
        packages = "fairml",
        feature_types = c("integer", "numeric", "factor", "ordered"),
        predict_types = c("response", "prob"),
        properties = "twoclass",
        param_set = param_set,
        man = "mlr3fairness::mlr_learners_classif.fairfgrrm"
      )
    }
  ),

  private = list(

    .train = function(task) {
      assert_pta_task(task)
      train_pars = self$param_set$get_values(tags = "train")

      # Remember the feature names so that predict uses the same columns.
      self$state$feature_names = task$feature_names

      response = task$truth()

      # Protected attributes, converted column-wise to numeric.
      sensitive = get_pta(task, intersect = train_pars$intersect)
      sensitive = map_dtc(sensitive, as.numeric)

      # `intersect` only steers get_pta() and is not an argument of fgrrm().
      fit_args = remove_named(train_pars, "intersect")

      # Predictors are all features except the protected attributes.
      predictor_cols = setdiff(task$feature_names, task$col_roles$pta)
      predictors = int_to_numeric(task$data(cols = predictor_cols))

      mlr3misc::invoke(fairml::fgrrm, response = response, predictors = predictors, sensitive = sensitive, .args = fit_args)
    },

    .predict = function(task) {
      predict_pars = self$param_set$get_values(tags = "predict")

      sensitive = get_pta(task, intersect = predict_pars$intersect)
      sensitive = map_dtc(sensitive, as.numeric)

      predictor_cols = setdiff(self$state$feature_names, task$col_roles$pta)
      predictors = int_to_numeric(task$data(cols = predictor_cols))

      if (self$predict_type != "response") {
        prob = mlr3misc::invoke(predict, self$model, new.predictors = predictors, new.sensitive = sensitive, type = "response")
        prob = if (length(task$class_names) == 2L) {
          pprob_to_matrix(prob, task)
        } else {
          prob[, , 1L]
        }
        return(list(prob = prob))
      }

      pred = mlr3misc::invoke(predict, self$model, new.predictors = predictors, new.sensitive = sensitive, type = "class")
      list(response = drop(pred))
    }
  )
)
93 | 


--------------------------------------------------------------------------------
/R/learner_fairml_classif_fairzlrm.R:
--------------------------------------------------------------------------------
 1 | #' @title Classification Fair Logistic Regression With Covariance Constraints Learner
 2 | #' @author pfistfl
 3 | #' @details
 4 | #' Generalized fair regression model from Zafar et al., 2019 implemented via package `fairml`.
 5 | #' The 'unfairness' parameter is set to 0.05 as a default.
 6 | #' The optimized fairness metric is statistical parity.
 7 | #'
 8 | #' @name mlr_learners_classif.fairzlrm
 9 | #'
10 | #' @template class_learner
11 | #' @templateVar id classif.fairzlrm
12 | #' @templateVar caller zlrm
13 | #'
14 | #' @references
15 | #' `r format_bib("zafar19a")`
16 | #'
17 | #' @template seealso_learner
18 | #' @template example
19 | #' @export
LearnerClassifFairzlrm = R6Class("LearnerClassifFairzlrm",
  inherit = LearnerClassif,

  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      param_set = ps(
        unfairness = p_dbl(lower = 0, upper = 1, tags = "train"),
        intersect = p_lgl(default = TRUE, tags = c("train", "predict"))
      )
      # Initial values set explicitly for this learner.
      param_set$values = list(unfairness = 0.05, intersect = FALSE)

      super$initialize(
        id = "classif.fairzlrm",
        packages = c("fairml", "CVXR"),
        feature_types = c("integer", "numeric", "factor", "ordered"),
        predict_types = c("response", "prob"),
        properties = "twoclass",
        param_set = param_set,
        man = "mlr3fairness::mlr_learners_classif.fairzlrm"
      )
    }
  ),

  private = list(

    .train = function(task) {
      assert_pta_task(task)
      train_pars = self$param_set$get_values(tags = "train")

      # Remember the feature names so that predict uses the same columns.
      self$state$feature_names = task$feature_names

      response = task$truth()
      sensitive = get_pta(task, intersect = train_pars$intersect)

      # `intersect` only steers get_pta() and is not an argument of zlrm().
      fit_args = remove_named(train_pars, "intersect")

      # Predictors are all features except the protected attributes.
      predictor_cols = setdiff(task$feature_names, task$col_roles$pta)
      predictors = int_to_numeric(task$data(cols = predictor_cols))

      mlr3misc::invoke(fairml::zlrm, response = response, sensitive = sensitive,
        predictors = predictors, .args = fit_args)
    },

    .predict = function(task) {
      predict_pars = self$param_set$get_values(tags = "predict")

      # NOTE(review): the protected attributes are retrieved here but are not
      # passed on to predict() below -- confirm whether this call is needed.
      sensitive = get_pta(task, intersect = predict_pars$intersect)

      predictor_cols = setdiff(self$state$feature_names, task$col_roles$pta)
      predictors = int_to_numeric(task$data(cols = predictor_cols))

      if (self$predict_type != "response") {
        prob = mlr3misc::invoke(predict, self$model, new.predictors = predictors, type = "response")
        prob = if (length(task$class_names) == 2L) {
          pprob_to_matrix(prob, task)
        } else {
          prob[, , 1L]
        }
        return(list(prob = prob))
      }

      pred = mlr3misc::invoke(predict, self$model, new.predictors = predictors, type = "class")
      list(response = drop(pred))
    }
  )
)
85 | 
86 | 


--------------------------------------------------------------------------------
/R/learner_fairml_regr_fairfrrm.R:
--------------------------------------------------------------------------------
 1 | #' @title Regression Fair Ridge Regression Learner
 2 | #' 
 3 | #' @details 
 4 | #' Fair ridge regression learner implemented via package `fairml`.
 5 | #' The 'unfairness' parameter has been initialized to 0.05.
 6 | #' @template intersect
 7 | #'
 8 | #' @author pfistfl
 9 | #' @name mlr_learners_regr.fairfrrm
10 | #'
11 | #' @template class_learner
12 | #' @templateVar id regr.fairfrrm
13 | #' @templateVar caller frrm
14 | #'
15 | #' @references
16 | #' `r format_bib("scutari21")`
17 | #'
18 | #' @template seealso_learner
19 | #' @template example
20 | #' @export
LearnerRegrFairfrrm = R6Class("LearnerRegrFairfrrm",
  inherit = LearnerRegr,

  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      param_set = ps(
        lambda = p_dbl(lower = 0, upper = Inf, tags = "train", default = 0),
        definition = p_fct(levels = c("sp-komiyama", "eo-komiyama"), default = "sp-komiyama", tags = "train"),
        save.auxiliary = p_lgl(default = FALSE, tags = "train"),
        unfairness = p_dbl(lower = 0, upper = 1, tags = "train")
      )
      # Initial value set explicitly for this learner.
      param_set$values = list(unfairness = .05)

      super$initialize(
        id = "regr.fairfrrm",
        packages = "fairml",
        feature_types = c("integer", "numeric", "factor", "ordered"),
        predict_types = c("response"),
        param_set = param_set,
        man = "mlr3fairness::mlr_learners_regr.fairfrrm"
      )
    }
  ),

  private = list(

    .train = function(task) {
      assert_pta_task(task)
      train_pars = self$param_set$get_values(tags = "train")

      # Remember the feature names so that predict uses the same columns.
      self$state$feature_names = task$feature_names

      response = as.numeric(task$truth())
      sensitive = get_pta(task, intersect = FALSE)

      # Predictors are all features except the protected attributes.
      predictor_cols = setdiff(task$feature_names, task$col_roles$pta)
      predictors = int_to_numeric(task$data(cols = predictor_cols))

      mlr3misc::invoke(fairml::frrm, response = response, predictors = predictors, sensitive = sensitive, .args = train_pars)
    },

    .predict = function(task) {
      predict_pars = self$param_set$get_values(tags = "predict")

      sensitive = get_pta(task, intersect = FALSE)
      predictor_cols = setdiff(self$state$feature_names, task$col_roles$pta)
      predictors = int_to_numeric(task$data(cols = predictor_cols))

      list(
        response = mlr3misc::invoke(predict, self$model, new.predictors = predictors, new.sensitive = sensitive, .args = predict_pars)
      )
    }
  )
)
75 | 


--------------------------------------------------------------------------------
/R/learner_fairml_regr_fairnclm.R:
--------------------------------------------------------------------------------
 1 | #' @title Regression Non-convex Fair Regression Learner
 2 | #' @author pfistfl
 3 | #' 
 4 | #' @details 
 5 | #' Fair regression model based on nonconvex optimization from Komiyama et al. (2018).
 6 | #' Implemented via package `fairml`.
 7 | #' The 'unfairness' parameter is set to 0.05 as a default.
 8 | #' 
 9 | #' @name mlr_learners_regr.fairnclm
10 | #' @template class_learner
11 | #' @templateVar id regr.fairnclm
12 | #' @templateVar caller nclm
13 | #' @references
14 | #' `r format_bib("komiyama")`
15 | #'
16 | #' @template seealso_learner
17 | #' @template example
18 | #' @export
LearnerRegrFairnclm = R6Class("LearnerRegrFairnclm",
  inherit = LearnerRegr,

  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      ps = ps(
        lambda = p_dbl(lower = 0, upper = Inf, tags = "train", default = 0),
        save.auxiliary = p_lgl(default = FALSE, tags = "train"),
        covfun = p_uty(tags = "train", default = "stats::cov"),
        unfairness = p_dbl(lower = 0, upper = 1, tags = "train")
      )
      # Initial value set explicitly for this learner.
      ps$values = list(unfairness = 0.05)

      super$initialize(
        id = "regr.fairnclm",
        packages = "fairml",
        feature_types = c("integer", "numeric", "factor", "ordered"),
        predict_types = c("response"),
        param_set = ps,
        man = "mlr3fairness::mlr_learners_regr.fairnclm"
      )
    }
  ),

  private = list(

    .train = function(task) {
      assert_pta_task(task)
      # get parameters for training; this param set has no `intersect`
      # parameter, so the values can be handed to nclm() unmodified
      pars = self$param_set$get_values(tags = "train")

      # set column names to ensure consistency in fit and predict
      self$state$feature_names = task$feature_names
      pta = task$col_roles$pta
      r = as.numeric(task$truth())
      s = get_pta(task, intersect = FALSE)

      # predictors are all features except the protected attributes
      p = task$data(cols = setdiff(task$feature_names, pta))
      p = int_to_numeric(p)
      mlr3misc::invoke(fairml::nclm, response = r, predictors = p, sensitive = s, .args = pars)
    },

    .predict = function(task) {
      # get parameters with tag "predict"
      pars = self$param_set$get_values(tags = "predict")

      pta = task$col_roles$pta
      s = get_pta(task, intersect = FALSE)
      p = task$data(cols = setdiff(self$state$feature_names, pta))
      p = int_to_numeric(p)
      pred = mlr3misc::invoke(predict, self$model, new.predictors = p, new.sensitive = s, .args = pars)
      list(response = pred)
    }
  )
)
78 | 


--------------------------------------------------------------------------------
/R/learner_fairml_regr_fairzlm.R:
--------------------------------------------------------------------------------
 1 | #' @title Regression Fair Regression With Covariance Constraints Learner
 2 | #' @author pfistfl
 3 | #' @details
 4 | #' Fair regression model from Zafar et al., 2019 implemented via package `fairml`.
 5 | #' The 'unfairness' parameter is set to 0.05 as a default.
 6 | #' The optimized fairness metric is statistical parity.
 7 | #'
 8 | #' @name mlr_learners_regr.fairzlm
 9 | #'
10 | #' @template class_learner
11 | #' @templateVar id regr.fairzlm
12 | #' @templateVar caller zlm
13 | #'
14 | #' @references
15 | #' `r format_bib("zafar19a")`
16 | #'
17 | #' @template seealso_learner
18 | #' @template example
19 | #' @export
LearnerRegrFairzlm = R6Class("LearnerRegrFairzlm",
  inherit = LearnerRegr,

  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      param_set = ps(
        unfairness = p_dbl(lower = 0, upper = 1, tags = "train"),
        intersect = p_lgl(default = TRUE, tags = c("train", "predict"))
      )
      # Initial values set explicitly for this learner.
      param_set$values = list(unfairness = 0.05, intersect = FALSE)

      super$initialize(
        id = "regr.fairzlm",
        packages = c("fairml", "CVXR"),
        feature_types = c("integer", "numeric", "factor", "ordered"),
        predict_types = c("response"),
        param_set = param_set,
        man = "mlr3fairness::mlr_learners_regr.fairzlm"
      )
    }
  ),

  private = list(

    .train = function(task) {
      assert_pta_task(task)
      train_pars = self$param_set$get_values(tags = "train")

      # Remember the feature names so that predict uses the same columns.
      self$state$feature_names = task$feature_names

      response = as.numeric(task$truth())
      sensitive = get_pta(task, intersect = train_pars$intersect)

      # `intersect` only steers get_pta() and is not an argument of zlm().
      fit_args = remove_named(train_pars, "intersect")

      # Predictors are all features except the protected attributes.
      predictor_cols = setdiff(task$feature_names, task$col_roles$pta)
      predictors = int_to_numeric(task$data(cols = predictor_cols))

      mlr3misc::invoke(fairml::zlm, response = response, sensitive = sensitive, predictors = predictors, .args = fit_args)
    },

    .predict = function(task) {
      predict_pars = self$param_set$get_values(tags = "predict")

      # NOTE(review): the protected attributes are retrieved here but are not
      # passed on to predict() below -- confirm whether this call is needed.
      sensitive = get_pta(task, intersect = predict_pars$intersect)

      # `intersect` must not be forwarded to predict().
      predict_args = remove_named(predict_pars, "intersect")

      predictor_cols = setdiff(self$state$feature_names, task$col_roles$pta)
      predictors = int_to_numeric(task$data(cols = predictor_cols))

      list(
        response = mlr3misc::invoke(predict, self$model, new.predictors = predictors, .args = predict_args)
      )
    }
  )
)
76 | 


--------------------------------------------------------------------------------
/R/measure_operations.R:
--------------------------------------------------------------------------------
 1 | #' @title Groupwise Operations
 2 | #'
 3 | #' @description
#' `groupdiff_tau()` computes \eqn{min(x/y, y/x)}, i.e. the smallest symmetric ratio between \eqn{x} and \eqn{y}
 5 | #' that is smaller than 1. If \eqn{x} is a vector, the symmetric ratio between all
 6 | #' elements in \eqn{x} is computed.
 7 | #'
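#' `groupdiff_absdiff()` computes \eqn{max(abs(x-y))}, i.e. the largest absolute difference
#' between \eqn{x} and \eqn{y}.
#' If \eqn{x} is a vector, the largest absolute difference between all pairs of elements in \eqn{x} is computed.
#'
#' `groupdiff_diff()` computes the signed difference between the pair of elements in \eqn{x}
#' whose absolute difference is largest.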
11 | #'
12 | #' @template pta
13 | #'
14 | #' @param x (`numeric()`)\cr
15 | #'   Measured performance in group 1, 2, ...
16 | #' @export
17 | #' @return A single `numeric`.
18 | #' @examples
19 | #' groupdiff_tau(1:3)
20 | #' groupdiff_diff(1:3)
21 | #' groupdiff_absdiff(1:3)
groupdiff_tau = function(x) {
  assert_numeric(x, min.len = 2L)

  # Missing group performances make the ratio undefined.
  if (anyMissing(x)) return(NA)
  # All-zero performances are defined to yield 0.
  if (!any(x != 0)) return(0)

  # All pairwise ratios x[i] / x[j]; keep ratios that are at most 1, are not
  # NaN (0 / 0) and are not masked by the non-zero diagonal entries of diag(x).
  ratios = outer(x, x, FUN = "/")
  eligible = ratios <= 1 & !diag(x) & !is.nan(ratios)
  min(ratios[eligible], na.rm = TRUE)
}
36 | 
37 | #' @export
38 | #' @rdname groupdiff_tau
groupdiff_absdiff = function(x) {
  assert_numeric(x, min.len = 2L)

  # Missing group performances make the difference undefined.
  if (anyMissing(x)) return(NA)
  # All-zero performances are defined to yield 0.
  if (!any(x != 0)) return(0)

  # Largest pairwise absolute difference between the elements of x.
  pairwise = dist(x, method = "manhattan")
  max(pairwise, na.rm = TRUE)
}
51 | 
52 | #' @export
53 | #' @rdname groupdiff_tau
groupdiff_diff = function(x) {
  assert_numeric(x, min.len = 2L)

  # Missing group performances make the difference undefined.
  if (anyMissing(x)) return(NA)
  # All-zero performances are defined to yield 0.
  if (!any(x != 0)) return(0)

  # Signed differences x[i] - x[j] for every pair i < j ...
  pairwise = outer(x, x, "-")
  candidates = pairwise[upper.tri(pairwise)]
  # ... of which the one with the largest magnitude is returned (sign kept).
  largest = which.max(abs(candidates))
  candidates[largest]
}
70 | 


--------------------------------------------------------------------------------
/R/mlr_learners_fairness.R:
--------------------------------------------------------------------------------
 1 | #' @title Fair Learners in mlr3
 2 | #' @name mlr_learners_fairness
 3 | #'
#' @section Predefined learners:
#' \CRANpkg{mlr3fairness} comes with a set of predefined fair learners listed below:
 6 | #'
 7 | #' `r tabular(mlr_learners_fairness)`
 8 | #' 
 9 | #' @template pta
10 | #' @export
11 | #' @return A data.table containing an overview of available fair learners.
12 | #' @examples
13 | #' library("mlr3")
14 | #' # Available learners:
15 | #' mlr_learners_fairness
mlr_learners_fairness = rowwise_table(
  # One row per learner: learner key, implementing package, literature reference.
  ~key,                ~package, ~reference,
  "regr.fairfrrm",     "fairml", "Scutari et al., 2021",
  "classif.fairfgrrm", "fairml", "Scutari et al., 2021",
  "regr.fairzlm",      "fairml", "Zafar et al., 2019",
  "classif.fairzlrm",  "fairml", "Zafar et al., 2019",
  "regr.fairnclm",     "fairml", "Komiyama et al., 2018"
)
24 | 


--------------------------------------------------------------------------------
/R/mlr_measures_fairness.R:
--------------------------------------------------------------------------------
 1 | #' @title Fairness Measures in mlr3
 2 | #' @name mlr_measures_fairness
 3 | #'
 4 | #' @section Predefined measures:
 5 | #' \CRANpkg{mlr3fairness} comes with a set of predefined fairness measures as listed below.
 6 | #' For full flexibility, [MeasureFairness] can be used to construct classical
 7 | #' group fairness measures based on a difference between a performance metrics across groups
 8 | #' by combining a performance measure with an operation for measuring differences.
 9 | #' Furthermore [MeasureSubgroup] can be used to measure performance in a given subgroup, or alternatively
10 | #' groupwise_metrics(measure, task) to instantiate a measure for each subgroup in a [Task].
11 | #'
12 | #' `r tabular(mlr_measures_fairness)`
13 | #'
14 | #' @export
15 | #' @return A data.table containing an overview of available fairness metrics.
16 | #' @examples
17 | #' library("mlr3")
18 | #' # Predefined measures:
19 | #' mlr_measures_fairness$key
mlr_measures_fairness = rowwise_table(
  # One row per predefined measure: measure key and human-readable description.
  ~key, ~description,
  "fairness.acc", "Absolute differences in accuracy across groups",
  "fairness.mse", "Absolute differences in mean squared error across groups",

  # Rates derived from the confusion matrix
  "fairness.fnr", "Absolute differences in false negative rates across groups",
  "fairness.fpr", "Absolute differences in false positive rates across groups",
  "fairness.tnr", "Absolute differences in true negative rates across groups",
  "fairness.tpr", "Absolute differences in true positive rates across groups",

  "fairness.npv", "Absolute differences in negative predictive values across groups",
  "fairness.ppv", "Absolute differences in positive predictive values across groups",
  "fairness.fomr", "Absolute differences in false omission rates across groups",

  # Counts from the confusion matrix
  "fairness.fp", "Absolute differences in false positives across groups",
  "fairness.tp", "Absolute differences in true positives across groups",
  "fairness.tn", "Absolute differences in true negatives across groups",
  "fairness.fn", "Absolute differences in false negatives across groups",

  "fairness.cv", "Difference in positive class prediction, also known as Calders-Verwer gap or demographic parity",
  "fairness.eod", "Equalized Odds: Mean of absolute differences between true positive and false positive rates across groups",
  "fairness.pp", "Predictive Parity: Mean of absolute differences between ppv and npv across groups",

  # Constrained measures
  "fairness.acc_eod=.05", "Accuracy under equalized odds < 0.05 constraint",
  "fairness.acc_ppv=.05", "Accuracy under ppv difference < 0.05 constraint"
)
46 | 


--------------------------------------------------------------------------------
/R/reports.R:
--------------------------------------------------------------------------------
 1 | #' Create a Datasheet for Documenting a Dataset
 2 | #'
 3 | #' Creates a new \CRANpkg{rmarkdown} template with a skeleton questionnaire for dataset documentation.
 4 | #' Uses the awesome markdown template created by Chris Garbin
 5 | #' \href{https://github.com/fau-masters-collected-works-cgarbin/model-card-template}{from Github}.
 6 | #'
 7 | #' @param filename (`character(1)`)\cr
 8 | #'   File path or name for new file that should be created.
 9 | #' @param edit (`logical(1)`)\cr
10 | #'   `TRUE` to edit the template immediately.
11 | #' @param build (`logical(1)`)\cr
12 | #'   Should the report be built after creation? Initialized to `FALSE`.
13 | #' @references
14 | #' `r format_bib("datasheets")`
15 | #' @family fairness_reports
16 | #' @export
17 | #' @return Invisibly returns the path to the newly created file(s).
18 | #' @examples
19 | #'   report_file = tempfile()
20 | #'   report_datasheet(report_file)
report_datasheet = function(filename = "datasheet.Rmd", edit = FALSE, build = FALSE) {
  assert_path_for_output(filename)
  assert_flag(edit)
  assert_flag(build)

  # Instantiate the packaged "datasheets" template at `filename`.
  draft_path = rmarkdown::draft(
    filename,
    template = "datasheets",
    package = "mlr3fairness",
    create_dir = TRUE,
    edit = edit
  )

  # Optionally render the freshly created document.
  if (build) {
    rmarkdown::render(draft_path)
  }
  invisible(draft_path)
}
29 | 
30 | #' Create a Modelcard
31 | #'
32 | #' Creates a new \CRANpkg{rmarkdown} template with a skeleton questionnaire for a model card.
33 | #' Uses the awesome markdown template created by Chris Garbin
34 | #' \href{https://github.com/fau-masters-collected-works-cgarbin/model-card-template}{from Github}.
35 | #'
36 | #' @inheritParams report_datasheet
37 | #' @references
38 | #' `r format_bib("modelcards")`
39 | #' @family fairness_reports
40 | #' @export
41 | #' @return Invisibly returns the path to the newly created file(s).
42 | #' @examples
43 | #'   report_file = tempfile()
44 | #'   report_modelcard(report_file)
report_modelcard = function(filename = "modelcard.Rmd", edit = FALSE, build = FALSE) {
  assert_path_for_output(filename)
  assert_flag(edit)
  assert_flag(build)

  # Instantiate the packaged "modelcards" template at `filename`.
  draft_path = rmarkdown::draft(
    filename,
    template = "modelcards",
    package = "mlr3fairness",
    create_dir = TRUE,
    edit = edit
  )

  # Optionally render the freshly created document.
  if (build) {
    rmarkdown::render(draft_path)
  }
  invisible(draft_path)
}
53 | 
54 | #' Create a Fairness Report
55 | #'
56 | #' Creates a new \CRANpkg{rmarkdown} template with a skeleton of  reported metrics and visualizations.
57 | #' Uses the awesome markdown template created by Chris Garbin
58 | #' \href{https://github.com/fau-masters-collected-works-cgarbin/model-card-template}{from Github}.
59 | #'
60 | #' @inheritParams report_datasheet
61 | #' @param objects (`list()`)\cr
62 | #'   A named list of objects required for the fairness report.
63 | #'   Objects are saved as `<name>.rds` in the new folder created for the report.
64 | #'   * `task` :: The [`Task`] a report should be created for.
65 | #'   * `resample_result` ::  A [mlr3::ResampleResult] result to be analyzed.
66 | #'   * `...` :: any other objects passed on for the report.
67 | #' @param check_objects (`logical(1)`)\cr
#'   Should items in `objects` be checked? If `FALSE`, no checks on `objects` are performed.
69 | #' @family fairness_reports
70 | #' @export
71 | #' @return Invisibly returns the path to the newly created file(s).
72 | #' @examples
73 | #'   library("mlr3")
74 | #'   report_file = tempfile()
75 | #'   task = tsk("compas")
76 | #'   learner = lrn("classif.rpart", predict_type = "prob")
77 | #'   rr = resample(task, learner, rsmp("cv", folds = 3L))
78 | #'   report_fairness(report_file, list(task = task, resample_result = rr))
report_fairness = function(filename = "fairness_report.Rmd", objects, edit = FALSE, check_objects = FALSE, build = FALSE) {
  assert_path_for_output(filename)
  assert_list(objects, names = "unique")
  assert_flag(edit)
  assert_flag(check_objects)
  assert_flag(build)

  # Optional validation: both required entries must be present and well-typed.
  if (check_objects) {
    required = c("resample_result", "task")
    assert_subset(required, names(objects))
    assert_resample_result(objects$resample_result)
    assert_task(objects$task)
  }

  # Instantiate the packaged "fairness_report" template at `filename`.
  report_path = rmarkdown::draft(
    filename,
    template = "fairness_report",
    package = "mlr3fairness",
    create_dir = TRUE,
    edit = edit
  )

  # Store the supplied objects in the directory containing the report.
  write_files(objects, dirname(report_path))

  # Optionally render the freshly created document.
  if (build) {
    rmarkdown::render(report_path)
  }
  invisible(report_path)
}
96 | 


--------------------------------------------------------------------------------
/attic/PipeOpEOd.R:
--------------------------------------------------------------------------------
  1 | if (FALSE) {
  2 |   library(mlr3fairness)
  3 |   t = tsk("compas")
  4 |   l = po("learner_cv", lrn("classif.rpart", cp = 0.001, maxdepth = 8))
  5 |   ot = l$train(list(t))[[1]]
  6 |   pta = ot$col_roles$pta
  7 |   tgt = ot$col_roles$target
  8 |   prd = ot$col_roles$feature
  9 |   pos = ot$positive
 10 |   prv = ot$levels(pta)[[pta]][1]
 11 |   dt = ot$data(cols = c(pta, tgt, prd))
 12 |   dt[, colnames(dt) := map(.SD, as.factor), .SDcols = colnames(dt)]
 13 |   table(dt$two_year_recid, dt$classif.rpart.response, dt$sex)
 14 |   br = dt[, .N, by = pta]
 15 |   sbr = br[get(pta) == prv][["N"]]
 16 |   obr = br[get(pta) != prv][["N"]]
 17 | 
 18 |   r = dt[, map(list(fpr, fnr, tpr, tnr, .N), function(fn) fn(get(tgt), get(prd), pos)), by = pta]
 19 |   names(r) = c(pta, c("fpr", "fnr", "tpr", "tnr", "base_rate"))
 20 |   r[, base_rate := base_rate / nrow(dt)]
 21 |   r
 22 | 
 23 |   is_prv = dt[,get(pta) == prv]
 24 | 
 25 |   # Compute priviledged/unpriviledged pos. and negative samples
 26 |   sconst = dt[is_prv, get(prd) == pos]
 27 |   sflip =  dt[is_prv, get(prd) != pos]
 28 |   oconst = dt[!is_prv, get(prd) == pos]
 29 |   oflip =  dt[!is_prv, get(prd) != pos]
 30 | 
 31 |   y_true = dt[[tgt]]
 32 | 
 33 |   sm_tn = (y_true[is_prv] != pos) & sflip
 34 |   sm_fn = (y_true[is_prv] == pos) & sflip
 35 |   sm_fp = (y_true[is_prv] != pos) & sconst
 36 |   sm_tp = (y_true[is_prv] == pos) & sconst
 37 |   om_tn = (y_true[!is_prv] != pos) & oflip
 38 |   om_fn = (y_true[!is_prv] == pos) & oflip
 39 |   om_fp = (y_true[!is_prv] != pos) & oconst
 40 |   om_tp = (y_true[!is_prv] == pos) & oconst
 41 | 
 42 |   # Inequality constraints (upper, lower)
 43 |   A_ineq = rbind(
 44 |     c( 1,  0,  0,  0),
 45 |     c(-1,  0,  0,  0),
 46 |     c( 0,  1,  0,  0),
 47 |     c( 0, -1,  0,  0),
 48 |     c( 0,  0,  1,  0),
 49 |     c( 0,  0, -1,  0),
 50 |     c( 0,  0,  0,  1),
 51 |     c( 0,  0,  0, -1)
 52 |   )
 53 |   b_ineq = c(1, 0, 1, 0, 1, 0, 1, 0)
 54 | 
 55 |   # Equality constraints: divide each whole difference by sbr/obr, matching the grouping used in b_eq below
 56 |   A_eq = cbind(c(
 57 |       (mean(sconst*sm_tp) - mean(sflip  * sm_tp)) / sbr,
 58 |       (mean(sflip*sm_fn)  - mean(sconst * sm_fn)) / sbr,
 59 |       (mean(oflip*om_tp)  - mean(oconst * om_tp)) / obr,
 60 |       (mean(oconst*om_fn) - mean(oflip  * om_fn)) / obr),
 61 |     c(
 62 |       (mean(sconst*sm_fp) - mean(sflip  * sm_fp)) / (1-sbr),
 63 |       (mean(sflip*sm_tn)  - mean(sconst * sm_tn)) / (1-sbr),
 64 |       (mean(oflip*om_fp)  - mean(oconst * om_fp)) / (1-obr),
 65 |       (mean(oconst*om_tn) - mean(oflip  * om_tn)) / (1-obr)
 66 |     )
 67 |   )
 68 |   b_eq = c(
 69 |     (mean(oflip*om_tp) + mean(oconst*om_fn)) / obr     - (mean(sflip*sm_tp) + mean(sconst*sm_fn)) / sbr,
 70 |     (mean(oflip*om_fp) + mean(oconst*om_tn)) / (1-obr) - (mean(sflip*sm_fp) + mean(sconst*sm_tn)) / (1-sbr)
 71 |   )
 72 | 
 73 | 
 74 |   Amat = rbind(
 75 |     cbind(A_ineq, t(matrix(0, nrow = nrow(A_eq), ncol = nrow(A_ineq)))),
 76 |     cbind(matrix(0, nrow = ncol(A_eq), ncol = nrow(A_eq)), t(A_eq))
 77 |   )
 78 |   Amat = rbind(A_ineq, t(A_eq))
 79 |   bvec = c(b_ineq, b_eq)
 80 |   cvec = c(  # objective: (fpr - tpr, tnr - fnr) for row 1 of r, then the same pair for row 2
 81 |     r$fpr[1] - r$tpr[1],
 82 |     r$tnr[1] - r$fnr[1],
 83 |     r$fpr[2] - r$tpr[2],  # was fpr[2] - fpr[2], which is always 0
 84 |     r$tnr[2] - r$fnr[2]
 85 |   )
 86 |   const_dir = c(rep("<=", length(b_ineq)), rep("==", length(b_eq)))
 87 |   solveLP(cvec, bvec, Amat, const.dir = const_dir, lpSolve = TRUE, maxiter = 1e4, zero=1e-16)
 88 |   self$coefficients
 89 |   library(reticulate)
 90 | 
 91 |   o = import("scipy.optimize")
 92 |   o$linprog(cvec[1:4], A_ineq, b_ineq, t(A_eq), b_eq)
 93 | 
 94 | 
 95 | 
 96 | 
 97 | 
 98 | 
 99 | # FACT approach:
100 | 
101 | ns = map_dbl(ft, sum)
102 | ms = map_dbl(ft, function(x) sum(x[,1]))
103 | n = sum(ns)
104 | z = unlist(ft) / n
105 | 
106 | Acst = rbind(
107 |   c(1,1,1,1,0,0,0,0),
108 |   c(1,1,0,0,0,0,0,0),
109 |   c(0,0,0,0,1,1,1,1),
110 |   c(0,0,0,0,1,1,0,0)
111 |  )
112 | 
113 | bcst = c(ms[1], ns[1], ms[2], ms[2]) / n
114 | 
115 | Aeod = cbind(
116 |   c(ms[1], 0, 0          , 0, -ms[2], 0, 0           , 0),
117 |   c(0    , 0, ns[1]-ms[1], 0, 0     , 0, -ns[2]+ms[2], 0)
118 | )
119 | 
120 | 
121 | 
122 | }


--------------------------------------------------------------------------------
/attic/create_learners.R:
--------------------------------------------------------------------------------
 1 | create_learner(classname = "fairnclm",  # spelled as in R/learner_fairml_regr_fairnclm.R
 2 |                algorithm = "non-convex fair regression",
 3 |                type = "regr",
 4 |                key = "fairnclm",
 5 |                package = "fairml",
 6 |                caller = "regr.fairnclm",
 7 |                feature_types = c("integer", "numeric"),
 8 |                predict_types = "response",
 9 |                properties = NULL,
10 |                references = TRUE,
11 |                gh_name = "pfistfl")
12 | 
13 | 
14 | create_learner(classname = "fairfrrm",
15 |                algorithm = "fair ridge regression",
16 |                type = "regr",
17 |                key = "fairfrrm",
18 |                package = "fairml",
19 |                caller = "regr.fairfrrm",
20 |                feature_types = c("integer", "numeric"),
21 |                predict_types = "response",
22 |                properties = NULL,
23 |                references = TRUE,
24 |                gh_name = "pfistfl")
25 | 
26 | create_learner(classname = "fairfgrrm",
27 |                algorithm = "fair generalized ridge regression",
28 |                type = "classif",
29 |                key = "fairfgrrm",
30 |                package = "fairml",
31 |                caller = "classif.fairfgrrm",
32 |                feature_types = c("integer", "numeric"),
33 |                predict_types = "response",
34 |                properties = NULL,
35 |                references = TRUE,
36 |                gh_name = "pfistfl")
37 | 
38 | 
39 | create_learner(classname = "fairzlrm",
40 |                algorithm = "fair logistic regression with covariance constraints",
41 |                type = "classif",
42 |                key = "fairzlrm",
43 |                package = "fairml",
44 |                caller = "classif.fairzlrm",
45 |                feature_types = c("integer", "numeric"),
46 |                predict_types = "response",
47 |                properties = NULL,
48 |                references = TRUE,
49 |                gh_name = "pfistfl")
50 | 
51 | create_learner(classname = "fairzlm",
52 |                algorithm = "fair regression with covariance constraints",
53 |                type = "regr",
54 |                key = "fairzlm",
55 |                package = "fairml",
56 |                caller = "regr.fairzlm",
57 |                feature_types = c("integer", "numeric"),
58 |                predict_types = "response",
59 |                properties = NULL,
60 |                references = TRUE,
61 |                gh_name = "pfistfl")


--------------------------------------------------------------------------------
/attic/make_reports.R:
--------------------------------------------------------------------------------
 1 | library(gh)
 2 | 
 3 | "~/articles/"
 4 | dir = tempdir()  # all report drafts are created below the session's temporary directory
 5 | report_file = paste0(dir, "/datasheet")
 6 | unlink(report_file, recursive = TRUE)  # remove the output of a previous run
 7 | fp = report_datasheet(report_file)
 8 | rmarkdown::render(fp)  # qualified: this script does not attach rmarkdown
 9 | 
10 | report_file = paste0(dir, "/modelcard")
11 | unlink(report_file, recursive = TRUE)
12 | fp = report_modelcard(report_file)
13 | rmarkdown::render(fp)
14 | 
15 | 
16 | task = tsk("compas")
17 | learner = lrn("classif.rpart", predict_type = "prob")
18 | rr = resample(task, learner, rsmp("cv", folds = 3L))
19 | report_file = paste0(dir, "/fairness")
20 | unlink(report_file, recursive = TRUE)
21 | fp = report_fairness(report_file, list(task = task, resample_result = rr))  # objects required when check_objects is TRUE
22 | rmarkdown::render(fp)
23 | 


--------------------------------------------------------------------------------
/attic/rfc_template.md:
--------------------------------------------------------------------------------
 1 | ```
 2 | Feature Name: `do_something`
 3 | Start Date: 2019-11-04
 4 | Target Date:
 5 | ```
 6 | 
 7 | ## Summary
 8 | [summary]: #summary
 9 | 
10 | Add method foo and bar, this allows to baz.
11 | 
12 | ## Motivation
13 | [motivation]: #motivation
14 | 
15 | We need baz in order to foo. This can also be longer and more
16 | verbose.
17 | 
18 | ## Guide-level explanation
19 | [guide-level-explanation]: #guide-level-explanation
20 | 
21 | Sometimes, when applying a forecasting method, what occurs is ...
22 | 
23 | Example:
24 | ```r
25 | foo = 1
26 | bar = 2
27 | do_something(foo, bar)
28 | ```
29 | 
30 | I propose to change this to bar, as this would ...
31 | 
32 | 
33 | ## Reference-level explanation
34 | [reference-level-explanation]: #reference-level-explanation
35 | 
36 | Internally, the function would look the following:
37 | 
38 | Example:
39 | ```r
40 | do_something = function(a, b) {
41 |   ...
42 | }
43 | ```
44 | 
45 | 
46 | ## Rationale, drawbacks and alternatives
47 | [rationale-and-alternatives]: #rationale-and-alternatives
48 | 
49 | This design seems a fairly obvious choice in the design space.
50 | The main alternative to this proposal is not to implement it,
51 | and let users calculate joined subslices from indexes or pointers.
52 | 
53 | ## Prior art
54 | [prior-art]: #prior-art
55 | 
56 | There exists a function that implements the API as here...
57 | 
58 | ## Introduced Dependencies
59 | This solution would introduce dependencies on the following (additional) packages:
60 | 
61 | Those packages either depend on or import the following other (additional) packages:
62 | 
63 | Using this package would allow us to ... instead of re-implementing and maintaining
64 | N loc ourselves.
65 | 
66 | 
67 | ## Unresolved questions
68 | [unresolved-questions]: #unresolved-questions
69 | 


--------------------------------------------------------------------------------
/attic/rfc_visualization.md:
--------------------------------------------------------------------------------
 1 | ```
 2 | Feature Name: `do_something`
 3 | Start Date: 2020-07-01
 4 | Target Date: 2020-07-30
 5 | ```
 6 | 
 7 | ## Summary
 8 | [summary]: #summary
 9 | 
10 | Implement the common fairness visualizations, along with clear documentation on how to interpret them, in the mlr3fairness package.
11 | 
12 | ## Motivation
13 | [motivation]: #motivation
14 | 
15 | We need fairness visualizations to make the package more user friendly. Most users are not expected to understand fairness problems from the fairness metrics alone; to non-experts they are just numbers. Fairness visualizations, however, offer a clear and thorough way to understand the fairness problems.
16 | 
17 | ## Guide-level explanation
18 | [guide-level-explanation]: #guide-level-explanation
19 | 
20 | There will be multiple visualization functions users can interact with. For example, `fairness_compare()` can compare the fairness metrics of one model or between multiple models.
21 | 
22 | For visualizations of the dataset or of basic measures, users can use mlr3viz or create their own custom visualizations.
23 | 
24 | Example:
25 | ```r
26 | compas = tsk("compas")
27 | learner = lrn("classif.rpart")  # mlr3 classification learner keys start with "classif."; avoid shadowing lrn()
28 | learner$train(compas)
29 | 
30 | measures = msrs(c("fairness.tpr", "fairness.fpr"))
31 | fairness_compare(object = learner$predict(compas), measures, compas)
32 | ```
33 | 
34 | ```r
35 | design = benchmark_grid(
36 |   tasks = tsk("adult_train"),
37 |   learners = lrns(c("classif.ranger", "classif.rpart"),
38 |                   predict_type = "prob", predict_sets = c("train", "test")),
39 |   resamplings = rsmps("cv", folds = 3)
40 | )
41 | 
42 | bmr = benchmark(design)
43 | measures = msrs(c("fairness.tpr", "fairness.fpr"))
44 | fairness_compare(object = bmr, measures)
45 | ```
46 | 
47 | 
48 | ## Reference-level explanation
49 | [reference-level-explanation]: #reference-level-explanation
50 | 
51 | Internally, we want the visualization functions to be able to handle multiple types of input. Currently we think `PredictionClassif`, `BenchmarkResult` and `ResampleResult` are the three most important types we need to handle. We use S3 classes to implement the polymorphism of the visualization functions.
52 | 
53 | Example:
54 | ```r
55 | fairness_compare <- function(object, ...){  # S3 generic: dispatches on the class of object
56 |   UseMethod("fairness_compare")
57 | }
58 | 
59 | fairness_compare.PredictionClassif <- function(...){ }  # generalize visualization for Prediction Object
60 | fairness_compare.BenchmarkResult <- function(...){ }  # generalize visualization for Benchmark Object
61 | fairness_compare.ResampleResult <- function(...){ }  # generalize visualization for Resample Object (By converting to Benchmark)
62 | ```
63 | 
64 | ## Rationale, drawbacks and alternatives
65 | [rationale-and-alternatives]: #rationale-and-alternatives
66 | 
67 | This design seems a fairly obvious choice in the design space.
68 | The main alternative to this proposal is not to implement it,
69 | and let users create their own plots if they need them.
70 | 
71 | ## Prior art
72 | [prior-art]: #prior-art
73 | 
74 | mlr3viz:
75 | This is the package that provides all the other visualizations for mlr3.
76 | https://github.com/mlr-org/mlr3viz
77 | 
78 | ## Introduced Dependencies
79 | This solution would introduce dependencies on the following (additional) packages:
80 | 
81 | ```
82 | ggplot2
83 | ```
84 | 
85 | ## Unresolved questions
86 | [unresolved-questions]: #unresolved-questions
87 | 
88 | 
89 | * We want to add interactive visualizations through R Shiny, but this is future work.
90 | * Currently the visualizations need some improvements, such as circling the anchor point in the fairness-accuracy tradeoff visualization.
91 | 


--------------------------------------------------------------------------------
/attic/tests_edge_cases.R:
--------------------------------------------------------------------------------
 1 | 
 2 | test_that("Edge cases", {
 3 |   skip("Tested locally")
 4 |   skip_on_cran()
 5 |   skip_if_not_installed("mlr3oml")
 6 |   skip_if_not_installed("mlr3pipelines")
 7 |   library(mlr3)
 8 |   library(mlr3pipelines)
 9 |   library(mlr3fairness)
10 |   task = tsk("oml", task_id = 317599)
11 |   task$col_roles$pta = "x2"
12 |   l = as_learner(po("explicit_pta") %>>% po("reweighing_os") %>>% lrn("classif.rpart"))
13 |   l$train(task)
14 |   expect_true(!is.null(l$model))
15 | })
16 | 
17 | test_that("Edge cases", {
18 |   skip("Tested locally")
19 |   skip_on_cran()
20 |   skip_if_not_installed("mlr3oml")
21 |   skip_if_not_installed("mlr3pipelines")
22 |   library(mlr3)
23 |   library(mlr3pipelines)
24 |   library(mlr3fairness)
25 |   task = tsk("oml", task_id = 317599)
26 |   task$col_roles$pta = "x2"
27 |   l = lrn("classif.fairfgrrm")
28 |   r = resample(task, l, rsmp("cv", folds = 3L))
29 |   r$aggregate(msr("fairness.acc"))
30 | 
31 |   l = as_learner(po("explicit_pta") %>>% lrn("classif.fairfgrrm"))
32 |   l$train(task)
33 |   expect_true(!is.null(l$model))
34 | })
35 | 
36 | 


--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
 1 | ## Submission
 2 | 
 3 | Minor update adding small new features, update to comply with upcoming changes in dependencies.
 4 | 
 5 | ## R CMD check results
 6 | 
 7 | R CMD check reports
 8 | 
 9 | 0 errors ✔ | 0 warnings ✔ | 0 notes ✔
10 | 
11 | - RHub shows a PREPERROR but all builds succeed.
12 | 


--------------------------------------------------------------------------------
/data-raw/adult.R:
--------------------------------------------------------------------------------
 1 | to_factor = function(dataset) {
 2 |   # Columns of the adult data that are converted to factors.
 3 |   categorical = c(
 4 |     "workclass", "education", "marital_status", "occupation",
 5 |     "relationship", "race", "sex", "target"
 6 |   )
 7 |   for (column in categorical) {
 8 |     dataset[[column]] = as.factor(dataset[[column]])
 9 |   }
10 |   dataset
11 | }
12 | 
13 | root = rprojroot::find_root(rprojroot::is_git_root)
14 | adult_train = data.table::fread(file.path(root, "data-raw", "adult-train-raw.csv"))
15 | adult_test = data.table::fread(file.path(root, "data-raw", "adult-test-raw.csv"))
16 | adult_train = to_factor(adult_train)
17 | adult_test = to_factor(adult_test)
18 | usethis::use_data(adult_train, adult_test, overwrite = TRUE)
19 | 


--------------------------------------------------------------------------------
/data-raw/compas.R:
--------------------------------------------------------------------------------
 1 | root = rprojroot::find_root(rprojroot::is_git_root)
 2 | compas = data.table::fread(file.path(root, "data-raw", "compas-scores-two-years.csv"))
 3 | compas$two_year_recid = as.factor(compas$two_year_recid)
 4 | compas$is_recid = as.factor(compas$is_recid)
 5 | compas$age_cat = as.factor(compas$age_cat)
 6 | compas$score_text = as.factor(compas$score_text)
 7 | compas$sex = as.factor(compas$sex)
 8 | compas$race = as.factor(compas$race)
 9 | compas$c_charge_degree = as.factor(compas$c_charge_degree)
10 | usethis::use_data(compas, overwrite = TRUE)
11 | 


--------------------------------------------------------------------------------
/data/adult_test.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlr-org/mlr3fairness/31c5e657756f6696464ec02896092f1544acaa09/data/adult_test.rda


--------------------------------------------------------------------------------
/data/adult_train.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlr-org/mlr3fairness/31c5e657756f6696464ec02896092f1544acaa09/data/adult_train.rda


--------------------------------------------------------------------------------
/data/compas.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlr-org/mlr3fairness/31c5e657756f6696464ec02896092f1544acaa09/data/compas.rda


--------------------------------------------------------------------------------
/inst/references.bib:
--------------------------------------------------------------------------------
 1 | @inproceedings{dwork2012,
 2 |   title={Fairness through awareness},
 3 |   author={Dwork, Cynthia and Hardt, Moritz and Pitassi, Toniann and Reingold, Omer and Zemel, Richard},
 4 |   booktitle={Proceedings of the 3rd Innovations in Theoretical Computer Science Conference},
 5 |   pages={214--226},
 6 |   year={2012}
 7 | }
 8 | 
 9 | @book{fairmlbook,
10 |   title = {Fairness and Machine Learning},
11 |   author = {Solon Barocas and Moritz Hardt and Arvind Narayanan},
12 |   publisher = {fairmlbook.org},
13 |   note = {\url{http://www.fairmlbook.org}},
14 |   year = {2019}
15 | }
16 | 
17 | @article{hardt2016equality,
18 |   title={Equality of opportunity in supervised learning},
19 |   author={Hardt, Moritz and Price, Eric and Srebro, Nati},
20 |   journal={Advances in neural information processing systems},
21 |   volume={29},
22 |   pages={3315--3323},
23 |   year={2016}
24 | }
25 | 
26 | @article{kilbertus2017avoiding,
27 |   title={Avoiding discrimination through causal reasoning},
28 |   author={Kilbertus, Niki and Rojas Carulla, Mateo and Parascandolo, Giambattista and Hardt, Moritz and Janzing, Dominik and Sch{\"o}lkopf, Bernhard},
29 |   journal={Advances in Neural Information Processing Systems},
30 |   volume={30},
31 |   year={2017}
32 | }
33 | 
34 | @inproceedings{binns2020apparent,
35 |   title={On the apparent conflict between individual and group fairness},
36 |   author={Binns, Reuben},
37 |   booktitle={Proceedings of the 2020 Conference on Fairness, Accountability, and Transparency},
38 |   pages={514--524},
39 |   year={2020},
40 |   series = {FAT* '20}
41 | }


--------------------------------------------------------------------------------
/inst/rmarkdown/templates/datasheets/skeleton/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Christian Garbin CS master's assignments
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/inst/rmarkdown/templates/datasheets/skeleton/references.bib:
--------------------------------------------------------------------------------
 1 | @inproceedings{modelcards,
 2 | author = {Mitchell, Margaret and Wu, Simone and Zaldivar, Andrew and Barnes, Parker and Vasserman, Lucy and Hutchinson, Ben and Spitzer, Elena and Raji, Inioluwa Deborah and Gebru, Timnit},
 3 | title = {Model Cards for Model Reporting},
 4 | year = {2019},
 5 | isbn = {9781450361255},
 6 | publisher = {Association for Computing Machinery},
 7 | address = {New York, NY, USA},
 8 | url = {https://doi.org/10.1145/3287560.3287596},
 9 | doi = {10.1145/3287560.3287596},
10 | booktitle = {Proceedings of the Conference on Fairness, Accountability, and Transparency},
11 | pages = {220–229},
12 | numpages = {10},
13 | keywords = {ethical considerations, fairness evaluation, ML model evaluation, disaggregated evaluation, documentation, model cards, datasheets},
14 | location = {Atlanta, GA, USA},
15 | series = {FAT* '19}
16 | }
17 | 
18 | @article{datasheets,
19 |   title={Datasheets for datasets},
20 |   author={Gebru, Timnit and Morgenstern, Jamie and Vecchione, Briana and Vaughan, Jennifer Wortman and Wallach, Hanna and Daum{\'e} III, Hal and Crawford, Kate},
21 |   journal={arXiv preprint arXiv:1803.09010},
22 |   year={2018}
23 | }


--------------------------------------------------------------------------------
/inst/rmarkdown/templates/datasheets/skeleton/style.css:
--------------------------------------------------------------------------------
 1 | body {
 2 |   overflow-x: hidden;
 3 |   margin-left:auto;
 4 |   margin-right:auto;
 5 | }
 6 | 
 7 | div.poster_wrap {
 8 |   padding:  1in;
 9 | }
10 | 
11 | div.title_container {
12 |   height: calc(81in * 0.066);
13 | }
14 | h1#title {
15 |   font-size: 125pt;
16 | }
17 | 
18 | h2 {
19 |   margin-block-start:.4em;
20 |   margin-block-end: .4em;
21 | }


--------------------------------------------------------------------------------
/inst/rmarkdown/templates/datasheets/template.yaml:
--------------------------------------------------------------------------------
1 | name: Datasheet
2 | description: >
3 |  Datasheet for a dataset
4 | create_dir: true


--------------------------------------------------------------------------------
/inst/rmarkdown/templates/fairness_report/skeleton/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Christian Garbin CS master's assignments
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/inst/rmarkdown/templates/fairness_report/skeleton/references.bib:
--------------------------------------------------------------------------------
 1 | @inproceedings{modelcards,
 2 | author = {Mitchell, Margaret and Wu, Simone and Zaldivar, Andrew and Barnes, Parker and Vasserman, Lucy and Hutchinson, Ben and Spitzer, Elena and Raji, Inioluwa Deborah and Gebru, Timnit},
 3 | title = {Model Cards for Model Reporting},
 4 | year = {2019},
 5 | isbn = {9781450361255},
 6 | publisher = {Association for Computing Machinery},
 7 | address = {New York, NY, USA},
 8 | url = {https://doi.org/10.1145/3287560.3287596},
 9 | doi = {10.1145/3287560.3287596},
10 | booktitle = {Proceedings of the Conference on Fairness, Accountability, and Transparency},
11 | pages = {220–229},
12 | numpages = {10},
13 | keywords = {ethical considerations, fairness evaluation, ML model evaluation, disaggregated evaluation, documentation, model cards, datasheets},
14 | location = {Atlanta, GA, USA},
15 | series = {FAT* '19}
16 | }
17 | 
18 | @article{datasheets,
19 |   title={Datasheets for datasets},
20 |   author={Gebru, Timnit and Morgenstern, Jamie and Vecchione, Briana and Vaughan, Jennifer Wortman and Wallach, Hanna and Daum{\'e} III, Hal and Crawford, Kate},
21 |   journal={arXiv preprint arXiv:1803.09010},
22 |   year={2018}
23 | }


--------------------------------------------------------------------------------
/inst/rmarkdown/templates/fairness_report/skeleton/skeleton.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: Fairness Report
  3 | author:
  4 |   - name: First Author
  5 | output: html_document
  6 | bibliography: references.bib
  7 | ---
  8 | 
  9 | ```{r setup, include=FALSE}
 10 | knitr::opts_chunk$set(warning = FALSE, message = FALSE, echo=FALSE)
 11 | library("mlr3")
 12 | library("mlr3viz")
 13 | library("mlr3fairness")
 14 | library("ggplot2")
 15 | library("kableExtra")
 16 | ```
 17 | <!-- This reads in `objects` provided to report_fairness: resample_result, task, ...-->
 18 | 
 19 | ```{r read, child = 'read_data.Rmd'}
 20 | 
 21 | ```
 22 | 
 23 | ## Audit Report: Summary
 24 | 
 25 | This document shows how to use `mlr3fairness` to create audit reports for different tasks throughout the fairness exploration process.
 26 | 
 27 | The document has three main sections, which describe the details of the task, the model and the interpretability of the parameters.
 28 | 
 29 | Jump to section:
 30 | 
 31 | - [Task details](#task-details)
 32 | - [Model details](#model-details)
 33 | - [Interpretability](#interpretability)
 34 | 
 35 | ## Task details
 36 | 
 37 | In this fairness report, we investigate the fairness of the following task:
 38 | 
 39 | ```{r}
 40 | task
 41 | ```
 42 | 
 43 | ### Task Documentation:
 44 | 
 45 | Here we report the basic details for the task.
 46 | 
 47 | ```{r}
 48 | task_summary(task) %>%
 49 |   kbl() %>%
 50 |   kable_paper("hover", full_width = F)
 51 | ```
 52 | ### Exploratory Data Analysis: 
 53 | 
 54 | We also report the number of missing values, types and the levels for each feature:
 55 | 
 56 | ```{r}
 57 | df_summary = data.frame(task$col_info)
 58 | df_summary = df_summary[df_summary$id %in% c(task$feature_names, task$target_names), ]
 59 | # cbind() pairs rows by position, so request the missings in the row order of df_summary; values are scaled to percent.
 60 | missings = data.frame("Missings (%)" = 100 * task$missings(df_summary$id) / task$nrow, check.names = FALSE)
 61 | cbind(df_summary, missings) %>%
 62 |   kbl() %>% kable_paper("hover", full_width = F)
 63 | ```
 64 | 
 65 | We first look at the label distribution:
 66 | 
 67 | ```{r}
 68 | autoplot(task) + facet_wrap(task$col_roles$pta)
 69 | ```
 70 | 
 71 | ## Model details
 72 | 
 73 | The following model was used in `resample_result`:
 74 | 
 75 | ```{r}
 76 | resample_result$learner
 77 | ```
 78 | 
 79 | ### Fairness Metrics
 80 | 
 81 | We furthermore report more than one fairness metric.
 82 | The metrics below are the mean across all resampling iterations.
 83 | 
 84 | ```{r}
 85 | fair_metrics = msrs(c("fairness.acc","fairness.eod","fairness.fnr", "fairness.fpr","fairness.ppv", "fairness.cv"))
 86 | ```
 87 | 
 88 | ```{r}
 89 | resample_result$aggregate(fair_metrics) %>%
 90 |   kbl(col.names = c("value")) %>%
 91 |   kable_paper("hover", full_width = F)
 92 | ```
 93 | 
 94 | We can furthermore employ several visualizations to report the fairness.
 95 | For example, the fairness and accuracy trade off, compare metrics visualization and the fairness prediction density of our model.
 96 | For more detailed usage and examples, you may want to check the [visualization vignette](https://mlr3fairness.mlr-org.com/articles/visualization-vignette.html).
 97 | 
 98 | ```{r}
 99 | fairness_accuracy_tradeoff(resample_result, msr("fairness.fnr"))
100 | ```
101 | 
102 | ```{r, eval = (resample_result$learner$predict_type == "prob")}
103 | fairness_prediction_density(resample_result)
104 | ```
105 | 
106 | ```{r}
107 | compare_metrics(resample_result, fair_metrics)
108 | ```
109 | 
110 | ## Interpretability
111 | 
112 | Finally, we use an external package to gain further insight into our model.
113 | For the following example we use the `iml` package as a demonstration.
114 | We first need to extract the learner from `resample_result` and wrap it in a `Predictor` object.
115 | 
116 | You can generate the variable importance plot like this:
117 | 
118 | ```{r}
119 | library("iml")
120 | 
121 | target = task$target_names
122 | twocols = task$feature_names[1:2]
123 | learner = resample_result$learner
124 | learner$train(task)
125 | 
126 | model = Predictor$new(model = learner,
127 |                       data = task$data()[,.SD, .SDcols = !target],
128 |                       y = task$data()[, ..target])
129 | 
130 | imp <- FeatureImp$new(model, loss = "ce")
131 | plot(imp)
132 | ```
133 | 
134 | Or generate the feature effects plot:
135 | 
136 | ```{r, warning = FALSE}
137 | effect = FeatureEffects$new(model, method = "pdp", grid.size = 10)
138 | effect$plot(features = twocols)
139 | ```
140 | 
141 | For more details on interpretability, check the documentation of the `iml` package.
142 | 


--------------------------------------------------------------------------------
/inst/rmarkdown/templates/fairness_report/template.yaml:
--------------------------------------------------------------------------------
1 | name: Fairness Report
2 | description: >
3 |  Fairness Report for a ML Model
4 | create_dir: true


--------------------------------------------------------------------------------
/inst/rmarkdown/templates/modelcards/skeleton/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Christian Garbin CS master's assignments
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/inst/rmarkdown/templates/modelcards/skeleton/references.bib:
--------------------------------------------------------------------------------
 1 | @inproceedings{modelcards,
 2 | author = {Mitchell, Margaret and Wu, Simone and Zaldivar, Andrew and Barnes, Parker and Vasserman, Lucy and Hutchinson, Ben and Spitzer, Elena and Raji, Inioluwa Deborah and Gebru, Timnit},
 3 | title = {Model Cards for Model Reporting},
 4 | year = {2019},
 5 | isbn = {9781450361255},
 6 | publisher = {Association for Computing Machinery},
 7 | address = {New York, NY, USA},
 8 | url = {https://doi.org/10.1145/3287560.3287596},
 9 | doi = {10.1145/3287560.3287596},
10 | booktitle = {Proceedings of the Conference on Fairness, Accountability, and Transparency},
11 | pages = {220–229},
12 | numpages = {10},
13 | keywords = {ethical considerations, fairness evaluation, ML model evaluation, disaggregated evaluation, documentation, model cards, datasheets},
14 | location = {Atlanta, GA, USA},
15 | series = {FAT* '19}
16 | }
17 | 
18 | @article{datasheets,
19 |   title={Datasheets for datasets},
20 |   author={Gebru, Timnit and Morgenstern, Jamie and Vecchione, Briana and Vaughan, Jennifer Wortman and Wallach, Hanna and Daum{\'e} III, Hal and Crawford, Kate},
21 |   journal={arXiv preprint arXiv:1803.09010},
22 |   year={2018}
23 | }


--------------------------------------------------------------------------------
/inst/rmarkdown/templates/modelcards/skeleton/skeleton.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: Model card for "add model name here"
  3 | author:
  4 |   - name: First Author
  5 |     affil: 1
  6 |   - name: Second Author
  7 |     affil: 2
  8 | affiliation:
  9 |   - num: 1
 10 |     address: Planet Earth, Milky Way
 11 |   - num: 2
 12 |     address: Planet Earth, Milky Way
 13 | column_numbers: 4
 14 | output: 
 15 |   posterdown::posterdown_html:
 16 |     self_contained: false
 17 | bibliography: references.bib
 18 | ---
 19 | 
 20 | ```{r setup, include=FALSE}
 21 | knitr::opts_chunk$set(echo = FALSE)
 22 | ```
 23 | 
 24 | Sections and prompts from the [model cards paper](https://arxiv.org/abs/1810.03993), v2. (@modelcards).
 25 | 
 26 | Jump to section:
 27 | 
 28 | - [Model details](#model-details)
 29 | - [Intended use](#intended-use)
 30 | - [Factors](#factors)
 31 | - [Metrics](#metrics)
 32 | - [Evaluation data](#evaluation-data)
 33 | - [Training data](#training-data)
 34 | - [Quantitative analyses](#quantitative-analyses)
 35 | - [Ethical considerations](#ethical-considerations)
 36 | - [Caveats and recommendations](#caveats-and-recommendations)
 37 | 
 38 | ## Model details
 39 | 
 40 | _Basic information about the model._
 41 | 
 42 | Review section 4.1 of the [model cards paper](https://arxiv.org/abs/1810.03993).
 43 | 
 44 | - Person or organization developing model
 45 | - Model date
 46 | - Model version
 47 | - Model type
 48 | - Information about training algorithms, parameters, fairness constraints or other applied
 49 |   approaches, and features
 50 | - Paper or other resource for more information
 51 | - Citation details
 52 | - License
 53 | - Where to send questions or comments about the model
 54 | 
 55 | ## Intended use
 56 | 
 57 | _Use cases that were envisioned during development._
 58 | 
 59 | Review section 4.2 of the [model cards paper](https://arxiv.org/abs/1810.03993).
 60 | 
 61 | ### Primary intended uses
 62 | 
 63 | ### Primary intended users
 64 | 
 65 | ### Out-of-scope use cases
 66 | 
 67 | ## Factors
 68 | 
 69 | _Factors could include demographic or phenotypic groups, environmental conditions, technical
 70 | attributes, or others listed in Section 4.3._
 71 | 
 72 | Review section 4.3 of the [model cards paper](https://arxiv.org/abs/1810.03993).
 73 | 
 74 | ### Relevant factors
 75 | 
 76 | ### Evaluation factors
 77 | 
 78 | ## Metrics
 79 | 
 80 | _The appropriate metrics to feature in a model card depend on the type of model that is being tested.
 81 | For example, classification systems in which the primary output is a class label differ significantly
 82 | from systems whose primary output is a score. In all cases, the reported metrics should be determined
 83 | based on the model’s structure and intended use._
 84 | 
 85 | Review section 4.4 of the [model cards paper](https://arxiv.org/abs/1810.03993).
 86 | 
 87 | ### Model performance measures
 88 | 
 89 | ### Decision thresholds
 90 | 
 91 | ### Approaches to uncertainty and variability
 92 | 
 93 | ## Evaluation data
 94 | 
 95 | _All referenced datasets would ideally point to any set of documents that provide visibility into the
 96 | source and composition of the dataset. Evaluation datasets should include datasets that are publicly
 97 | available for third-party use. These could be existing datasets or new ones provided alongside the model
 98 | card analyses to enable further benchmarking._
 99 | 
100 | Review section 4.5 of the [model cards paper](https://arxiv.org/abs/1810.03993).
101 | 
102 | ### Datasets
103 | 
104 | ### Motivation
105 | 
106 | ### Preprocessing
107 | 
108 | ## Training data
109 | 
110 | Review section 4.6 of the [model cards paper](https://arxiv.org/abs/1810.03993).
111 | 
112 | ## Quantitative analyses
113 | 
114 | _Quantitative analyses should be disaggregated, that is, broken down by the chosen factors. Quantitative
115 | analyses should provide the results of evaluating the model according to the chosen metrics, providing
116 | confidence interval values when possible._
117 | 
118 | Review section 4.7 of the [model cards paper](https://arxiv.org/abs/1810.03993).
119 | 
120 | ### Unitary results
121 | 
122 | ### Intersectional results
123 | 
124 | ## Ethical considerations
125 | 
126 | _This section is intended to demonstrate the ethical considerations that went into model development,
127 | surfacing ethical challenges and solutions to stakeholders. Ethical analysis does not always lead to
128 | precise solutions, but the process of ethical contemplation is worthwhile to inform on responsible
129 | practices and next steps in future work._
130 | 
131 | Review section 4.8 of the [model cards paper](https://arxiv.org/abs/1810.03993).
132 | 
133 | ### Data
134 | 
135 | ### Human life
136 | 
137 | ### Mitigations
138 | 
139 | ### Risks and harms
140 | 
141 | ### Use cases
142 | 
143 | ## Caveats and recommendations
144 | 
145 | _This section should list additional concerns that were not covered in the previous sections._
146 | 
147 | Review section 4.9 of the [model cards paper](https://arxiv.org/abs/1810.03993).
148 | 
149 | # References
150 | 


--------------------------------------------------------------------------------
/inst/rmarkdown/templates/modelcards/skeleton/style.css:
--------------------------------------------------------------------------------
1 | body {
2 |   overflow-x: hidden;
3 | }


--------------------------------------------------------------------------------
/inst/rmarkdown/templates/modelcards/template.yaml:
--------------------------------------------------------------------------------
1 | name: Modelcard
2 | description: >
3 |  Modelcard for a ML Model
4 | create_dir: true


--------------------------------------------------------------------------------
/man-roxygen/class_learner.R:
--------------------------------------------------------------------------------
 1 | <%
 2 | lrn = mlr3::lrn(id)
 3 | %>
 4 | #' @description
 5 | #' Calls [<%=lrn$packages[2]%>::<%=caller%>] from package \CRANpkg{<%=lrn$packages[2]%>}.
 6 | #'
 7 | #' @section Dictionary: This [Learner] can be instantiated via the
 8 | #'   [dictionary][mlr3misc::Dictionary] [mlr_learners] or with the associated
 9 | #'   sugar function [lrn()]:
10 | #' ```
11 | #' mlr_learners$get("<%= id %>")
12 | #' lrn("<%= id %>")
13 | #' ```
14 | #' 
15 | #' @section Meta Information:
16 | #' `r mlr3misc::rd_info(mlr3::lrn("<%= id %>"))`
17 | #' @md
18 | #'
19 | #' @section Parameters:
20 | #' `r mlr3misc::rd_info(mlr3::lrn("<%= id %>")$param_set)`
21 | #' @md
22 | #' 
23 | #' @family fairness_learners
24 | #' @keywords internal
25 | 


--------------------------------------------------------------------------------
/man-roxygen/example.R:
--------------------------------------------------------------------------------
 1 | <%
 2 | lrn = mlr3::lrn(id)
 3 | %>
 4 | #' @examples
 5 | #' library("mlr3")
 6 | #' # stop example failing with warning if package not installed
 7 | #' learner = suppressWarnings(mlr3::lrn("<%= id %>"))
 8 | #' print(learner)
 9 | #'
10 | #' # available parameters:
11 | #' learner$param_set$ids()
12 | 


--------------------------------------------------------------------------------
/man-roxygen/field_base_measure.R:
--------------------------------------------------------------------------------
1 | #' @field base_measure (`Measure()`)\cr
2 | #' The base measure to be used by the fairness measures,
3 | #' e.g. [mlr_measures_classif.fpr] for the false positive rate.
4 | 


--------------------------------------------------------------------------------
/man-roxygen/field_operation.R:
--------------------------------------------------------------------------------
1 | #' @field operation (`function()`)\cr
2 | #' The operation used to compute the difference. A function with args 'x' and 'y' that returns
3 | #' a single value. Defaults to `abs(x - y)`.
4 | 


--------------------------------------------------------------------------------
/man-roxygen/intersect.R:
--------------------------------------------------------------------------------
1 | #' @description
2 | #' If more than one `pta` column is provided, the hyperparameter `intersectional` controls whether
3 | #' intersections of protected groups are formed (e.g. combinations of gender and race).
4 | #' Initialized to `TRUE`.
5 | #' If `FALSE`, only the group specified by the first element of `pta` is used.
6 | 


--------------------------------------------------------------------------------
/man-roxygen/param_base_measure.R:
--------------------------------------------------------------------------------
1 | #' @param base_measure (`Measure()`)\cr
2 | #'   The base metric evaluated within each subgroup.
3 | 


--------------------------------------------------------------------------------
/man-roxygen/pta.R:
--------------------------------------------------------------------------------
1 | #' @section Protected Attributes:
2 | #'   The protected attribute is specified as a `col_role` in the corresponding [`Task()`]:\cr
3 | #'     `<Task>$col_roles$pta = "name_of_attribute"` \cr
4 | #'   This also allows specifying more than one protected attribute, 
5 | #'   in which case fairness will be considered on the level of intersecting groups defined by all columns
6 | #'   selected as a protected attribute.
7 | 


--------------------------------------------------------------------------------
/man-roxygen/seealso_learner.R:
--------------------------------------------------------------------------------
1 | #' @seealso
2 | #' [Dictionary][mlr3misc::Dictionary] of [Learners][mlr3::Learner]:
3 | #' [mlr3::mlr_learners]
4 | 


--------------------------------------------------------------------------------
/man/MeasureFairnessComposite.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/MeasureFairnessComposite.R
  3 | \name{MeasureFairnessComposite}
  4 | \alias{MeasureFairnessComposite}
  5 | \title{Composite Fairness Measure}
  6 | \description{
  7 | Computes a composite measure from multiple fairness metrics and aggregates them
  8 | using \code{aggfun} (defaulting to \code{\link[=mean]{mean()}}).
  9 | }
 10 | \section{Protected Attributes}{
 11 | 
 12 | The protected attribute is specified as a \code{col_role} in the corresponding \code{\link[=Task]{Task()}}:\cr
 13 | \verb{<Task>$col_roles$pta = "name_of_attribute"} \cr
 14 | This also allows specifying more than one protected attribute,
 15 | in which case fairness will be considered on the level of intersecting groups defined by all columns
 16 | selected as a protected attribute.
 17 | }
 18 | 
 19 | \examples{
 20 | library("mlr3")
 21 | # Equalized Odds Metric
 22 | MeasureFairnessComposite$new(measures = msrs(c("fairness.fpr", "fairness.tpr")))
 23 | 
 24 | # Other metrics e.g. based on negative rates
 25 | MeasureFairnessComposite$new(measures = msrs(c("fairness.fnr", "fairness.tnr")))
 26 | }
 27 | \section{Super class}{
 28 | \code{\link[mlr3:Measure]{mlr3::Measure}} -> \code{MeasureFairnessComposite}
 29 | }
 30 | \section{Methods}{
 31 | \subsection{Public methods}{
 32 | \itemize{
 33 | \item \href{#method-MeasureFairnessComposite-new}{\code{MeasureFairnessComposite$new()}}
 34 | \item \href{#method-MeasureFairnessComposite-clone}{\code{MeasureFairnessComposite$clone()}}
 35 | }
 36 | }
 37 | \if{html}{\out{
 38 | <details open><summary>Inherited methods</summary>
 39 | <ul>
 40 | <li><span class="pkg-link" data-pkg="mlr3" data-topic="Measure" data-id="aggregate"><a href='../../mlr3/html/Measure.html#method-Measure-aggregate'><code>mlr3::Measure$aggregate()</code></a></span></li>
 41 | <li><span class="pkg-link" data-pkg="mlr3" data-topic="Measure" data-id="format"><a href='../../mlr3/html/Measure.html#method-Measure-format'><code>mlr3::Measure$format()</code></a></span></li>
 42 | <li><span class="pkg-link" data-pkg="mlr3" data-topic="Measure" data-id="help"><a href='../../mlr3/html/Measure.html#method-Measure-help'><code>mlr3::Measure$help()</code></a></span></li>
 43 | <li><span class="pkg-link" data-pkg="mlr3" data-topic="Measure" data-id="print"><a href='../../mlr3/html/Measure.html#method-Measure-print'><code>mlr3::Measure$print()</code></a></span></li>
 44 | <li><span class="pkg-link" data-pkg="mlr3" data-topic="Measure" data-id="score"><a href='../../mlr3/html/Measure.html#method-Measure-score'><code>mlr3::Measure$score()</code></a></span></li>
 45 | </ul>
 46 | </details>
 47 | }}
 48 | \if{html}{\out{<hr>}}
 49 | \if{html}{\out{<a id="method-MeasureFairnessComposite-new"></a>}}
 50 | \if{latex}{\out{\hypertarget{method-MeasureFairnessComposite-new}{}}}
 51 | \subsection{Method \code{new()}}{
 52 | Creates a new instance of this \link[R6:R6Class]{R6} class.
 53 | \subsection{Usage}{
 54 | \if{html}{\out{<div class="r">}}\preformatted{MeasureFairnessComposite$new(
 55 |   id = NULL,
 56 |   measures,
 57 |   aggfun = function(x) mean(x),
 58 |   operation = groupdiff_absdiff,
 59 |   minimize = TRUE,
 60 |   range = c(-Inf, Inf)
 61 | )}\if{html}{\out{</div>}}
 62 | }
 63 | 
 64 | \subsection{Arguments}{
 65 | \if{html}{\out{<div class="arguments">}}
 66 | \describe{
 67 | \item{\code{id}}{(\code{character(1)})\cr
 68 | Id of the measure. Defaults to the concatenation of ids in \code{measures}.}
 69 | 
 70 | \item{\code{measures}}{(list of \link{MeasureFairness})\cr
 71 | List of fairness measures to aggregate.}
 72 | 
 73 | \item{\code{aggfun}}{(\verb{function()})\cr
 74 | Aggregation function used to aggregate results from respective measures. Defaults to \code{mean}.}
 75 | 
 76 | \item{\code{operation}}{(\verb{function()})\cr
 77 | The operation used to compute the difference. A function that returns
 78 | a single value given input: computed metric for each subgroup.
 79 | Defaults to \code{groupdiff_absdiff}.
 80 | See \code{MeasureFairness} for more information.}
 81 | 
 82 | \item{\code{minimize}}{(\code{logical(1)})\cr
 83 | Should the measure be minimized? Defaults to \code{TRUE}.}
 84 | 
 85 | \item{\code{range}}{(\code{numeric(2)})\cr
 86 | Range of the resulting measure. Defaults to \code{c(-Inf, Inf)}.}
 87 | }
 88 | \if{html}{\out{</div>}}
 89 | }
 90 | }
 91 | \if{html}{\out{<hr>}}
 92 | \if{html}{\out{<a id="method-MeasureFairnessComposite-clone"></a>}}
 93 | \if{latex}{\out{\hypertarget{method-MeasureFairnessComposite-clone}{}}}
 94 | \subsection{Method \code{clone()}}{
 95 | The objects of this class are cloneable with this method.
 96 | \subsection{Usage}{
 97 | \if{html}{\out{<div class="r">}}\preformatted{MeasureFairnessComposite$clone(deep = FALSE)}\if{html}{\out{</div>}}
 98 | }
 99 | 
100 | \subsection{Arguments}{
101 | \if{html}{\out{<div class="arguments">}}
102 | \describe{
103 | \item{\code{deep}}{Whether to make a deep clone.}
104 | }
105 | \if{html}{\out{</div>}}
106 | }
107 | }
108 | }
109 | 


--------------------------------------------------------------------------------
/man/MeasureSubgroup.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/MeasureSubgroup.R
  3 | \name{MeasureSubgroup}
  4 | \alias{MeasureSubgroup}
  5 | \title{Evaluate a metric on a subgroup}
  6 | \description{
  7 | Allows for calculation of arbitrary \code{\link[mlr3:Measure]{mlr3::Measure()}}s on a selected sub-group.
  8 | }
  9 | \examples{
 10 | library("mlr3")
 11 | # Evaluate classification accuracy on the Female subgroup only.
 12 | t = tsk("adult_train")
 13 | learner = lrn("classif.rpart", cp = .01)
 14 | learner$train(t)
 15 | measure = msr("subgroup", base_measure = msr("classif.acc"), subgroup = "Female")
 16 | predictions = learner$predict(t)
 17 | predictions$score(measure, task = t)
 18 | }
 19 | \seealso{
 20 | \link{MeasureFairness}, \link{groupwise_metrics}
 21 | }
 22 | \section{Super class}{
 23 | \code{\link[mlr3:Measure]{mlr3::Measure}} -> \code{MeasureSubgroup}
 24 | }
 25 | \section{Public fields}{
 26 | \if{html}{\out{<div class="r6-fields">}}
 27 | \describe{
 28 | \item{\code{base_measure}}{(\code{Measure()})\cr
 29 | The base measure to be used by the fairness measures,
 30 | e.g. \link{mlr_measures_classif.fpr} for the false positive rate.}
 31 | 
 32 | \item{\code{subgroup}}{(\code{character})|(\code{integer})\cr
 33 | Subgroup identifier.}
 34 | 
 35 | \item{\code{intersect}}{(\code{logical})\cr
 36 | Should groups be intersected?}
 37 | }
 38 | \if{html}{\out{</div>}}
 39 | }
 40 | \section{Methods}{
 41 | \subsection{Public methods}{
 42 | \itemize{
 43 | \item \href{#method-MeasureSubgroup-new}{\code{MeasureSubgroup$new()}}
 44 | \item \href{#method-MeasureSubgroup-clone}{\code{MeasureSubgroup$clone()}}
 45 | }
 46 | }
 47 | \if{html}{\out{
 48 | <details open><summary>Inherited methods</summary>
 49 | <ul>
 50 | <li><span class="pkg-link" data-pkg="mlr3" data-topic="Measure" data-id="aggregate"><a href='../../mlr3/html/Measure.html#method-Measure-aggregate'><code>mlr3::Measure$aggregate()</code></a></span></li>
 51 | <li><span class="pkg-link" data-pkg="mlr3" data-topic="Measure" data-id="format"><a href='../../mlr3/html/Measure.html#method-Measure-format'><code>mlr3::Measure$format()</code></a></span></li>
 52 | <li><span class="pkg-link" data-pkg="mlr3" data-topic="Measure" data-id="help"><a href='../../mlr3/html/Measure.html#method-Measure-help'><code>mlr3::Measure$help()</code></a></span></li>
 53 | <li><span class="pkg-link" data-pkg="mlr3" data-topic="Measure" data-id="print"><a href='../../mlr3/html/Measure.html#method-Measure-print'><code>mlr3::Measure$print()</code></a></span></li>
 54 | <li><span class="pkg-link" data-pkg="mlr3" data-topic="Measure" data-id="score"><a href='../../mlr3/html/Measure.html#method-Measure-score'><code>mlr3::Measure$score()</code></a></span></li>
 55 | </ul>
 56 | </details>
 57 | }}
 58 | \if{html}{\out{<hr>}}
 59 | \if{html}{\out{<a id="method-MeasureSubgroup-new"></a>}}
 60 | \if{latex}{\out{\hypertarget{method-MeasureSubgroup-new}{}}}
 61 | \subsection{Method \code{new()}}{
 62 | Creates a new instance of this \link[R6:R6Class]{R6} class.
 63 | \subsection{Usage}{
 64 | \if{html}{\out{<div class="r">}}\preformatted{MeasureSubgroup$new(id = NULL, base_measure, subgroup, intersect = TRUE)}\if{html}{\out{</div>}}
 65 | }
 66 | 
 67 | \subsection{Arguments}{
 68 | \if{html}{\out{<div class="arguments">}}
 69 | \describe{
 70 | \item{\code{id}}{(\code{character})\cr
 71 | The measure's id. Set to 'fairness.<base_measure_id>' if omitted.}
 72 | 
 73 | \item{\code{base_measure}}{(\code{Measure()})\cr
 74 | The measure used to measure fairness.}
 75 | 
 76 | \item{\code{subgroup}}{(\code{character})|(\code{integer})\cr
 77 | Subgroup identifier. Either value for the protected attribute or position in \code{task$levels}.}
 78 | 
 79 | \item{\code{intersect}}{\code{\link{logical}} \cr
 80 | Should multiple pta groups be intersected? Defaults to \code{TRUE}.
 81 | Only relevant if more than one \code{pta} column is provided.}
 82 | }
 83 | \if{html}{\out{</div>}}
 84 | }
 85 | }
 86 | \if{html}{\out{<hr>}}
 87 | \if{html}{\out{<a id="method-MeasureSubgroup-clone"></a>}}
 88 | \if{latex}{\out{\hypertarget{method-MeasureSubgroup-clone}{}}}
 89 | \subsection{Method \code{clone()}}{
 90 | The objects of this class are cloneable with this method.
 91 | \subsection{Usage}{
 92 | \if{html}{\out{<div class="r">}}\preformatted{MeasureSubgroup$clone(deep = FALSE)}\if{html}{\out{</div>}}
 93 | }
 94 | 
 95 | \subsection{Arguments}{
 96 | \if{html}{\out{<div class="arguments">}}
 97 | \describe{
 98 | \item{\code{deep}}{Whether to make a deep clone.}
 99 | }
100 | \if{html}{\out{</div>}}
101 | }
102 | }
103 | }
104 | 


--------------------------------------------------------------------------------
/man/adult.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/adult.R
 3 | \docType{data}
 4 | \name{adult}
 5 | \alias{adult}
 6 | \alias{adult_test}
 7 | \alias{adult_train}
 8 | \title{Adult Dataset}
 9 | \source{
10 | Dua, Dheeru, Graff, Casey (2017).
11 | \dQuote{UCI Machine Learning Repository.}
12 | \url{http://archive.ics.uci.edu/ml/}.
13 | Ding, Frances, Hardt, Moritz, Miller, John, Schmidt, Ludwig (2021).
14 | \dQuote{Retiring adult: New datasets for fair machine learning.}
15 | In \emph{Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 1)}.
16 | }
17 | \description{
18 | Dataset used to predict whether income exceeds $50K/yr based on census data.
19 | Also known as the "Census Income" dataset.
20 | Train dataset contains 13 features and 30178 observations.
21 | Test dataset contains 13 features and 15315 observations.
22 | Target column is "target": A binary factor where 1: <=50K and 2: >50K for annual income.
23 | The column \code{"sex"} is set as protected attribute.
24 | }
25 | \section{Derived tasks}{
26 | 
27 | \itemize{
28 | \item \code{adult_train}: Original train split for the adult task available at UCI.
29 | \item \code{adult_test}: Original test split for the adult task available at UCI.
30 | }
31 | }
32 | 
33 | \section{Using Adult - Known Problems}{
34 | 
35 | The adult dataset has several known limitations such as its age, limited documentation, and outdated feature encodings (Ding et al., 2021).
36 | Furthermore, the selected threshold (income <=50K) has strong implications on the outcome of analysis, such that
37 | "In many cases, the $50k threshold understates and misrepresents the broader picture" (Ding et al., 2021).
38 | As a result, conclusions w.r.t. real-world implications are severely limited.
39 | 
40 | We decide to replicate the dataset here, as it is a widely used benchmark dataset and it can still serve this purpose.
41 | }
42 | 
43 | \section{Pre-processing}{
44 | 
45 | \itemize{
46 | \item \code{fnlwgt} Remove final weight, which is the number of people the census believes the entry represents
47 | \item \code{native-country} Remove Native Country, which is the country of origin for an individual
48 | \item Rows containing \code{NA} in workclass and occupation have been removed.
49 | \item Pre-processing inspired by the article at \url{https://cseweb.ucsd.edu//classes/sp15/cse190-c/reports/sp15/048.pdf}
50 | }
51 | }
52 | 
53 | \section{Metadata}{
54 | 
55 | \itemize{
56 | \item (integer) age: The age of the individuals
57 | \item (factor) workclass: A general term to represent the employment status of an individual
58 | \item (factor) education: The highest level of education achieved by an individual.
59 | \item (integer) education_num: the highest level of education achieved in numerical form.
60 | \item (factor) marital_status: marital status of an individual.
61 | \item (factor) occupation: the general type of occupation of an individual
62 | \item (factor) relationship: whether the individual is in a relationship.
63 | \item (factor) race: Descriptions of an individual’s race
64 | \item (factor) sex: the biological sex of the individual
65 | \item (integer) capital-gain: capital gains for an individual
66 | \item (integer) capital-loss: capital loss for an individual
67 | \item (integer) hours-per-week: the hours an individual has reported to work per week
68 | \item (factor) target: whether or not an individual makes more than $50,000 annually
69 | }
70 | }
71 | 
72 | \examples{
73 | library("mlr3")
74 | data("adult_test", package = "mlr3fairness")
75 | data("adult_train", package = "mlr3fairness")
76 | }
77 | \keyword{data}
78 | 


--------------------------------------------------------------------------------
/man/compute_metrics.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compute_metrics.R
 3 | \name{compute_metrics}
 4 | \alias{compute_metrics}
 5 | \title{Compute metrics for non-mlr3 predictions.}
 6 | \usage{
 7 | compute_metrics(data, target, protected_attribute, prediction, metrics = NULL)
 8 | }
 9 | \arguments{
10 | \item{data}{(\code{data.table}) \cr The dataset used for predicting.}
11 | 
12 | \item{target}{(\code{character}) \cr The name of the target variable. Must be available in \code{data}.}
13 | 
14 | \item{protected_attribute}{(\code{character}) \cr The name(s) of the protected attribute(s). Must be available in \code{data}.}
15 | 
16 | \item{prediction}{(\code{vector}) \cr A vector containing predictions.}
17 | 
18 | \item{metrics}{(\code{Metric}|\code{list}) \cr (List of) mlr3 metrics to apply.}
19 | }
20 | \description{
21 | Allows computing metrics for predictions that do not stem from mlr3,
22 | e.g. because they were made by models outside of mlr3.
23 | Currently only \code{classif} and \code{regr} - style predictions are supported.
24 | }
25 | \section{Protected Attributes}{
26 | 
27 | The protected attribute is specified as a \code{col_role} in the corresponding \code{\link[=Task]{Task()}}:\cr
28 | \verb{<Task>$col_roles$pta = "name_of_attribute"} \cr
29 | This also allows specifying more than one protected attribute,
30 | in which case fairness will be considered on the level of intersecting groups defined by all columns
31 | selected as a protected attribute.
32 | }
33 | 
34 | \examples{
35 | library("mlr3")
36 | # Get adult data as a data.table
37 | train = tsk("adult_train")$data()
38 | mod = rpart::rpart(target ~ ., train)
39 | 
40 | # Predict on test data
41 | test = tsk("adult_test")$data()
42 | yhat = predict(mod, test, type = "vector")
43 | 
44 | # Convert to a factor with the same levels
45 | yhat = as.factor(yhat)
46 | levels(yhat) = levels(test$target)
47 | 
48 | compute_metrics(
49 |   data = test, 
50 |   target = "target",
51 |   prediction = yhat,
52 |   protected_attribute = "sex",
53 |   metrics = msr("fairness.acc")
54 | )
55 | }
56 | 


--------------------------------------------------------------------------------
/man/fairness_accuracy_tradeoff.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/fairness_accuracy_tradeoff.R
 3 | \name{fairness_accuracy_tradeoff}
 4 | \alias{fairness_accuracy_tradeoff}
 5 | \title{Plot Fairness Accuracy Trade-offs}
 6 | \usage{
 7 | fairness_accuracy_tradeoff(object, ...)
 8 | }
 9 | \arguments{
10 | \item{object}{(\link{PredictionClassif} | \link{BenchmarkResult} | \link{ResampleResult})\cr
11 | The binary class prediction object that will be evaluated.
12 | \itemize{
13 | \item If provided a \link{PredictionClassif}.
14 | Then only one point will indicate the accuracy and fairness metrics for the current predictions.
15 | Requires also passing a \link{Task}.
16 | \item If provided a \link{ResampleResult}.
17 | Then the plot will compare the accuracy and fairness metrics for the same model,
18 | but different resampling iterations as well as the aggregate indicated by a cross.
19 | \item If provided a \link{BenchmarkResult}.
20 | Then the plot will compare the accuracy and fairness metrics for all models and all resampling iterations.
21 | Points are colored according to the learner_id and faceted by task_id.
22 | The aggregated score is indicated by a cross.
23 | }}
24 | 
25 | \item{...}{Arguments to be passed to methods. Such as:
26 | \itemize{
27 | \item \code{fairness_measure} (\link{Measure})\cr
28 | The fairness measure that will be evaluated.
29 | Default measure set to be \code{msr("fairness.fpr")}
30 | \item \code{accuracy_measure} (\link{Measure})\cr
31 | The accuracy measure that will be evaluated.
32 | Default measure set to be \link[mlr3:MeasureClassif]{msr("classif.acc")}.
33 | \item \code{task} (\link{TaskClassif})\cr
34 | The data task that contains the protected column, only required when the class of object is (\link{PredictionClassif})
35 | }}
36 | }
37 | \value{
38 | A 'ggplot2' object.
39 | }
40 | \description{
41 | Provides visualization wrt. trade-offs between fairness and accuracy metrics across learners and
42 | resampling iterations.
43 | This can assist in gauging the optimal model from a set of options along with estimates of variance
44 | (through individual resampling iterations).
45 | }
46 | \section{Protected Attributes}{
47 | 
48 | The protected attribute is specified as a \code{col_role} in the corresponding \code{\link[=Task]{Task()}}:\cr
49 | \verb{<Task>$col_roles$pta = "name_of_attribute"} \cr
50 | This also allows specifying more than one protected attribute,
51 | in which case fairness will be considered on the level of intersecting groups defined by all columns
52 | selected as a protected attribute.
53 | }
54 | 
55 | \examples{
56 | library("mlr3")
57 | library("mlr3learners")
58 | library("ggplot2")
59 | 
60 | # Setup the Fairness measure and tasks
61 | task = tsk("adult_train")$filter(1:500)
62 | learner = lrn("classif.ranger", predict_type = "prob")
63 | fairness_measure = msr("fairness.tpr")
64 | 
65 | # Example 1 - A single prediction
66 | learner$train(task)
67 | predictions = learner$predict(task)
68 | fairness_accuracy_tradeoff(predictions, fairness_measure, task = task)
69 | 
70 | # Example 2 - A benchmark
71 | design = benchmark_grid(
72 |   tasks = task,
73 |   learners = lrns(c("classif.featureless", "classif.rpart"),
74 |     predict_type = "prob", predict_sets = c("train", "test")),
75 |   resamplings = rsmps("cv", folds = 2)
76 | )
77 | bmr = benchmark(design)
78 | fairness_accuracy_tradeoff(bmr, fairness_measure)
79 | }
80 | 


--------------------------------------------------------------------------------
/man/fairness_compare_metrics.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/compare_metrics.R
 3 | \name{compare_metrics}
 4 | \alias{compare_metrics}
 5 | \title{Compare different metrics}
 6 | \usage{
 7 | compare_metrics(object, ...)
 8 | }
 9 | \arguments{
10 | \item{object}{(\link{PredictionClassif} | \link{BenchmarkResult} | \link{ResampleResult})\cr
11 | The object to create a plot for.
12 | \itemize{
13 | \item If provided a (\link{PredictionClassif}).
14 | Then the visualization will compare the fairness metrics among the binary level from protected field
15 | through bar plots.
16 | \item If provided a (\link{ResampleResult}).
17 | Then the visualization will generate the boxplots for fairness metrics, and compare them among
18 | the binary level from protected field.
19 | \item If provided a (\link{BenchmarkResult}).
20 | Then the visualization will generate the boxplots for fairness metrics, and compare them among
21 | both the binary level from protected field and the models implemented.
22 | }}
23 | 
24 | \item{...}{The arguments to be passed to methods, such as:
25 | \itemize{
26 | \item \code{fairness_measures} (list of \link{Measure})\cr
27 | The fairness measures that will be evaluated on object; can be a single \link{Measure} or a list of \link{Measure}s.
28 | Default measure set to be \code{msr("fairness.acc")}.
29 | \item \code{task} (\link{TaskClassif})\cr
30 | The data task that contains the protected column, only required when object is (\link{PredictionClassif}).
31 | }}
32 | }
33 | \value{
34 | A 'ggplot2' object.
35 | }
36 | \description{
37 | Compare learners with respect to one or multiple metrics.
38 | Metrics can be, but are not limited to, fairness metrics.
39 | }
40 | \section{Protected Attributes}{
41 | 
42 | The protected attribute is specified as a \code{col_role} in the corresponding \code{\link[=Task]{Task()}}:\cr
43 | \verb{<Task>$col_roles$pta = "name_of_attribute"} \cr
44 | This also allows specifying more than one protected attribute,
45 | in which case fairness will be considered on the level of intersecting groups defined by all columns
46 | selected as a protected attribute.
47 | }
48 | 
49 | \examples{
50 | library("mlr3")
51 | library("mlr3learners")
52 | 
53 | # Setup the Fairness Measures and tasks
54 | task = tsk("adult_train")$filter(1:500)
55 | learner = lrn("classif.ranger", predict_type = "prob")
56 | learner$train(task)
57 | predictions = learner$predict(task)
58 | design = benchmark_grid(
59 |   tasks = task,
60 |   learners = lrns(c("classif.ranger", "classif.rpart"),
61 |     predict_type = "prob", predict_sets = c("train", "test")),
62 |   resamplings = rsmps("cv", folds = 3)
63 | )
64 | 
65 | bmr = benchmark(design)
66 | fairness_measure = msr("fairness.tpr")
67 | fairness_measures = msrs(c("fairness.tpr", "fairness.fnr", "fairness.acc"))
68 | 
69 | # Predictions
70 | compare_metrics(predictions, fairness_measure, task)
71 | compare_metrics(predictions, fairness_measures, task)
72 | 
73 | # BenchmarkResult and ResampleResult
74 | compare_metrics(bmr, fairness_measure)
75 | compare_metrics(bmr, fairness_measures)
76 | }
77 | 


--------------------------------------------------------------------------------
/man/fairness_prediction_density.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/fairness_prediction_density.R
 3 | \name{fairness_prediction_density}
 4 | \alias{fairness_prediction_density}
 5 | \title{Probability Density Plot}
 6 | \usage{
 7 | fairness_prediction_density(object, ...)
 8 | }
 9 | \arguments{
10 | \item{object}{(\link{PredictionClassif} | \link{ResampleResult} | \link{BenchmarkResult})\cr
11 | The binary class prediction object that will be evaluated.
12 | If \link{PredictionClassif}, a \link{Task} is required.}
13 | 
14 | \item{...}{The arguments to be passed to methods, such as:
15 | \itemize{
16 | \item \code{task} (\link{TaskClassif})\cr
17 | The data task that contains the protected column.
18 | \item \code{type} \code{\link{character}}\cr
19 | The plot type. Either \code{violin} or \code{density}.
20 | }}
21 | }
22 | \value{
23 | A 'ggplot2' object.
24 | }
25 | \description{
26 | Visualizes per-subgroup densities across learners, task and class.
27 | The plot is a combination of boxplot and violin plot.
28 | The y-axis shows the levels of the protected columns and the x-axis shows the predicted probability.
29 | The plot title indicates the class for which the predicted probability is shown.
30 | }
31 | \section{Protected Attributes}{
32 | 
33 | The protected attribute is specified as a \code{col_role} in the corresponding \code{\link[=Task]{Task()}}:\cr
34 | \verb{<Task>$col_roles$pta = "name_of_attribute"} \cr
35 | This also allows specifying more than one protected attribute,
36 | in which case fairness will be considered on the level of intersecting groups defined by all columns
37 | selected as a protected attribute.
38 | }
39 | 
40 | \examples{
41 | library("mlr3")
42 | library("mlr3learners")
43 | 
44 | task = tsk("adult_train")$filter(1:500)
45 | learner = lrn("classif.rpart", predict_type = "prob", cp = 0.001)
46 | learner$train(task)
47 | 
48 | # For prediction
49 | predictions = learner$predict(task)
50 | fairness_prediction_density(predictions, task)
51 | 
52 | # For resampling
53 | rr = resample(task, learner, rsmp("cv"))
54 | fairness_prediction_density(rr)
55 | }
56 | 


--------------------------------------------------------------------------------
/man/fairness_tensor.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/fairness_tensor.R
 3 | \name{fairness_tensor}
 4 | \alias{fairness_tensor}
 5 | \alias{fairness_tensor.data.table}
 6 | \alias{fairness_tensor.PredictionClassif}
 7 | \alias{fairness_tensor.ResampleResult}
 8 | \title{Compute the Fairness Tensor given a Prediction and a Task}
 9 | \usage{
10 | fairness_tensor(object, normalize = "all", ...)
11 | 
12 | \method{fairness_tensor}{data.table}(object, normalize = "all", task, ...)
13 | 
14 | \method{fairness_tensor}{PredictionClassif}(object, normalize = "all", task, ...)
15 | 
16 | \method{fairness_tensor}{ResampleResult}(object, normalize = "all", ...)
17 | }
18 | \arguments{
19 | \item{object}{(\code{\link[=data.table]{data.table()}} | \link{PredictionClassif} | \link{ResampleResult})\cr
20 | A data.table with columns \code{truth} and \code{prediction},
21 | a \link{PredictionClassif} or a \link{ResampleResult}.}
22 | 
23 | \item{normalize}{(\code{character})\cr
24 | How should the fairness tensor be normalized?
25 | "all" normalizes entries by dividing by dataset size,
26 | "group" normalizes entries by dividing by group size and
27 | "none" does not conduct any normalization at all.}
28 | 
29 | \item{...}{\code{any}\cr
30 | Currently not used.}
31 | 
32 | \item{task}{(\link{TaskClassif})\cr
33 | A \link{TaskClassif}. Needs \code{col_role} \code{"pta"} to be set.}
34 | }
35 | \value{
36 | \code{list()} of confusion matrix for every group in \code{"pta"}.
37 | }
38 | \description{
39 | A fairness tensor is a list of groupwise confusion matrices.
40 | }
41 | \section{Protected Attributes}{
42 | 
43 | The protected attribute is specified as a \code{col_role} in the corresponding \code{\link[=Task]{Task()}}:\cr
44 | \verb{<Task>$col_roles$pta = "name_of_attribute"} \cr
45 | This also allows specifying more than one protected attribute,
46 | in which case fairness will be considered on the level of intersecting groups defined by all columns
47 | selected as a protected attribute.
48 | }
49 | 
50 | \examples{
51 | library("mlr3")
52 | task = tsk("compas")
53 | prediction = lrn("classif.rpart")$train(task)$predict(task)
54 | fairness_tensor(prediction, task = task)
55 | }
56 | 


--------------------------------------------------------------------------------
/man/figures/scale_mlr3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlr-org/mlr3fairness/31c5e657756f6696464ec02896092f1544acaa09/man/figures/scale_mlr3.png


--------------------------------------------------------------------------------
/man/figures/unnamed-chunk-6-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlr-org/mlr3fairness/31c5e657756f6696464ec02896092f1544acaa09/man/figures/unnamed-chunk-6-1.png


--------------------------------------------------------------------------------
/man/groupdiff_tau.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/measure_operations.R
 3 | \name{groupdiff_tau}
 4 | \alias{groupdiff_tau}
 5 | \alias{groupdiff_absdiff}
 6 | \alias{groupdiff_diff}
 7 | \title{Groupwise Operations}
 8 | \usage{
 9 | groupdiff_tau(x)
10 | 
11 | groupdiff_absdiff(x)
12 | 
13 | groupdiff_diff(x)
14 | }
15 | \arguments{
16 | \item{x}{(\code{numeric()})\cr
17 | Measured performance in group 1, 2, ...}
18 | }
19 | \value{
20 | A single \code{numeric}.
21 | }
22 | \description{
23 | \code{groupdiff_tau()} computes \eqn{min(x/y, y/x)}, i.e. the smallest symmetric ratio between \eqn{x} and \eqn{y}
24 | that is smaller than 1. If \eqn{x} is a vector, the symmetric ratio between all
25 | elements in \eqn{x} is computed.
26 | 
27 | \code{groupdiff_absdiff()} computes \eqn{max(abs(x-y), abs(y-x))}, i.e. the largest absolute difference
28 | between \eqn{x} and \eqn{y}.
29 | If \eqn{x} is a vector, the symmetric absolute difference between all elements in \eqn{x} is computed.
30 | }
31 | \section{Protected Attributes}{
32 | 
33 | The protected attribute is specified as a \code{col_role} in the corresponding \code{\link[=Task]{Task()}}:\cr
34 | \verb{<Task>$col_roles$pta = "name_of_attribute"} \cr
35 | This also allows specifying more than one protected attribute,
36 | in which case fairness will be considered on the level of intersecting groups defined by all columns
37 | selected as a protected attribute.
38 | }
39 | 
40 | \examples{
41 | groupdiff_tau(1:3)
42 | groupdiff_diff(1:3)
43 | groupdiff_absdiff(1:3)
44 | }
45 | 


--------------------------------------------------------------------------------
/man/groupwise_metrics.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/MeasureSubgroup.R
 3 | \name{groupwise_metrics}
 4 | \alias{groupwise_metrics}
 5 | \title{Evaluate a metric on each protected subgroup in a task.}
 6 | \usage{
 7 | groupwise_metrics(base_measure, task, intersect = TRUE)
 8 | }
 9 | \arguments{
10 | \item{base_measure}{(\code{Measure()})\cr
11 | The base metric evaluated within each subgroup.}
12 | 
13 | \item{task}{\code{\link{Task}} \cr
14 | \code{\link[mlr3:Task]{mlr3::Task()}} to instantiate measures for.}
15 | 
16 | \item{intersect}{\code{\link{logical}} \cr
17 | Should multiple pta groups be intersected? Defaults to \code{TRUE}.
18 | Only relevant if more than one \code{pta} columns are provided.}
19 | }
20 | \value{
21 | \code{list} \cr
22 | List of \link[mlr3:Measure]{mlr3::Measure}s.
23 | }
24 | \description{
25 | Instantiates one new measure per protected attribute group in a task.
26 | Each metric is then evaluated only on predictions made for the given specific subgroup.
27 | }
28 | \examples{
29 |   library("mlr3")
30 |   t = tsk("compas")
31 |   l = lrn("classif.rpart")
32 |   m = groupwise_metrics(msr("classif.acc"), t)
33 |   l$train(t)$predict(t)$score(m, t)
34 | }
35 | \seealso{
36 | \link{MeasureSubgroup}
37 | }
38 | 


--------------------------------------------------------------------------------
/man/mlr3fairness-package.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/zzz.R
 3 | \docType{package}
 4 | \name{mlr3fairness-package}
 5 | \alias{mlr3fairness}
 6 | \alias{mlr3fairness-package}
 7 | \title{mlr3fairness: Fairness Auditing and Debiasing for 'mlr3'}
 8 | \description{
 9 | Integrates fairness auditing and bias mitigation methods for the 'mlr3' ecosystem. This includes fairness metrics, reporting tools, visualizations and bias mitigation techniques such as "Reweighing" described in 'Kamiran, Calders' (2012) \doi{10.1007/s10115-011-0463-8} and "Equalized Odds" described in 'Hardt et al.' (2016) \url{https://papers.nips.cc/paper/2016/file/9d2682367c3935defcb1f9e247a97c0d-Paper.pdf}. Integration with 'mlr3' allows for auditing of ML models as well as convenient joint tuning of machine learning algorithms and debiasing methods.
10 | }
11 | \seealso{
12 | Useful links:
13 | \itemize{
14 |   \item \url{https://mlr3fairness.mlr-org.com}
15 |   \item \url{https://github.com/mlr-org/mlr3fairness}
16 |   \item Report bugs at \url{https://github.com/mlr-org/mlr3fairness/issues}
17 | }
18 | 
19 | }
20 | \author{
21 | \strong{Maintainer}: Florian Pfisterer \email{pfistererf@googlemail.com} (\href{https://orcid.org/0000-0001-8867-762X}{ORCID})
22 | 
23 | Authors:
24 | \itemize{
25 |   \item Wei Siyi \email{weisiyi2@gmail.com}
26 |   \item Michel Lang \email{michellang@gmail.com} (\href{https://orcid.org/0000-0001-9754-0393}{ORCID})
27 | }
28 | 
29 | }
30 | \keyword{internal}
31 | 


--------------------------------------------------------------------------------
/man/mlr_learners_fairness.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/mlr_learners_fairness.R
 3 | \docType{data}
 4 | \name{mlr_learners_fairness}
 5 | \alias{mlr_learners_fairness}
 6 | \title{Fair Learners in mlr3}
 7 | \format{
 8 | An object of class \code{data.table} (inherits from \code{data.frame}) with 5 rows and 3 columns.
 9 | }
10 | \usage{
11 | mlr_learners_fairness
12 | }
13 | \value{
14 | A data.table containing an overview of available fair learners.
15 | }
16 | \description{
17 | Fair Learners in mlr3
18 | }
19 | \section{Predefined learners}{
20 | 
21 | \CRANpkg{mlr3fairness} comes with a set of predefined fair learners listed below:
22 | 
23 | \tabular{lll}{
24 | \strong{key} \tab \strong{package} \tab \strong{reference} \cr
25 | regr.fairfrrm     \tab fairml \tab Scutari et al., 2021 \cr
26 | classif.fairfgrrm \tab fairml \tab Scutari et al., 2021 \cr
27 | regr.fairzlm      \tab fairml \tab Zafar et al., 2019   \cr
28 | classif.fairzlrm  \tab fairml \tab Zafar et al., 2019   \cr
29 | regr.fairnclm     \tab fairml \tab Komiyama et al., 2018
30 | }
31 | }
32 | 
33 | \section{Protected Attributes}{
34 | 
35 | The protected attribute is specified as a \code{col_role} in the corresponding \code{\link[=Task]{Task()}}:\cr
36 | \verb{<Task>$col_roles$pta = "name_of_attribute"} \cr
37 | This also allows specifying more than one protected attribute,
38 | in which case fairness will be considered on the level of intersecting groups defined by all columns
39 | selected as a protected attribute.
40 | }
41 | 
42 | \examples{
43 | library("mlr3")
44 | # Available learners:
45 | mlr_learners_fairness
46 | }
47 | \keyword{datasets}
48 | 


--------------------------------------------------------------------------------
/man/mlr_measures_fairness.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/mlr_measures_fairness.R
 3 | \docType{data}
 4 | \name{mlr_measures_fairness}
 5 | \alias{mlr_measures_fairness}
 6 | \title{Fairness Measures in mlr3}
 7 | \format{
 8 | An object of class \code{data.table} (inherits from \code{data.frame}) with 18 rows and 2 columns.
 9 | }
10 | \usage{
11 | mlr_measures_fairness
12 | }
13 | \value{
14 | A data.table containing an overview of available fairness metrics.
15 | }
16 | \description{
17 | Fairness Measures in mlr3
18 | }
19 | \section{Predefined measures}{
20 | 
21 | \CRANpkg{mlr3fairness} comes with a set of predefined fairness measures as listed below.
22 | For full flexibility, \link{MeasureFairness} can be used to construct classical
23 | group fairness measures based on the difference in a performance metric across groups
24 | by combining a performance measure with an operation for measuring differences.
25 | Furthermore \link{MeasureSubgroup} can be used to measure performance in a given subgroup, or alternatively
26 | \code{groupwise_metrics(measure, task)} to instantiate a measure for each subgroup in a \link{Task}.
27 | 
28 | \tabular{ll}{
29 | \strong{key} \tab \strong{description} \cr
30 | fairness.acc         \tab Absolute differences in accuracy across groups                                                           \cr
31 | fairness.mse         \tab Absolute differences in mean squared error across groups                                                 \cr
32 | fairness.fnr         \tab Absolute differences in false negative rates across groups                                               \cr
33 | fairness.fpr         \tab Absolute differences in false positive rates across groups                                               \cr
34 | fairness.tnr         \tab Absolute differences in true negative rates across groups                                                \cr
35 | fairness.tpr         \tab Absolute differences in true positive rates across groups                                                \cr
36 | fairness.npv         \tab Absolute differences in negative predictive values across groups                                         \cr
37 | fairness.ppv         \tab Absolute differences in positive predictive values across groups                                         \cr
38 | fairness.fomr        \tab Absolute differences in false omission rates across groups                                               \cr
39 | fairness.fp          \tab Absolute differences in false positives across groups                                                    \cr
40 | fairness.tp          \tab Absolute differences in true positives across groups                                                     \cr
41 | fairness.tn          \tab Absolute differences in true negatives across groups                                                     \cr
42 | fairness.fn          \tab Absolute differences in false negatives across groups                                                    \cr
43 | fairness.cv          \tab Difference in positive class prediction, also known as Calders-Verwer gap or demographic parity          \cr
44 | fairness.eod         \tab Equalized Odds: Mean of absolute differences between true positive and false positive rates across groups\cr
45 | fairness.pp          \tab Predictive Parity: Mean of absolute differences between ppv and npv across groups                        \cr
46 | fairness.acc_eod=.05 \tab Accuracy under equalized odds < 0.05 constraint                                                          \cr
47 | fairness.acc_ppv=.05 \tab Accuracy under ppv difference < 0.05 constraint
48 | }
49 | }
50 | 
51 | \examples{
52 | library("mlr3")
53 | # Predefined measures:
54 | mlr_measures_fairness$key
55 | }
56 | \keyword{datasets}
57 | 


--------------------------------------------------------------------------------
/man/mlr_measures_positive_probability.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/MeasurePositiveProbability.R
 3 | \name{mlr_measures_positive_probability}
 4 | \alias{mlr_measures_positive_probability}
 5 | \alias{MeasurePositiveProbability}
 6 | \title{Positive Probability Measure}
 7 | \description{
 8 | Return the probability of a positive prediction, often known as 'Calders-Verwer' gap.
 9 | This is defined as count of positive predictions divided by the number of observations.
10 | }
11 | \examples{
12 | library("mlr3")
13 | # Create Positive Probability Measure
14 | t = tsk("adult_train")
15 | learner = lrn("classif.rpart", cp = .01)
16 | learner$train(t)
17 | measure = msr("classif.pp")
18 | predictions = learner$predict(t)
19 | predictions$score(measure, task = t)
20 | }
21 | \section{Super class}{
22 | \code{\link[mlr3:Measure]{mlr3::Measure}} -> \code{MeasurePositiveProbability}
23 | }
24 | \section{Methods}{
25 | \subsection{Public methods}{
26 | \itemize{
27 | \item \href{#method-MeasurePositiveProbability-new}{\code{MeasurePositiveProbability$new()}}
28 | \item \href{#method-MeasurePositiveProbability-clone}{\code{MeasurePositiveProbability$clone()}}
29 | }
30 | }
31 | \if{html}{\out{
32 | <details open><summary>Inherited methods</summary>
33 | <ul>
34 | <li><span class="pkg-link" data-pkg="mlr3" data-topic="Measure" data-id="aggregate"><a href='../../mlr3/html/Measure.html#method-Measure-aggregate'><code>mlr3::Measure$aggregate()</code></a></span></li>
35 | <li><span class="pkg-link" data-pkg="mlr3" data-topic="Measure" data-id="format"><a href='../../mlr3/html/Measure.html#method-Measure-format'><code>mlr3::Measure$format()</code></a></span></li>
36 | <li><span class="pkg-link" data-pkg="mlr3" data-topic="Measure" data-id="help"><a href='../../mlr3/html/Measure.html#method-Measure-help'><code>mlr3::Measure$help()</code></a></span></li>
37 | <li><span class="pkg-link" data-pkg="mlr3" data-topic="Measure" data-id="print"><a href='../../mlr3/html/Measure.html#method-Measure-print'><code>mlr3::Measure$print()</code></a></span></li>
38 | <li><span class="pkg-link" data-pkg="mlr3" data-topic="Measure" data-id="score"><a href='../../mlr3/html/Measure.html#method-Measure-score'><code>mlr3::Measure$score()</code></a></span></li>
39 | </ul>
40 | </details>
41 | }}
42 | \if{html}{\out{<hr>}}
43 | \if{html}{\out{<a id="method-MeasurePositiveProbability-new"></a>}}
44 | \if{latex}{\out{\hypertarget{method-MeasurePositiveProbability-new}{}}}
45 | \subsection{Method \code{new()}}{
46 | Initialize a Measure Positive Probability Object
47 | \subsection{Usage}{
48 | \if{html}{\out{<div class="r">}}\preformatted{MeasurePositiveProbability$new()}\if{html}{\out{</div>}}
49 | }
50 | 
51 | }
52 | \if{html}{\out{<hr>}}
53 | \if{html}{\out{<a id="method-MeasurePositiveProbability-clone"></a>}}
54 | \if{latex}{\out{\hypertarget{method-MeasurePositiveProbability-clone}{}}}
55 | \subsection{Method \code{clone()}}{
56 | The objects of this class are cloneable with this method.
57 | \subsection{Usage}{
58 | \if{html}{\out{<div class="r">}}\preformatted{MeasurePositiveProbability$clone(deep = FALSE)}\if{html}{\out{</div>}}
59 | }
60 | 
61 | \subsection{Arguments}{
62 | \if{html}{\out{<div class="arguments">}}
63 | \describe{
64 | \item{\code{deep}}{Whether to make a deep clone.}
65 | }
66 | \if{html}{\out{</div>}}
67 | }
68 | }
69 | }
70 | 


--------------------------------------------------------------------------------
/man/report_datasheet.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/reports.R
 3 | \name{report_datasheet}
 4 | \alias{report_datasheet}
 5 | \title{Create a Datasheet for Documenting a Dataset}
 6 | \usage{
 7 | report_datasheet(filename = "datasheet.Rmd", edit = FALSE, build = FALSE)
 8 | }
 9 | \arguments{
10 | \item{filename}{(\code{character(1)})\cr
11 | File path or name for new file that should be created.}
12 | 
13 | \item{edit}{(\code{logical(1)})\cr
14 | \code{TRUE} to edit the template immediately.}
15 | 
16 | \item{build}{(\code{logical(1)})\cr
17 | Should the report be built after creation? Initialized to \code{FALSE}.}
18 | }
19 | \value{
20 | Invisibly returns the path to the newly created file(s).
21 | }
22 | \description{
23 | Creates a new \CRANpkg{rmarkdown} template with a skeleton questionnaire for dataset documentation.
24 | Uses the awesome markdown template created by Chris Garbin
25 | \href{https://github.com/fau-masters-collected-works-cgarbin/model-card-template}{from Github}.
26 | }
27 | \examples{
28 |   report_file = tempfile()
29 |   report_datasheet(report_file)
30 | }
31 | \references{
32 | Gebru, Timnit, Morgenstern, Jamie, Vecchione, Briana, Vaughan, Wortman J, Wallach, Hanna, III D, Hal, Crawford, Kate (2018).
33 | \dQuote{Datasheets for datasets.}
34 | \emph{arXiv preprint arXiv:1803.09010}.
35 | }
36 | \seealso{
37 | Other fairness_reports: 
38 | \code{\link{report_fairness}()},
39 | \code{\link{report_modelcard}()}
40 | }
41 | \concept{fairness_reports}
42 | 


--------------------------------------------------------------------------------
/man/report_fairness.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/reports.R
 3 | \name{report_fairness}
 4 | \alias{report_fairness}
 5 | \title{Create a Fairness Report}
 6 | \usage{
 7 | report_fairness(
 8 |   filename = "fairness_report.Rmd",
 9 |   objects,
10 |   edit = FALSE,
11 |   check_objects = FALSE,
12 |   build = FALSE
13 | )
14 | }
15 | \arguments{
16 | \item{filename}{(\code{character(1)})\cr
17 | File path or name for new file that should be created.}
18 | 
19 | \item{objects}{(\code{list()})\cr
20 | A named list of objects required for the fairness report.
21 | Objects are saved as \verb{<name>.rds} in the new folder created for the report.
22 | \itemize{
23 | \item \code{task} :: The \code{\link{Task}} a report should be created for.
24 | \item \code{resample_result} ::  A \link[mlr3:ResampleResult]{mlr3::ResampleResult} result to be analyzed.
25 | \item \code{...} :: any other objects passed on for the report.
26 | }}
27 | 
28 | \item{edit}{(\code{logical(1)})\cr
29 | \code{TRUE} to edit the template immediately.}
30 | 
31 | \item{check_objects}{(\code{logical(1)})\cr
32 | Should items in \code{objects} be checked? If \code{FALSE}, no checks on \code{objects} are performed.
33 | 
34 | \item{build}{(\code{logical(1)})\cr
35 | Should the report be built after creation? Initialized to \code{FALSE}.}
36 | }
37 | \value{
38 | Invisibly returns the path to the newly created file(s).
39 | }
40 | \description{
41 | Creates a new \CRANpkg{rmarkdown} template with a skeleton of reported metrics and visualizations.
42 | Uses the awesome markdown template created by Chris Garbin
43 | \href{https://github.com/fau-masters-collected-works-cgarbin/model-card-template}{from Github}.
44 | }
45 | \examples{
46 |   library("mlr3")
47 |   report_file = tempfile()
48 |   task = tsk("compas")
49 |   learner = lrn("classif.rpart", predict_type = "prob")
50 |   rr = resample(task, learner, rsmp("cv", folds = 3L))
51 |   report_fairness(report_file, list(task = task, resample_result = rr))
52 | }
53 | \seealso{
54 | Other fairness_reports: 
55 | \code{\link{report_datasheet}()},
56 | \code{\link{report_modelcard}()}
57 | }
58 | \concept{fairness_reports}
59 | 


--------------------------------------------------------------------------------
/man/report_modelcard.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/reports.R
 3 | \name{report_modelcard}
 4 | \alias{report_modelcard}
 5 | \title{Create a Modelcard}
 6 | \usage{
 7 | report_modelcard(filename = "modelcard.Rmd", edit = FALSE, build = FALSE)
 8 | }
 9 | \arguments{
10 | \item{filename}{(\code{character(1)})\cr
11 | File path or name for new file that should be created.}
12 | 
13 | \item{edit}{(\code{logical(1)})\cr
14 | \code{TRUE} to edit the template immediately.}
15 | 
16 | \item{build}{(\code{logical(1)})\cr
17 | Should the report be built after creation? Initialized to \code{FALSE}.}
18 | }
19 | \value{
20 | Invisibly returns the path to the newly created file(s).
21 | }
22 | \description{
23 | Creates a new \CRANpkg{rmarkdown} template with a skeleton questionnaire for a model card.
24 | Uses the awesome markdown template created by Chris Garbin
25 | \href{https://github.com/fau-masters-collected-works-cgarbin/model-card-template}{from Github}.
26 | }
27 | \examples{
28 |   report_file = tempfile()
29 |   report_modelcard(report_file)
30 | }
31 | \references{
32 | Mitchell, Margaret, Wu, Simone, Zaldivar, Andrew, Barnes, Parker, Vasserman, Lucy, Hutchinson, Ben, Spitzer, Elena, Raji, Deborah I, Gebru, Timnit (2019).
33 | \dQuote{Model cards for model reporting.}
34 | In \emph{Proceedings of the conference on fairness, accountability, and transparency}, 220--229.
35 | }
36 | \seealso{
37 | Other fairness_reports: 
38 | \code{\link{report_datasheet}()},
39 | \code{\link{report_fairness}()}
40 | }
41 | \concept{fairness_reports}
42 | 


--------------------------------------------------------------------------------
/man/task_summary.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/helpers.R
 3 | \name{task_summary}
 4 | \alias{task_summary}
 5 | \title{Task summary for fairness report}
 6 | \usage{
 7 | task_summary(task)
 8 | }
 9 | \arguments{
10 | \item{task}{\link{Task}}
11 | }
12 | \value{
13 | \code{data.frame} containing the reported information
14 | }
15 | \description{
16 | Create the general task documentation in a dataframe for fairness report.
17 | The information includes
18 | \itemize{
19 | \item Audit Date
20 | \item Task Name
21 | \item Number of observations
22 | \item Number of features
23 | \item Target Name
24 | \item Feature Names
25 | \item The Protected Attribute
26 | }
27 | }
28 | \examples{
29 | library("mlr3")
30 | task_summary(tsk("adult_train"))
31 | }
32 | 


--------------------------------------------------------------------------------
/mlr3fairness.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageCheckArgs: --as-cran
22 | PackageRoxygenize: rd,collate,namespace
23 | 


--------------------------------------------------------------------------------
/paper/.gitignore:
--------------------------------------------------------------------------------
 1 | *.tex
 2 | *.log
 3 | *.R
 4 | *.pdf
 5 | mlr3fairness_rj_files/
 6 | 
 7 | *.aux
 8 | *.blg
 9 | *.brf
10 | *.fls
11 | *.fdb_latexmk
12 | *.out
13 | 
14 | mlr3fairness_files
15 | *.bbl
16 | *.html


--------------------------------------------------------------------------------
/paper/RJwrapper.bbl:
--------------------------------------------------------------------------------
 1 | \begin{thebibliography}{6}
 2 | \providecommand{\natexlab}[1]{#1}
 3 | \providecommand{\url}[1]{\texttt{#1}}
 4 | \expandafter\ifx\csname urlstyle\endcsname\relax
 5 |   \providecommand{\doi}[1]{doi: #1}\else
 6 |   \providecommand{\doi}{doi: \begingroup \urlstyle{rm}\Url}\fi
 7 | 
 8 | \bibitem[Berk et~al.(2018)Berk, Heidari, Jabbari, Kearns, and Roth]{richardcompas}
 9 | R.~Berk, H.~Heidari, S.~Jabbari, M.~Kearns, and A.~Roth.
10 | \newblock {Fairness in Criminal Justice Risk Assessments: The State of the Art}.
11 | \newblock \emph{Sociological Methods \& Research}, Aug. 2018.
12 | \newblock \doi{10.1177/0049124118782533}.
13 | 
14 | \bibitem[Buolamwini and Gebru(2018)]{gendershades}
15 | J.~Buolamwini and T.~Gebru.
16 | \newblock {Gender shades: Intersectional accuracy disparities in commercial gender classification}.
17 | \newblock In \emph{Proceedings of the Conference on Fairness, Accountability, and Transparency}, pages 77--91. PMLR, 2018.
18 | 
19 | \bibitem[Calders and Verwer(2010)]{Calders2010}
20 | T.~Calders and S.~Verwer.
21 | \newblock {Three naive Bayes approaches for discrimination-free classification}.
22 | \newblock \emph{Data Mining and Knowledge Discovery}, 21\penalty0 (2):\penalty0 277--292, 2010.
23 | \newblock \doi{10.1007/s10618-010-0190-x}.
24 | 
25 | \bibitem[Chouldechova(2017)]{chouldechova2017fair}
26 | A.~Chouldechova.
27 | \newblock Fair prediction with disparate impact: A study of bias in recidivism prediction instruments.
28 | \newblock \emph{Big Data}, 5\penalty0 (2):\penalty0 153--163, June 2017.
29 | \newblock \doi{10.1089/big.2016.0047}.
30 | 
31 | \bibitem[Hardt et~al.(2016)Hardt, Price, and Srebro]{hardt2016equality}
32 | M.~Hardt, E.~Price, and N.~Srebro.
33 | \newblock Equality of opportunity in supervised learning.
34 | \newblock \emph{Advances in Neural Information Processing Systems}, 29:\penalty0 3315--3323, 2016.
35 | 
36 | \bibitem[Perrone et~al.(2021)Perrone, Donini, Zafar, Schmucker, Kenthapadi, and Archambeau]{perrone2021fair}
37 | V.~Perrone, M.~Donini, M.~B. Zafar, R.~Schmucker, K.~Kenthapadi, and C.~Archambeau.
38 | \newblock {Fair Bayesian Optimization}.
39 | \newblock In \emph{Proceedings of the 2021 AAAI/ACM Conference on AI, Ethics, and Society}, pages 854--863, 2021.
40 | 
41 | \end{thebibliography}
42 | 


--------------------------------------------------------------------------------
/paper/Rlogo.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlr-org/mlr3fairness/31c5e657756f6696464ec02896092f1544acaa09/paper/Rlogo.pdf


--------------------------------------------------------------------------------
/paper/presentations/dagstat_2022.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: Fairness Audits and Bias Mitigation with mlr3fairness
  3 | author:
  4 |   - name: Florian Pfisterer
  5 |     affil: 1
  6 |     main: true
  7 |   - name: Siyi Wei
  8 |   - name: Sebastian Vollmer
  9 |     affil: 3
 10 |   - name: Michel Lang
 11 |     affil: 2
 12 |   - name: Bernd Bischl
 13 |     affil: 1
 14 | affiliation:
 15 |   - num: 1
 16 |     address: LMU Munich
 17 |   - num: 2
 18 |     address: TU Dortmund
 19 |   - num: 3
 20 |     address: DFKI Kaiserslautern
 21 | primary_colour:	"#0b4545"
 22 | secondary_colour:	"#008080"
 23 | accent_colour: "#cc0000"
 24 | body_textsize: "50px"
 25 | author_textsize: "55px"
 26 | authorextra_textsize: "45px"
 27 | affiliation_textsize: "45px"
 28 | affiliation_textcol: '#606060'
 29 | main_width: 0.8
 30 | main_findings:
 31 |   - "Fairness Audits and Bias Mitigation with **mlr3fairness**"
 32 |   - '![](images/center.svg){.main_pic}'
 33 | logoleft_name: '![](images/qr.png){.main-img-left}'
 34 | logoright_name: '![](https://github.com/mlr-org/mlr3fairness/raw/main/man/figures/scale_mlr3.png){.main-img-right}'
 35 | output: 
 36 |   posterdown::posterdown_betterland:
 37 |     css: "style.css"
 38 |     self_contained: false
 39 |     pandoc_args: --mathjax
 40 |     number_sections: false
 41 | bibliography: references.bib
 42 | ---
 43 | 
 44 | ```{r setup, include=FALSE}
 45 | knitr::opts_chunk$set(echo = FALSE)
 46 | library("mlr3fairness")
 47 | ```
 48 | <link rel="stylesheet" href="style.css" type="text/css" />
 49 | 
 50 | # TL;DR
 51 | 
 52 | - If we make decisions based on models, they should not discriminate against sub-populations 
 53 | - Broadly, a model is considered fair if it treats people equally independent of the group they belong to.
 54 | - We present software that helps users detect and mitigate un-fairness in models:
 55 |   - Fairness Metrics allow for detecting biases
 56 |   - Bias mitigation techniques can help to make models fairer
 57 |   - Data and Model reporting can inform developers and users!
 58 | 
 59 | # What is algorithmic fairness?
 60 | 
 61 | Algorithmic fairness studies potentially negative effects of decisions derived from statistical / machine learning models.
 62 | Biases in models can occur for many reasons, for example biases in the data or misspecification of the model.
 63 | In practice, biases are often measured based on differences in predictions between two groups.
 64 | 
 65 | # Bias Audits
 66 | 
 67 | Bias audits apply a **Measure** to score predictions.
 68 | 
 69 | Our software currently contains `r sum(grepl("fair", msr()$keys()))` different fairness metrics.
 70 | We can construct a measure using the `msr()` shorthand, here `"fairness.tpr"`, which measures differences in TPR between groups.
 71 | 
 72 | ```{r, eval = FALSE, echo = TRUE}
 73 | m = msr("fairness.tpr")
 74 | prediction$score(m)
 75 | ```
 76 | 
 77 | # Bias Mitigation
 78 | 
 79 | Combining **bias mitigation** techniques with learning algorithms can help
 80 | create fair(er) learners!
 81 | 
 82 | 
 83 | ```{r, eval = FALSE, echo = TRUE}
 84 | # Reweigh data before training a learner
 85 | lrn = po("reweighing_wts") %>>% 
 86 |   po("learner", lrn("classif.glmnet"))
 87 | ```
 88 | 
 89 | # Integration with mlr3 
 90 | 
 91 | Integration with **mlr3** (@mlr3) allows for:
 92 | - Bias audits for any **mlr3** learner
 93 | - Model debiasing as part of a **mlr3 pipeline**
 94 | - Joint tuning of debiasing and ML model!
 95 | 
 96 | # Reporting
 97 | 
 98 | Unfairness cannot always be detected if it is already present in the data we use to build models.
 99 | Better documentation of data and models can help make users aware of potential problems
100 | and is therefore an integral part of developing fair models.
101 | 
102 | 
103 | | Report             |  Description             |
104 | |--------------------|--------------------------|
105 | | `report_modelcard()` | Modelcard for ML models <br> (@modelcards)         |
106 | | `report_datasheet()` | Datasheet for data sets <br> (@datasheets)         |
107 | | `report_fairness()`  | Fairness Report                                    |
108 | 
109 | 
110 | # Contribute!
111 | 
112 | We are looking for contributors to further improve `mlr3fairness`.
113 | We have several additions in mind, but are also open to input from the outside.
114 | Get in touch via GitHub issues or email!
115 | 
116 | # References
117 | 


--------------------------------------------------------------------------------
/paper/presentations/images/center.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "&nbsp;"
 3 | output: pdf_document
 4 | geometry: margin=1cm
 5 | ---
 6 | 
 7 | ```{r, echo = FALSE}
 8 | library(ggplot2)
 9 | library("lgr")
10 | library("mlr3learners")
11 | 
12 | ```
13 | 
14 | ```{r, eval = FALSE}
15 | library(mlr3fairness)
16 | 
17 | # The dataset: 
18 | task = TaskClassif$new("adult", adult_train, target = "target")
19 | # Set "sex" as the protected attribute
20 | task$col_roles$pta = "sex" 
21 | 
22 | # Initialize model(s)
23 | lrnr = lrns(c("classif.rpart", "classif.ranger"), predict_type = "prob")
24 | 
25 | # Benchmark both learners with 3-fold cross-validation
26 | bmr = benchmark(benchmark_grid(task, lrnr, rsmp("cv", folds = 3L)))
27 | 
28 | # Construct fairness metrics
29 | ms = msr("fairness.fpr")
30 | 
31 | # Visualize the fairness-accuracy tradeoff of the benchmarked learners.
32 | fairness_accuracy_tradeoff(bmr, ms)
33 | ```
34 | 
35 | 
36 | ```{r, eval = TRUE, results="hide", output = 'hide', message = FALSE, echo = FALSE, fig.align="center"}
37 | library(mlr3fairness)
38 | lgr$set_threshold("error")
39 | 
40 | # The dataset: 
41 | task = tsk("adult_train")
42 | # Set "sex" as the protected attribute
43 | task$col_roles$pta = "sex" 
44 | 
45 | # Initialize model(s)
46 | lrnr = lrns(c("classif.rpart", "classif.ranger"), predict_type = "prob")
47 | 
48 | # Benchmark both learners with 3-fold cross-validation
49 | bmr = benchmark(benchmark_grid(task, lrnr, rsmp("cv", folds = 3L)))
50 | 
51 | # Construct fairness metrics
52 | ms = msr("fairness.fpr")
53 | 
54 | # The fairness-accuracy tradeoff is plotted in the next chunk.
55 | ```
56 | 
57 | 
58 | ```{r, eval = TRUE, results="hide", echo = FALSE, fig.align="center"}
59 | fairness_accuracy_tradeoff(bmr, ms) + theme_minimal() + ggtitle("") + 
60 |   theme(
61 |     panel.background = element_rect(fill = "white", colour = "white", linetype = "solid"),
62 |     legend.background = element_rect(colour = "white", fill = "white"), 
63 |     plot.background = element_rect(fill = "white", colour = "white", linetype = "solid")
64 |   )
65 | ```


--------------------------------------------------------------------------------
/paper/presentations/images/center.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlr-org/mlr3fairness/31c5e657756f6696464ec02896092f1544acaa09/paper/presentations/images/center.png


--------------------------------------------------------------------------------
/paper/presentations/images/qr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlr-org/mlr3fairness/31c5e657756f6696464ec02896092f1544acaa09/paper/presentations/images/qr.png


--------------------------------------------------------------------------------
/paper/presentations/references.bib:
--------------------------------------------------------------------------------
 1 | @inproceedings{modelcards,
 2 | author = {Mitchell, Margaret and Wu, Simone and Zaldivar, Andrew and Barnes, Parker and Vasserman, Lucy and Hutchinson, Ben and Spitzer, Elena and Raji, Inioluwa Deborah and Gebru, Timnit},
 3 | title = {Model Cards for Model Reporting},
 4 | year = {2019},
 5 | isbn = {9781450361255},
 6 | publisher = {Association for Computing Machinery},
 7 | address = {New York, NY, USA},
 8 | url = {https://doi.org/10.1145/3287560.3287596},
 9 | doi = {10.1145/3287560.3287596},
10 | booktitle = {Proceedings of the Conference on Fairness, Accountability, and Transparency},
11 | pages = {220--229},
12 | numpages = {10},
13 | keywords = {ethical considerations, fairness evaluation, ML model evaluation, disaggregated evaluation, documentation, model cards, datasheets},
14 | location = {Atlanta, GA, USA},
15 | series = {FAT* '19}
16 | }
17 | 
18 | @article{datasheets,
19 |   title={Datasheets for datasets},
20 |   author={Gebru, Timnit and Morgenstern, Jamie and Vecchione, Briana and Vaughan, Jennifer Wortman and Wallach, Hanna and Daum{\'e} III, Hal and Crawford, Kate},
21 |   journal={arXiv preprint arXiv:1803.09010},
22 |   year={2018}
23 | }
24 | 
25 |   @Article{mlr3,
26 |     title = {{mlr3}: A modern object-oriented machine learning framework in {R}},
27 |     author = {Michel Lang and Martin Binder and Jakob Richter and Patrick Schratz and Florian Pfisterer and Stefan Coors and Quay Au and Giuseppe Casalicchio and Lars Kotthoff and Bernd Bischl},
28 |     journal = {Journal of Open Source Software},
29 |     year = {2019},
30 |     month = {dec},
31 |     doi = {10.21105/joss.01903},
32 |     url = {https://joss.theoj.org/papers/10.21105/joss.01903},
33 |   }
34 | 


--------------------------------------------------------------------------------
/paper/presentations/style.css:
--------------------------------------------------------------------------------
 1 | body {
 2 |   overflow-x: hidden;
 3 | }
 4 | .level1 h1 {
 5 |   text-align: center;
 6 |   color: #000000;
 7 |   font-size: 65pt;
 8 |   border: 2mm solid #040808;
 9 |   background-color: #fdffff;
10 |   border-radius: 2mm 0mm;
11 |   margin-top: 0mm;
12 |   margin-bottom: 0mm;
13 |   font-weight: normal;
14 | }
15 | .level1 h2 {
16 |   color: #000000;
17 |   font-size: 40pt;
18 |   padding-left: 4mm;
19 |   font-weight: normal;
20 | }
21 | 
22 | img.main-img-left {
23 |   width: 18%;
24 |   left: 0.5in;
25 |   bottom: 0.2in;
26 |   position: absolute;
27 | }
28 | 
29 | img.main-img-right {
30 |   width: 25%;
31 |   right: 0.5in;
32 |   bottom: 0.4in;
33 |   position: absolute;
34 | }
35 | 
36 | img.main_pic {
37 |   width: 63%;
38 |   display: block;
39 |   margin-top: 0%;
40 |   margin-left: auto;
41 |   margin-right: auto;
42 | }
43 | 
44 | .main p {
45 |   padding-top: 10%;
46 | }
47 | 
48 | div.sourceCode  {
49 |   margin: 0.3em;
50 | }
51 | 
52 | pre.sourceCode.r  {
53 |   margin-top: 0.2em;
54 |   margin-bottom: 0.2em
55 | }
56 | 
57 | 


--------------------------------------------------------------------------------
/pkgdown/_pkgdown.yml:
--------------------------------------------------------------------------------
 1 | url: https://mlr3fairness.mlr-org.com
 2 | 
 3 | template:
 4 |   bootstrap: 5
 5 |   package: mlr3pkgdowntemplate
 6 | 
 7 | development:
 8 |   mode: auto
 9 |   version_label: default
10 |   version_tooltip: "Version"
11 | 
12 | toc:
13 |   depth: 3
14 | 
15 | navbar:
16 |   structure:
17 |     left:  [reference, articles, news, book]
18 |     right: [search, github, mattermost, stackoverflow, rss]
19 |   components:
20 |     home: ~
21 |     reference:
22 |       icon: fa fa-file-alt
23 |       text: Reference
24 |       href: reference/index.html
25 |     mattermost:
26 |       icon: fa fa-comments
27 |       href: https://lmmisld-lmu-stats-slds.srv.mwn.de/mlr_invite/
28 |     book:
29 |       text: mlr3book
30 |       icon: fa fa-link
31 |       href: https://mlr3book.mlr-org.com
32 |     stackoverflow:
33 |       icon: fab fa-stack-overflow
34 |       href: https://stackoverflow.com/questions/tagged/mlr3
35 |     rss:
36 |       icon: fa-rss
37 |       href: https://mlr-org.com/
38 | 
39 | reference:
40 |   - title: Measuring Bias
41 |     desc: >
42 |       Performance measures for fairness, based on [Measure].
43 |     contents:
44 |       - mlr_measures_fairness
45 |       - MeasureFairness
46 |       - MeasureFairnessComposite
47 |       - MeasureFairnessConstraint
48 |       - MeasureSubgroup
49 |       - contains("groupdiff")
50 |       - groupwise_metrics
51 |       - compute_metrics
52 |       - mlr_measures_positive_probability
53 |   - title: Detecting Bias
54 |     contents:
55 |       - fairness_tensor
56 |       - fairness_accuracy_tradeoff
57 |       - fairness_prediction_density
58 |       - compare_metrics
59 |   - title: Correcting Bias
60 |     contents:
61 |       - mlr_pipeops_equalized_odds
62 |       - mlr_pipeops_reweighing
63 |       - mlr_pipeops_explicit_pta
64 |   - title: Fair Learners
65 |     contents:
66 |       - mlr_learners_fairness
67 |       - contains("learner")
68 |   - title: Reports
69 |     contents:
70 |       - report_datasheet
71 |       - report_modelcard
72 |       - report_fairness
73 |   - title: Integrated data & tasks
74 |     contents:
75 |       - adult
76 |       - compas
77 |       - task_summary
78 | 


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-120x120.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlr-org/mlr3fairness/31c5e657756f6696464ec02896092f1544acaa09/pkgdown/favicon/apple-touch-icon-120x120.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-152x152.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlr-org/mlr3fairness/31c5e657756f6696464ec02896092f1544acaa09/pkgdown/favicon/apple-touch-icon-152x152.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-180x180.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlr-org/mlr3fairness/31c5e657756f6696464ec02896092f1544acaa09/pkgdown/favicon/apple-touch-icon-180x180.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-60x60.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlr-org/mlr3fairness/31c5e657756f6696464ec02896092f1544acaa09/pkgdown/favicon/apple-touch-icon-60x60.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-76x76.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlr-org/mlr3fairness/31c5e657756f6696464ec02896092f1544acaa09/pkgdown/favicon/apple-touch-icon-76x76.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlr-org/mlr3fairness/31c5e657756f6696464ec02896092f1544acaa09/pkgdown/favicon/apple-touch-icon.png


--------------------------------------------------------------------------------
/pkgdown/favicon/favicon-16x16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlr-org/mlr3fairness/31c5e657756f6696464ec02896092f1544acaa09/pkgdown/favicon/favicon-16x16.png


--------------------------------------------------------------------------------
/pkgdown/favicon/favicon-32x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlr-org/mlr3fairness/31c5e657756f6696464ec02896092f1544acaa09/pkgdown/favicon/favicon-32x32.png


--------------------------------------------------------------------------------
/pkgdown/favicon/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mlr-org/mlr3fairness/31c5e657756f6696464ec02896092f1544acaa09/pkgdown/favicon/favicon.ico


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | if (requireNamespace("testthat", quietly = TRUE)) {
2 |   library("testthat")
3 |   library("checkmate")
4 |   library("mlr3")
5 |   library("mlr3pipelines")
6 |   library("mlr3fairness")
7 |   test_check("mlr3fairness")
8 | }
9 | 


--------------------------------------------------------------------------------
/tests/testthat/helper_data.R:
--------------------------------------------------------------------------------
  1 | library(mlr3learners)
  2 | 
  3 | test_task_small = function(need_pta = TRUE) {
  4 |   # 16-row toy binary classification task; rows 1-8 carry pta level 1, rows 9-16 level 2.
  5 |   toy = data.frame(
  6 |     value = as.factor(c(1, 1, 2, 2, 1, 1, 2, 1, 2, 2, 2, 1, 1, 1, 2, 1)),
  7 |     variable = c(3, 1, 4, 8, 5, 41, 22, 3, 4, 29, 2, 13, 4, 26, 2, 34),
  8 |     pta = as.factor(rep(c(1, 2), each = 8)))
  9 |   task = mlr3::TaskClassif$new("example", as_data_backend(toy), target = "value")
 10 |   if (need_pta) task$col_roles$pta = "pta"  # the pta column role is only set on request
 11 |   task
 12 | }
 13 | 
 14 | pred_small = function() {
 15 |   # Hand-written 16-row classification prediction: a fixed truth vector
 16 |   # against a response that strictly alternates between classes 1 and 2.
 17 |   truth = as.factor(c(1, 1, 2, 2, 1, 1, 2, 1, 2, 2, 2, 1, 1, 1, 2, 1))
 18 |   response = as.factor(rep(c(1, 2), times = 8))
 19 |   PredictionClassif$new(row_ids = 1:16, truth = truth, response = response)
 20 | }
 21 | 
 22 | test_tasks = function() {  # list of the "adult_train" and "compas" tasks, each filtered to row ids 1:500
 23 |   invisible(list(tsk("adult_train")$filter(1:500), suppressWarnings(tsk("compas")$filter(1:500))))
 24 | }
 25 | 
 26 | test_measures = function() {
 27 |   return(msrs(c("fairness.tpr", "fairness.fnr")))  # fixed pair of fairness measures
 28 | }
 29 | 
 30 | # Benchmark rpart vs. the featureless baseline (predict_type = "prob") on test_tasks(), 3-fold CV.
 31 | test_bmr = function() {
 32 |   # not named `lrns`, so the mlr3 sugar function lrns() is not shadowed
 33 |   learners = list(lrn("classif.rpart", predict_type = "prob"), lrn("classif.featureless", predict_type = "prob"))
 34 |   benchmark(benchmark_grid(tasks = test_tasks(), learners = learners, resamplings = rsmp("cv", folds = 3L)))
 35 | }
 36 | 
 37 | # Wrap `data` (target column "value") into an mlr3 task with id "example".
 38 | # task_type "classif" converts the target to a factor first; "regr" uses it as is.
 39 | make_classif_regr_task = function(data, task_type) {
 40 |   if (task_type == "regr") {
 41 |     return(mlr3::TaskRegr$new("example", as_data_backend(data), target = "value"))
 42 |   }
 43 |   if (task_type != "classif") {
 44 |     stop("Task type must be classif or regr!")
 45 |   }
 46 |   data$value = as.factor(data$value)
 47 |   classif_backend = as_data_backend(data)
 48 |   mlr3::TaskClassif$new("example", classif_backend, target = "value")
 49 | }
 50 | 
 51 | # One non-binary pta: 20-row toy task whose protected attribute "pta" has four levels.
 52 | test_task_multipta = function(task_type, need_pta = TRUE) {
 53 |   toy = data.frame(
 54 |     value = rep(1:2, 10),
 55 |     variable = rep(c(1, 4, 3, 6), each = 5),
 56 |     var2 = rnorm(20),
 57 |     pta = as.factor(rep(1:4, 5))
 58 |   )
 59 |   task = make_classif_regr_task(toy, task_type)
 60 |   if (need_pta) task$col_roles$pta = "pta"  # the pta column role is only set on request
 61 |   task
 62 | }
 63 | 
 64 | # Two ptas: 20-row toy task with two binary protected attributes, "pta1" and "pta2".
 65 | test_task_intersect = function(task_type, need_pta = TRUE) {
 66 |   toy = data.frame(
 67 |     value = rep(1:2, 10),
 68 |     variable = rep(c(1, 4, 3, 6), each = 5),
 69 |     var2 = rnorm(20),
 70 |     pta1 = as.factor(rep(1:2, 5)),  # length 10, recycled to 20 rows by data.frame()
 71 |     pta2 = as.factor(rep(1:2, each = 5))  # length 10, recycled to 20 rows as well
 72 |   )
 73 |   task = make_classif_regr_task(toy, task_type)
 74 |   if (need_pta) task$col_roles$pta = c("pta1", "pta2")
 75 |   task
 76 | }
 77 | 
 78 | # Multiclass outcome / two ptas: target "value" takes the values 1..4, plus two binary ptas.
 79 | test_task_multicl = function(task_type, need_pta = TRUE) {
 80 |   toy = data.frame(
 81 |     value = rep(1:4, 5),
 82 |     variable = rep(c(1, 4, 3, 6), each = 5),
 83 |     var2 = rnorm(20),
 84 |     pta1 = as.factor(rep(1:2, 5)),  # length 10, recycled to 20 rows by data.frame()
 85 |     pta2 = as.factor(rep(1:2, each = 5))  # length 10, recycled to 20 rows as well
 86 |   )
 87 |   task = make_classif_regr_task(toy, task_type)
 88 |   if (need_pta) task$col_roles$pta = c("pta1", "pta2")
 89 |   task
 90 | }
 91 | 
 92 | # Continuous protected attribute: "pta" is drawn from rnorm() instead of being a factor.
 93 | test_task_contpta = function(task_type, need_pta = TRUE) {
 94 |   toy = data.frame(
 95 |     value = rep(1:4, 5),
 96 |     variable = rep(c(1, 4, 3, 6), each = 5),
 97 |     var2 = rnorm(20),
 98 |     pta = rnorm(20)
 99 |   )
100 |   task = make_classif_regr_task(toy, task_type)
101 |   if (need_pta) task$col_roles$pta = "pta"  # the pta column role is only set on request
102 |   task
103 | }
104 | 


--------------------------------------------------------------------------------
/tests/testthat/helper_learner_tests.R:
--------------------------------------------------------------------------------
 1 | simple_autotest = function(learner, task) {
 2 |     # Train/predict smoke test on deep clones, so the caller's objects stay untouched.
 3 |     learner = learner$clone(deep = TRUE)
 4 |     task = task$clone(deep = TRUE)
 5 |     first_per_type = task$feature_types[, map(.SD, 1L), by = type]$id
 6 |     # shrink the task: rows 1..min(nrow, 500), first listed feature of each feature type
 7 |     task$filter(seq_len(min(task$nrow, 500)))
 8 |     task$select(cols = first_per_type)
 9 |     learner$train(task)
10 |     assert_true(!is.null(learner$model))
11 |     for (pt in learner$predict_types) {  # each predict type must yield a Prediction
12 |         learner$predict_type = pt
13 |         expect_class(learner$predict(task), "Prediction")
14 |     }
15 | }
16 | 


--------------------------------------------------------------------------------
/tests/testthat/helper_test.R:
--------------------------------------------------------------------------------
 1 | # Check that a plot object is a ggplot and that evaluating it raises
 2 | # neither an error nor a warning.
 3 | check_plots = function(ggplot_obj) {
 4 |   expect_true(inherits(ggplot_obj, "ggplot"))  # is.ggplot() is deprecated in recent ggplot2
 5 |   expect_error(ggplot_obj, NA)
 6 |   expect_warning(ggplot_obj, NA)
 7 | }
 8 | 
 9 | library(mlr3)
10 | lapply(list.files(system.file("testthat", package = "mlr3"), pattern = "helper", full.names = TRUE), source)
11 | # Autotest a learner on generated tasks that carry a random binary pta column; TRUE on success, else the failing run.
12 | run_autotest = function(learner, N = 30L, exclude = NULL, predict_types = learner$predict_types, check_replicable = TRUE) {
13 |   learner = learner$clone(deep = TRUE)
14 |   id = learner$id
15 |   tasks = generate_tasks(learner, N = N)
16 |   map(tasks, function(x) {  # result of map() is discarded; each task is modified through x$cbind() / x$col_roles
17 |     pta = data.table(
18 |       pta = sample(factor(rep_len(c("f1", "f2"), x$nrow), levels = c("f1", "f2"))),
19 |       noisevar = runif(x$nrow)
20 |     )
21 |     x$cbind(pta)
22 |     x$col_roles$pta = "pta"
23 |   })
24 |   # `exclude` is used as a regular expression: tasks whose name matches it are dropped
25 |   if (!is.null(exclude)) {
26 |     tasks = tasks[!grepl(exclude, names(tasks))]
27 |   }
28 | 
29 |   # runs on the "sanity" classification task, keyed by predict type
30 |   sanity_runs = list()
31 |   make_err = function(msg, ...) {  # flags the current `run` as failed with a sprintf()-formatted message and returns it
32 |     run$ok = FALSE
33 |     run$error = sprintf(msg, ...)
34 |     run
35 |   }
36 | 
37 |   for (task in tasks) {
38 |     for (predict_type in predict_types) {
39 |       learner$id = sprintf("%s:%s", id, predict_type)
40 |       learner$predict_type = predict_type
41 | 
42 |       run = run_experiment(task, learner)
43 |       if (!run$ok) {  # stop at the first failing experiment and hand it back to the caller
44 |         return(run)
45 |       }
46 | 
47 |       # re-run task with same seed for feat_all
48 |       if (startsWith(task$id, "feat_all")) {
49 |         repeated_run = run_experiment(task, learner, seed = run$seed)
50 | 
51 |         if (!repeated_run$ok) {
52 |           return(repeated_run)
53 |         }
54 | 
55 |         if (check_replicable && !isTRUE(all.equal(as.data.table(run$prediction), as.data.table(repeated_run$prediction)))) {
56 |           return(make_err("Different results for replicated runs using fixed seed %i", run$seed))
57 |         }
58 |       }
59 | 
60 |       if (task$task_type == "classif" && task$id == "sanity") {
61 |         sanity_runs[[predict_type]] = run
62 |       }
63 |     }
64 |     if (task$task_type == "classif" && length(sanity_runs) > 1L) {  # responses must agree across predict types
65 |       responses = lapply(sanity_runs, function(r) r$prediction$response)
66 |       if (!isTRUE(Reduce(all.equal, responses))) {
67 |         return(make_err("Response is different for different predict types"))
68 |       }
69 |     }
70 |   }
71 |   return(TRUE)
72 | }
73 | 
74 | # Do not load this on CRAN -- NOTE(review): the branch below runs when NOT_CRAN is *not* "true"; confirm the intended condition
75 | if (!identical(Sys.getenv("NOT_CRAN"), "true")) {
76 |   environment(run_autotest) = .GlobalEnv  # re-home the function so its free variables resolve from the global environment
77 |   assign("run_autotest", run_autotest, .GlobalEnv)
78 | }


--------------------------------------------------------------------------------
/tests/testthat/test_datasets.R:
--------------------------------------------------------------------------------
 1 | check_data_format = function(data) {  # data.frame with integer row names and lower-case, valid, unique column names
 2 |   expect_true(is.data.frame(data))
 3 |   for (normalise in list(tolower, function(nm) make.names(nm, unique = TRUE))) {
 4 |     expect_true(all(colnames(data) == normalise(colnames(data))))
 5 |   }
 6 |   expect_true(is.integer(attr(data, "row.names")))
 7 | }
 7 | 
 8 | test_that("tasks can be loaded", {
 9 |   # every bundled task key must resolve to a classification task
10 |   for (key in c("compas", "adult_train", "adult_test")) {
11 |     expect_true(inherits(tsk(key), "TaskClassif"))
12 |   }
13 | })
14 | 
15 | test_that("compas dataset can be loaded with correct format", {
16 |   skip_on_cran()
17 |   compas = tsk("compas")
18 |   expect_r6(compas, "TaskClassif")
19 |   compas_data = compas$data()
20 |   check_data_format(compas_data)
21 |   # expect_identical()/expect_equal() show the offending value on failure, unlike expect_true(a == b)
22 |   expect_identical(dim(compas_data), c(6172L, 12L))
23 |   expect_equal(compas$col_roles$pta, "sex")
24 | 
25 |   expected_types = c("factor", "integer", "factor", "factor", "integer", "integer",
26 |     "factor", "integer", "integer", "factor", "factor", "factor")
27 |   # unname(): sapply() returns a vector named by column
28 |   expect_equal(unname(sapply(compas_data, class)), expected_types)
29 | })
30 | 
31 | test_that("adult_train dataset can be loaded with correct format", {
32 |   skip_on_cran()
33 |   adult_train = tsk("adult_train")
34 |   expect_r6(adult_train, "TaskClassif")
35 |   adult_train_data = adult_train$data()
36 |   # expect_identical()/expect_equal() show the offending value on failure, unlike expect_true(a == b)
37 |   expect_identical(dim(adult_train_data), c(30718L, 13L))
38 |   expect_equal(adult_train$col_roles$pta, "sex")
39 | 
40 |   expected_types = c("factor", "integer", "integer", "integer", "factor", "integer", "integer",
41 |     "factor", "factor", "factor", "factor", "factor", "factor")
42 |   # unname(): sapply() returns a vector named by column
43 |   expect_equal(unname(sapply(adult_train_data, class)), expected_types)
44 | })
45 | 
46 | test_that("adult_test dataset can be loaded with correct format", {
47 |   skip_on_cran()
48 |   adult_test = tsk("adult_test")
49 |   expect_r6(adult_test, "TaskClassif")
50 |   adult_test_data = adult_test$data()
51 |   # expect_identical()/expect_equal() show the offending value on failure, unlike expect_true(a == b)
52 |   expect_identical(dim(adult_test_data), c(15315L, 13L))
53 |   expect_equal(adult_test$col_roles$pta, "sex")
54 | 
55 |   expected_types = c("factor", "integer", "integer", "integer", "factor", "integer", "integer",
56 |     "factor", "factor", "factor", "factor", "factor", "factor")
57 |   # unname(): sapply() returns a vector named by column
58 |   expect_equal(unname(sapply(adult_test_data, class)), expected_types)
59 | })
60 | 


--------------------------------------------------------------------------------
/tests/testthat/test_fairness_tensor.R:
--------------------------------------------------------------------------------
 1 | test_that("fairness_tensor works as expected", {
 2 |   pp = pred_small()
 3 |   tt = test_task_small()
 4 |   # protected attribute next to its row id, used below to split the prediction by group
 5 |   a = cbind(tt$data(cols = c(tt$col_roles$pta)), rw = tt$row_ids)
 6 |   l1 = fairness_tensor(pp, task = tt)
 7 |   l2 = fairness_tensor(pp, normalize = "none", task = tt)
 8 |   l3 = fairness_tensor(pp, normalize = "group", task = tt)
 9 | 
10 |   # expect_equal() compares the floating point sums with a tolerance instead of `==`
11 |   expect_equal(sum(map_int(l2, sum)), length(pp$response))
12 |   expect_equal(sum(map_dbl(l1, sum)), 1)
13 |   expect_equal(sum(map_dbl(l3, sum)), 2)
14 |   # without normalization each element must equal the confusion matrix of one pta group
15 |   expect_true(all(l2[[1]] == pp$clone()$filter(a$rw[a$pta == 1])$confusion))
16 |   expect_true(all(l2[[2]] == pp$clone()$filter(a$rw[a$pta == 2])$confusion))
17 | })
18 | 


--------------------------------------------------------------------------------
/tests/testthat/test_learners_fairml.R:
--------------------------------------------------------------------------------
 1 | test_that("classif.fairfgrrm", {
 2 |     skip_on_cran()
 3 |     skip_if_not_installed("fairml")
 4 |     fgrrm = lrn("classif.fairfgrrm")
 5 |     expect_learner(fgrrm)
 6 |     simple_autotest(fgrrm, tsk("compas")$select(cols = c("age_cat", "priors_count")))  # two features only
 7 |     autotest = run_autotest(fgrrm, exclude = "sanity")  # generic autotest without the "sanity" tasks
 8 |     expect_true(autotest, info = autotest$error)
 9 | })
10 | 
11 | test_that("regr.fairfrrm", {
12 |     skip_on_cran()
13 |     skip_if_not_installed("fairml")
14 |     frrm = lrn("regr.fairfrrm", unfairness = 0.5)
15 |     expect_learner(frrm)
16 |     # regression task on fairml's national.longitudinal.survey data, protected attribute "gender"
17 |     nls = TaskRegr$new("long", fairml::national.longitudinal.survey, target = "income06")
18 |     nls$col_roles$pta = "gender"
19 |     simple_autotest(frrm, nls)
20 |     autotest = run_autotest(frrm)
21 |     expect_true(autotest, info = autotest$error)
22 | })
23 | 
24 | test_that("regr.fairzlm", {
25 |     skip_on_cran()
26 |     skip_if_not_installed("fairml")
27 |     skip_if_not_installed("CVXR")
28 |     zlm = lrn("regr.fairzlm", unfairness = 0.5)
29 |     expect_learner(zlm)
30 |     # regression task on fairml's national.longitudinal.survey data, protected attribute "gender"
31 |     nls = TaskRegr$new("long", fairml::national.longitudinal.survey, target = "income06")
32 |     nls$col_roles$pta = "gender"
33 |     simple_autotest(zlm, nls)
34 | 
35 |     autotest = run_autotest(zlm)
36 |     expect_true(autotest, info = autotest$error)
37 | })
38 | 
39 | test_that("classif.fairzlrm", {
40 |     skip_on_cran()
41 |     skip_if_not_installed("fairml")
42 |     skip_if_not_installed("CVXR")
43 |     zlrm = lrn("classif.fairzlrm", unfairness = 0.2)
44 |     expect_learner(zlrm)
45 |     # smoke test on compas restricted to the features age_cat and priors_count
46 |     simple_autotest(zlrm, tsk("compas")$select(cols = c("age_cat", "priors_count")))
47 | 
48 |     autotest = run_autotest(zlrm, exclude = "sanity")  # generic autotest without the "sanity" tasks
49 |     expect_true(autotest, info = autotest$error)
50 | })
51 | 
52 | test_that("regr.fairnclm", {
53 |     skip_on_cran()
54 |     skip_if_not_installed("fairml")
55 |     nclm = lrn("regr.fairnclm")
56 |     expect_learner(nclm)
57 |     # regression task on fairml's national.longitudinal.survey data, protected attribute "gender"
58 |     nls = TaskRegr$new("long", fairml::national.longitudinal.survey, target = "income06")
59 |     nls$col_roles$pta = "gender"
60 |     simple_autotest(nclm, nls)
61 | 
62 |     autotest = run_autotest(nclm)
63 |     expect_true(autotest, info = autotest$error)
64 | })
65 | 


--------------------------------------------------------------------------------
/tests/testthat/test_learners_fairml_ptas.R:
--------------------------------------------------------------------------------
 1 | test_that("all learners, multi-class pta", {
 2 |     skip_on_cran()
 3 |     skip_if_not_installed("fairml")
 4 |     # every key in mlr_learners_fairness must train and predict on the multi-level pta task
 5 |     for (key in mlr_learners_fairness$key) {
 6 |         fair_learner = lrn(key)
 7 |         toy = test_task_multipta(task_type = fair_learner$task_type)
 8 |         fair_learner$train(toy)
 9 |         pred = fair_learner$predict(toy, row_ids = sample(toy$row_ids, 5))
10 |         expect_prediction(pred)
11 |         pred_dt = as.data.table(pred)
12 |         expect_true(nrow(pred_dt) == 5L)
13 |     }
14 | })
15 | 
16 | test_that("all learners, two pta columns", {
17 |     skip_on_cran()
18 |     skip_if_not_installed("fairml")
19 |     for (k in mlr_learners_fairness$key) {
20 |         learner = lrn(k)
21 |         task = test_task_intersect(task_type = learner$task_type)
22 |         learner$train(task)
23 |         p = learner$predict(task, row_ids = sample(task$row_ids, 5))
24 |         expect_prediction(p)
25 |         dt = as.data.table(p)
26 |         expect_true(nrow(dt) == 5L)
27 |     }
28 | })
29 | 


--------------------------------------------------------------------------------
/tests/testthat/test_measure_subgroup.R:
--------------------------------------------------------------------------------
 1 | test_that("measure constructor works", {
 2 |   m = MeasureSubgroup$new(base_measure = msr("classif.acc"), subgroup = 1L)
 3 |   expect_equal(m$id, "subgroup.acc_1")
 4 |   expect_equal(m$base_measure, msr("classif.acc"))
 5 |   expect_equal(m$range, c(0, 1))
 6 |   expect_equal(m$task_type, "classif")
 7 | 
 8 |   m = MeasureSubgroup$new(base_measure = msr("regr.mse"), subgroup = "Foo")
 9 |   expect_equal(m$id, "subgroup.mse_Foo")
10 |   expect_equal(m$base_measure, msr("regr.mse"))
11 |   expect_equal(m$range, c(0, Inf))
12 |   expect_equal(m$task_type, "regr")
13 | })
14 | 
15 | test_that("subgroup measure scores a single subgroup", {  # description made unique within this file
16 |   skip_if_not_installed("rpart")
17 |   m = MeasureSubgroup$new(base_measure = msr("classif.acc"), subgroup = "Female")
18 |   t = tsk("compas")
19 |   l = lrn("classif.rpart")
20 |   out = l$train(t)$predict(t)$score(m, t)
21 |   expect_number(out, lower = 0, upper = 1)
22 |   expect_named(out, "subgroup.acc_Female")  # score is named <measure id>_<subgroup>
23 | })
24 | 
25 | test_that("groupwise_metrics creates one measure per subgroup", {  # description made unique within this file
26 |   skip_if_not_installed("rpart")
27 |   t = tsk("compas")
28 |   l = lrn("classif.rpart")
29 |   m = groupwise_metrics(msr("classif.acc"), t, intersect = FALSE)
30 |   expect_set_equal(map_chr(m, "subgroup"), t$levels(t$col_roles$pta)[[1]])
31 |   map(m, expect_class, "Measure")
32 |   out = l$train(t)$predict(t)$score(m, t)
33 |   expect_numeric(out, len = 2L, upper = 1, lower = 0)
34 |   expect_set_equal(names(out), c("subgroup.acc_Female", "subgroup.acc_Male"))
35 | 
36 |   pta = get_pta(t, rows = t$row_ids)
37 |   rw_1 = t$row_ids[pta[[1]] == levels(pta[[1]])[1]]  # compare the pta column itself, not the whole table
38 |   rw_2 = t$row_ids[pta[[1]] == levels(pta[[1]])[2]]
39 |   outi = c(  # reference values: plain accuracy computed separately on each subgroup's rows
40 |     l$predict(t, row_ids = rw_1)$score(msr("classif.acc")),
41 |     l$predict(t, row_ids = rw_2)$score(msr("classif.acc"))
42 |   )
43 |   expect_true(all(sort(out) == sort(outi)))
44 | })
45 | 
46 | test_that("multi pta", {
47 |   skip_on_cran()
48 |   skip_if_not_installed("rpart")
49 |   t = tsk("compas")
50 |   t$col_roles$pta = c("sex", "race")
51 |   l = lrn("classif.rpart")
52 |   m = groupwise_metrics(msr("classif.acc"), t)
53 |   map(m, expect_class, "Measure")
54 |   expect_true(length(map(m, "subgroup")) == 12L)
55 |   prd = l$train(t)$predict(t)
56 |   out = prd$score(m, t)
57 |   expect_numeric(out, len = 12L, upper = 1, lower = 0)
58 | 
59 |   m = msr("fairness.acc")
60 |   out2 = prd$score(m, t)
61 |   expect_true(out2 == max(out) - min(out))
62 | })
63 | 
64 | 
65 | test_that("pp differences", {
66 |   skip_on_cran()
67 |   skip_if_not_installed("rpart")
68 |   t = tsk("adult_train")
69 |   l = as_learner(po("reweighing_os") %>>% lrn("classif.rpart"))
70 |   l$train(t)
71 |   prd = l$predict_newdata(t$data())
72 | 
73 |   out = prd$score(msr("fairness.pp"), t)
74 |   expect_number(out, lower = 0, upper = 1)
75 | 
76 |   out = prd$score(msr("fairness.cv"), t)
77 |   expect_number(out, lower = -1, upper = 1)
78 | })
79 | 


--------------------------------------------------------------------------------
/tests/testthat/test_measures_operations.R:
--------------------------------------------------------------------------------
 1 | test_that("core operations for fairness measures", {
 2 |   x = c(.1, .2, .15)
 3 |   expect_true(groupdiff_absdiff(x) == .1)
 4 |   expect_true(groupdiff_diff(x) == -.1)
 5 |   expect_true(groupdiff_tau(x) == .5)
 6 | 
 7 |   x = c(.1, .1, .15)
 8 |   expect_true(abs(groupdiff_absdiff(x) - .05) < 1e-8)
 9 |   expect_equal(groupdiff_diff(x), -0.05)
10 |   expect_true(abs(groupdiff_tau(x) - 2 / 3) < 1e-8)
11 | 
12 |   x = c(.1, .11, 1)
13 |   expect_true(abs(groupdiff_absdiff(x) - .9) < 1e-8)
14 |   expect_equal(groupdiff_diff(x), -.9)
15 |   expect_true(abs(groupdiff_tau(x) - .1) < 1e-8)
16 | 
17 |   x = c(-.1, .1, 1)
18 |   expect_true(abs(groupdiff_absdiff(x) - 1.1) < 1e-8)
19 |   expect_equal(groupdiff_diff(x), -1.1)
20 |   expect_true(abs(groupdiff_tau(x) + 10.) < 1e-8)
21 | 
22 |   x = runif(5)
23 |   expect_number(groupdiff_absdiff(x))
24 |   expect_number(groupdiff_diff(x))
25 |   expect_number(groupdiff_tau(x))
26 | 
27 |   x = c(1, NA)  # missing group scores must propagate as NA through every operation
28 |   expect_true(is.na(groupdiff_absdiff(x)))
29 |   expect_true(is.na(groupdiff_diff(x)))
30 |   expect_true(is.na(groupdiff_tau(x)))  # was a duplicated absdiff check; tau was never covered
31 | })
32 | 


--------------------------------------------------------------------------------
/tests/testthat/test_pipeop_eod.R:
--------------------------------------------------------------------------------
 1 | test_that("PipeOpEOd works on a task", {
 2 |   skip_on_cran()
 3 |   skip_if_not_installed("linprog")
 4 |   task = tsk("adult_train")$filter(1:700)
 5 |   poed = po("EOd")
 6 |   graph = po("learner_cv", lrn("classif.rpart")) %>>% poed
 7 |   glrn = GraphLearner$new(graph)
 8 |   glrn$train(task)
 9 |   expect_true(names(glrn$state$model$EOd) == "flip_probs")
10 |   tem = glrn$predict(task)
11 |   expect_true(!is.null(glrn$state))
12 |   expect_r6(tem, "PredictionClassif")
13 |   expect_number(tem$score(msr("fairness.eod"), task = task))
14 | })
15 | 
16 | test_that("PipeOpEOd technically works for trivial cases priv 0", {
17 |   skip_on_cran()
18 |   skip_if_not_installed("linprog")
19 |   # Test data set / task
20 |   dt = data.table(
21 |     truth = rep(c(0, 1), 100),
22 |     pta = rep(c(0, 1), each = 100)
23 |   )
24 |   dt[, prediction := truth]
25 |   dt[pta == 1 & truth == 1, prediction := c(rep(0, 25), rep(1, 25))]
26 |   dt[, truth := factor(truth)]
27 |   dt[, prediction := factor(prediction)]
28 |   t = TaskClassif$new("test_task", dt, target = "truth")
29 |   t$set_col_roles("pta", "pta")
30 | 
31 |   # Fairness Tensor based
32 |   dft = fairness_tensor(dt, task = t)[[1]] == fairness_tensor(dt, task = t)[[2]]
33 |   expect_true(all(dft[, 1]))
34 |   expect_true(all(!dft[, 2]))
35 | 
36 |   # Errors as expected
37 |   poed = po("EOd", privileged = "5")
38 |   expect_error(poed$train(list(t)), "needs to be a valid value")
39 |   # Trains as expected
40 |   poed = po("EOd", privileged = "0")
41 |   poed$train(list(t))
42 |   expect_true(all(unlist(poed$state) == c(1, 0.5, 1, 0)))
43 |   prd = poed$predict(list(t))[[1]]
44 |   expect_true(prd$score(msr("fairness.eod"), task = t) < 1 / 50)
45 |   # Matching fairness tensors afterwards
46 |   expect_true(all(fairness_tensor(prd, task = t)[[1]] == fairness_tensor(prd, task = t)[[2]]))
47 | 
48 |   # No changes as expected
49 |   poed = po("EOd", privileged = "1")
50 |   poed$train(list(t))
51 |   expect_true(all(unlist(poed$state) == c(1, 0, 1, 0.5)))
52 |   prd = poed$predict(list(t))[[1]]
53 |   expect_true(all(pmap_lgl(list(fairness_tensor(dt, task = t), fairness_tensor(prd, task = t)), function(x, y) all(x == y))))
54 | })
55 | 
56 | test_that("PipeOpEOd technically works for trivial cases priv 1", {
57 |   skip_on_cran()
58 |   skip_if_not_installed("linprog")
59 |   # Test data set
60 |   dt = data.table(
61 |     truth = rep(c(0, 1), 100),
62 |     pta = rep(c(0, 1), each = 100)
63 |   )
64 |   dt[, prediction := truth]
65 |   dt[pta == 0 & truth == 0, prediction := c(rep(0, 25), rep(1, 25))]
66 |   dt[, truth := factor(truth)]
67 |   dt[, prediction := factor(prediction)]
68 |   # Debias
69 |   t = TaskClassif$new("test_task", dt, target = "truth", positive = "0")
70 |   t$set_col_roles("pta", "pta")
71 |   poed = po("EOd", privileged = "1")
72 |   poed$train(list(t))
73 |   expect_true(all(unlist(poed$state) == c(0.5, 0, 1, 0)))
74 |   prd = poed$predict(list(t))[[1]]
75 |   expect_true(all(fairness_tensor(prd, task = t)[[1]] == fairness_tensor(prd, task = t)[[2]]))
76 | })
77 | 


--------------------------------------------------------------------------------
/tests/testthat/test_pipeop_explicit_pta.R:
--------------------------------------------------------------------------------
 1 | 
 2 | test_that("explicit pta", {
 3 |     skip_on_cran()
 4 |     t = tsk("adult_train")
 5 |     p = po("explicit_pta")
 6 |     nt = p$train(list(t))[[1]]
 7 |     expect_true(p$is_trained)
 8 |     expect_equal(nt$feature_names, t$feature_names)
 9 |     expect_equal(nt$col_roles$pta, "..internal_pta_sex")
10 | 
11 |     nt = p$predict(list(t))[[1]]
12 |     expect_equal(nt$feature_names, t$feature_names)
13 |     expect_equal(nt$col_roles$pta, "..internal_pta_sex")
14 | })
15 | 
16 | test_that("explicit pta prevents from dropping during encode", {
17 |     skip_on_cran()
18 |     t = tsk("adult_train")
19 |     p = po("explicit_pta") %>>% po("encodeimpact")
20 |     nt = p$train(t)[[1]]
21 |     expect_true(p$is_trained)
22 |     expect_true(all(nt$feature_types$type %in% c("integer", "numeric")))
23 |     expect_equal(nt$col_roles$pta, "..internal_pta_sex")
24 |     npta = nt$data(cols = nt$col_roles$pta)[[1]]
25 |     opta = t$data(cols = t$col_roles$pta)[[1]]
26 |     expect_equal(npta, opta)
27 | 
28 |     nt = p$predict(t)[[1]]
29 |     expect_true(p$is_trained)
30 |     expect_true(all(nt$feature_types$type %in% c("integer", "numeric")))
31 |     expect_equal(nt$col_roles$pta, "..internal_pta_sex")
32 |     npta = nt$data(cols = nt$col_roles$pta)[[1]]
33 |     opta = t$data(cols = t$col_roles$pta)[[1]]
34 |     expect_equal(npta, opta)
35 | })
36 | 


--------------------------------------------------------------------------------
/tests/testthat/test_pipeop_reweighing.R:
--------------------------------------------------------------------------------
 1 | test_that("reweighing PipeOp can be loaded and works with disparate impact score fairness measures", {
 2 |   skip_on_cran()
 3 |   skip_if_not_installed("rpart")
 4 |   task = tsk("adult_train")$filter(1:300)
 5 |   reweighing = po("reweighing_wts")
 6 |   graph = reweighing %>>% lrn("classif.rpart")
 7 |   glrn = GraphLearner$new(graph)
 8 |   glrn$train(task)
 9 |   tem = glrn$predict(task)
10 |   expect_true(!is.null(glrn$state))
11 |   expect_r6(tem, "PredictionClassif")
12 | })
13 | 
14 | test_that("reweighing_wts", {
15 |   skip_on_cran()
16 |   skip_if_not_installed("rpart")
17 |   tsk = po("reweighing_wts")$train(list(tsk("adult_train")$filter(1:300)))[[1]]
18 |   expect_true(tsk$col_roles$weight == "reweighing.WEIGHTS")
19 |   dt = cbind(tsk$data(cols = c("..row_id", "sex", "target")), tsk$weights)
20 |   dt = dt[, mean(weight) * .N, by = .(sex, target)][, sum(V1 / sum(V1)), by = "target"]
21 |   expect_true(all(abs(dt$V1 - 1) < 1e-3))
22 | })
23 | 
24 | test_that("reweighing_os", {  # exercises the oversampling PipeOp; description previously duplicated "reweighing_wts"
25 |   skip_on_cran()
26 |   skip_if_not_installed("rpart")
27 |   task_os = po("reweighing_os")$train(list(tsk("adult_train")$filter(1:1000)))[[1]]
28 |   dt = task_os$data(cols = c("..row_id", "sex", "target"))
29 |   tab = table(dt$sex, dt$target)
30 |   expect_true(abs(diff(tab[1, ] / tab[2, ])) < .1)  # ratio between sex groups is near-equal across target classes
31 | })
32 | 
33 | test_that("reweighing_wts with initial weights", {
34 |   skip_on_cran()
35 |   skip_if_not_installed("rpart")
36 |   t1 = tsk("compas")
37 |   t2 = t1$clone()
38 |   t2$set_col_roles("age", "weight")
39 | 
40 |   p1 = po("reweighing_wts")
41 |   p2 = p1$clone()
42 | 
43 |   ot1 = p1$train(list(t1))[[1]]
44 |   ot2 = p2$train(list(t2))[[1]]
45 |   w1 = ot1$weights$weight * t1$data(cols = "age")[["age"]]
46 |   w2 = ot2$weights$weight
47 |   expect_true(abs(mean(w1 - w2)) < 1e-2)
48 | })
49 | 
50 | test_that("reweighing errors on multiclass", {
51 |   skip_on_cran()
52 |   skip_if_not_installed("rpart")
53 |   t = tsk("iris")
54 |   t$set_col_roles("Petal.Length", "pta")
55 |   expect_error(po("reweighing_wts")$train(list(t))[[1]], "Only binary")
56 |   expect_error(po("reweighing_os")$train(list(t))[[1]], "Only binary")
57 | })
58 | 
59 | 
60 | test_that("reweighing int to char conversion", {
61 |   skip_on_cran()
62 |   skip_if_not_installed("rpart")
63 |   task = tsk("adult_train")$filter(1:300)
64 |   dt = task$data()
65 | 
66 |   # integer
67 |   dt[, sex := as.integer(sex)]
68 |   t = TaskClassif$new("adult_int", backend = dt, target = "target")
69 |   t$col_roles$pta = "sex"
70 |   tsk = po("reweighing_wts")$train(list(t))[[1]]
71 |   expect_true(tsk$col_roles$weight == "reweighing.WEIGHTS")
72 |   dt = cbind(tsk$data(cols = c("..row_id", "sex", "target")), tsk$weights)
73 |   dt = dt[, mean(weight) * .N, by = .(sex, target)][, sum(V1 / sum(V1)), by = "target"]
74 |   expect_true(all(abs(dt$V1 - 1) < 1e-3))
75 | 
76 |   # numeric
77 |   dt = task$data()
78 |   dt[, sex := as.numeric(sex)]
79 |   t = TaskClassif$new("adult_int", backend = dt, target = "target")
80 |   t$col_roles$pta = "sex"
81 |   tsk = po("reweighing_wts")$train(list(t))[[1]]
82 |   expect_true(tsk$col_roles$weight == "reweighing.WEIGHTS")
83 |   dt = cbind(tsk$data(cols = c("..row_id", "sex", "target")), tsk$weights)
84 |   dt = dt[, mean(weight) * .N, by = .(sex, target)][, sum(V1 / sum(V1)), by = "target"]
85 |   expect_true(all(abs(dt$V1 - 1) < 1e-3))
86 | 
87 |   # ordered
88 |   dt = task$data()
89 |   dt[, sex := as.ordered(sex)]
90 |   t = TaskClassif$new("adult_int", backend = dt, target = "target")
91 |   t$col_roles$pta = "sex"
92 |   tsk = po("reweighing_wts")$train(list(t))[[1]]
93 |   expect_true(tsk$col_roles$weight == "reweighing.WEIGHTS")
94 |   dt = cbind(tsk$data(cols = c("..row_id", "sex", "target")), tsk$weights)
95 |   dt = dt[, mean(weight) * .N, by = .(sex, target)][, sum(V1 / sum(V1)), by = "target"]
96 |   expect_true(all(abs(dt$V1 - 1) < 1e-3))
97 | })
98 | 


--------------------------------------------------------------------------------
/tests/testthat/test_report_modelcard_datasheet.R:
--------------------------------------------------------------------------------
 1 | test_that("model cards", {
 2 |   skip_on_cran()
 3 |   skip_if_not_installed("rmarkdown")
 4 |   skip_if_not_installed("posterdown")
 5 |   skip_if_not(rmarkdown::pandoc_available())
 6 |   tmp = tempdir()
 7 |   tmp = paste0(tmp, "/report")
 8 |   unlink(tmp, recursive = TRUE)
 9 |   report_modelcard(tmp, edit = FALSE)
10 |   expect_true(all(list.files(tmp) %in% c("references.bib", "style.css", paste0(basename(tmp), ".Rmd"), "LICENSE")))
11 |   out = rmarkdown::render(paste0(tmp, "/", basename(tmp),  ".Rmd"), quiet = TRUE)
12 |   expect_character(out)
13 |   expect_true(readLines(out)[1] == "<!DOCTYPE html>")
14 |   unlink(tmp, recursive = TRUE)
15 | })
16 | 
17 | test_that("datasheets", {
18 |   skip_on_cran()
19 |   skip_if_not_installed("rmarkdown")
20 |   skip_if_not_installed("posterdown")
21 |   skip_if_not(rmarkdown::pandoc_available())
22 |   tmp = tempdir()
23 |   tmp = paste0(tmp, "/report")
24 |   unlink(tmp, recursive = TRUE)
25 |   report_datasheet(tmp, edit = FALSE)
26 |   expect_true(all(list.files(tmp) %in% c("references.bib", "style.css", paste0(basename(tmp), ".Rmd"), "LICENSE")))
27 |   out = rmarkdown::render(paste0(tmp, "/", basename(tmp),  ".Rmd"), quiet = TRUE)
28 |   expect_character(out)
29 |   expect_true(readLines(out)[1] == "<!DOCTYPE html>")
30 |   unlink(tmp, recursive = TRUE)
31 | })
32 | 
33 | test_that("fairness_report", {
34 |   skip_on_cran()
35 |   skip_if_not_installed("rmarkdown")
36 |   skip_if_not_installed("kableExtra")
37 |   skip_if_not(rmarkdown::pandoc_available())
38 |   tmp = tempdir()
39 |   tmp = paste0(tmp, "/report")
40 |   unlink(tmp, recursive = TRUE)
41 |   task = suppressWarnings(tsk("compas")$filter(1:500)$select(c("age", "decile_score","race", "sex", "c_charge_degree")))
42 |   learner = lrn("classif.rpart", predict_type = "prob")
43 |   rr = resample(task, learner, rsmp("cv", folds = 5))
44 |   report_fairness(tmp, list(task = task, resample_result = rr, foo = 1))
45 |   expect_true(all(c("references.bib", paste0(basename(tmp), ".Rmd")) %in% list.files(tmp)))
46 |   out = suppressWarnings(rmarkdown::render(paste0(tmp, "/", basename(tmp),  ".Rmd"), quiet = TRUE))
47 |   expect_character(out)
48 |   expect_true(readLines(out)[1] == "<!DOCTYPE html>")
49 |   unlink(tmp, recursive = TRUE)
50 | })
51 | 


--------------------------------------------------------------------------------
/tests/testthat/test_use_modelcard_datasheet.R:
--------------------------------------------------------------------------------
 1 | test_that("model cards", {
 2 |   skip_on_cran()
 3 |   skip("Only tested locally")
 4 |   tmp = tempdir()
 5 |   tmp = paste0(tmp, "/report")
 6 |   unlink(tmp, recursive = TRUE)
 7 |   use_modelcard(tmp, edit = FALSE)
 8 |   expect_true(all(list.files(tmp) %in% c("references.bib", "style.css", paste0(basename(tmp), ".Rmd"), "LICENSE")))
 9 |   out = rmarkdown::render(paste0(tmp, "/", basename(tmp),  ".Rmd"))
10 |   if (interactive()) utils::browseURL(out)  # preview with the default browser only in interactive sessions
11 |   expect_character(out)
12 |   expect_true(readLines(out)[1] == "<!DOCTYPE html>")
13 |   unlink(tmp, recursive = TRUE)
14 | })
15 | 
16 | test_that("datasheets", {
17 |   skip_on_cran()
18 |   skip("Only tested locally")
19 |   tmp = tempdir()
20 |   tmp = paste0(tmp, "/report")
21 |   unlink(tmp, recursive = TRUE)
22 |   use_datasheet(tmp, edit = FALSE)
23 |   expect_true(all(list.files(tmp) %in% c("references.bib", "style.css", paste0(basename(tmp), ".Rmd"), "LICENSE")))
24 |   out = rmarkdown::render(paste0(tmp, "/", basename(tmp),  ".Rmd"))
25 |   expect_character(out)
26 |   expect_true(readLines(out)[1] == "<!DOCTYPE html>")
27 |   unlink(tmp, recursive = TRUE)
28 | })
29 | 
30 | test_that("fairness_report", {
31 |   skip_on_cran()
32 |   skip("Only tested locally")
33 |   tmp = tempdir()
34 |   tmp = paste0(tmp, "/report")
35 |   unlink(tmp, recursive = TRUE)
36 |   task = tsk("compas")
37 |   learner = lrn("classif.rpart", predict_type = "prob")
38 |   rr = resample(task, learner, rsmp("cv", folds = 5))
39 |   report_fairness(tmp, list(task = task, resample_result = rr))  # key name matches the other report_fairness() callers
40 |   expect_true(all(c("references.bib", paste0(basename(tmp), ".Rmd")) %in% list.files(tmp)))
41 |   out = rmarkdown::render(paste0(tmp, "/", basename(tmp),  ".Rmd"))
42 |   expect_character(out)
43 |   expect_true(readLines(out)[1] == "<!DOCTYPE html>")
44 |   unlink(tmp, recursive = TRUE)
45 | })
46 | 


--------------------------------------------------------------------------------
/tests/testthat/test_visualizations.R:
--------------------------------------------------------------------------------
 1 | # fairness_accuracy_tradeoff Tests
 2 | test_that("fairness_accuracy_tradeoff", {
 3 |   skip_if_not_installed("rpart")
 4 |   tasks = test_tasks()
 5 |   fairness_measures = test_measures()
 6 |   bmr = test_bmr()
 7 | 
 8 |   # BMR
 9 |   map(fairness_measures, function(fmsr) {
10 |     check_plots(fairness_accuracy_tradeoff(bmr, fmsr))
11 |   })
12 | 
13 |   # RR
14 |   map(bmr$resample_results$resample_result, function(rr) {
15 |     check_plots(fairness_accuracy_tradeoff(rr, fairness_measures[[1]]))
16 |   })
17 | 
18 |   # PRDS
19 |   map(bmr$resample_results$resample_result[1:2], function(rr) {
20 |     map(rr$predictions(), function(prd) {
21 |       check_plots(fairness_accuracy_tradeoff(prd, fairness_measures[[1]], task = tasks[[1]]))
22 |     })
23 |   })
24 | })
25 | 
26 | test_that("compare_metrics", {
27 |   skip_if_not_installed("rpart")
28 |   tasks = test_tasks()
29 |   fairness_measures = test_measures()
30 |   bmr = test_bmr()
31 | 
32 |   # BMR
33 |   check_plots(compare_metrics(bmr, fairness_measures))
34 | 
35 |   # RR
36 |   map(bmr$resample_results$resample_result, function(rr) {
37 |     check_plots(compare_metrics(rr, fairness_measures[[1]]))
38 |   })
39 | 
40 |   # PRDS
41 |   map(bmr$resample_results$resample_result[1:2], function(rr) {
42 |     map(rr$predictions(), function(prd) {
43 |       check_plots(compare_metrics(prd, fairness_measures[[1]], tasks[[1]]))
44 |     })
45 |   })
46 | })
47 | 
48 | test_that("prediction_density", {
49 |   skip_if_not_installed("rpart")
50 |   tasks = test_tasks()
51 |   fairness_measures = test_measures()
52 |   bmr = test_bmr()
53 | 
54 |   # BMR
55 |   check_plots(fairness_prediction_density(bmr))
56 |   check_plots(fairness_prediction_density(bmr, type = "violin"))
57 | 
58 |   # RR
59 |   map(bmr$resample_results$resample_result, function(rr) {
60 |     check_plots(fairness_prediction_density(rr))
61 |   })
62 | 
63 |   # PRDS
64 |   map(bmr$resample_results$resample_result[1:2], function(rr) {
65 |     map(rr$predictions(), function(prd) {
66 |       check_plots(fairness_prediction_density(prd, tasks[[1]]))
67 |     })
68 |   })
69 | })
70 | 


--------------------------------------------------------------------------------
/tests/testthat/test_write_files.R:
--------------------------------------------------------------------------------
 1 | test_that("write_files", {
 2 |   skip_on_cran()
 3 |   skip_if_not_installed("rpart")
 4 |   skip_if_not_installed("rmarkdown")
 5 |   tdir = tempdir()
 6 |   tdir = paste0(tdir, "/report")
 7 |   if (!dir.exists(tdir)) dir.create(tdir)
 8 |   unlink(list.files(tdir, full.names = TRUE), recursive = TRUE)  # start from an empty report directory
 9 |   object = list("one" = 1, "two" = 1)
10 |   write_files(object, tdir)
11 |   lfiles = list.files(tdir, full.names = TRUE)
12 |   expect_true(all(basename(lfiles) %in% c("one.rds", "two.rds", "read_data.Rmd")))
13 |   map_lgl(lfiles[grepl("\\.rds$", lfiles)], function(x) {  # anchor on the extension; full paths may contain "rds" elsewhere
14 |     x = readRDS(x)
15 |     expect_true(x == 1)
16 |   })
17 |   ll = readLines(lfiles[basename(lfiles) == "read_data.Rmd"])  # exact file name match instead of an unescaped regex
18 |   expect_true(ll[2] == "one = readRDS('one.rds')")
19 |   unlink(list.files(tdir, full.names = TRUE), recursive = TRUE)
20 | })
21 | 


--------------------------------------------------------------------------------
/vignettes/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | *.R
3 | 


--------------------------------------------------------------------------------
/vignettes/debiasing-vignette.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Debiasing Methods"
 3 | output: rmarkdown::html_vignette
 4 | vignette: >
 5 |   %\VignetteIndexEntry{Debiasing Methods}
 6 |   %\VignetteEngine{knitr::rmarkdown}
 7 |   %\VignetteEncoding{UTF-8}
 8 | ---
 9 | 
10 | ```{r setup, include = FALSE}
11 | knitr::opts_chunk$set(
12 |   collapse = TRUE,
13 |   comment = "#>"
14 | )
15 | ```
16 | 
17 | # Introduction: Fairness Pipeline Operators
18 | 
19 | Given we detected some form of bias during bias auditing, we are often interested in obtaining fair(er) models.
20 | There are several ways to achieve this, such as collecting additional data or finding and fixing errors in the data.
21 | Assuming there are no biases in the data and labels, another option is to debias models using **preprocessing**, **postprocessing** or **inprocessing** methods.
22 | `mlr3fairness` provides some operators as `PipeOp`s for `mlr3pipelines`.
23 | If you are not familiar with `mlr3pipelines`, the [mlr3 book](https://mlr3book.mlr-org.com/pipelines.html) contains an introduction. 
24 | 
25 | We again showcase debiasing using the `adult_train` task:
26 | 
27 | ```{r}
28 | library(mlr3)
29 | library(mlr3fairness)
30 | library(mlr3pipelines)
31 | 
32 | task = tsk("adult_train")
33 | ```
34 | 
35 | #  Reweighing algorithms
36 | 
37 | `mlr3fairness` implements 2 reweighing-based algorithms:
38 | `reweighing_wts` and `reweighing_os`. 
39 | `reweighing_wts` adds observation weights to a `Task` that can counteract imbalances between the conditional probabilities $P(Y | pta)$.
40 | 
41 | ```{r, echo = FALSE}
42 | library(mlr3misc)
43 | dt = as.data.table(mlr_pipeops)
44 | knitr::kable(dt[map_lgl(dt$tags, function(x) "fairness" %in% x)][, c(1,7,8,9,10)])
45 | ```
46 | 
47 | We first instantiate the `PipeOp`:
48 | 
49 | ```{r}
50 | p1 = po("reweighing_wts")
51 | ```
52 | 
53 | and directly add the weights:
54 | 
55 | ```{r}
56 | t1 = p1$train(list(task))[[1]]
57 | ```
58 | 
59 | Often we directly combine the `PipeOp` with a `Learner` to automate the preprocessing (see `learner_rw`). Below we instantiate a small benchmark:
60 | 
61 | ```{r}
62 | set.seed(4321)
63 | learner = lrn("classif.rpart", cp = 0.005)
64 | learner_rw = as_learner(po("reweighing_wts") %>>% learner)
65 | grd = benchmark_grid(list(task), list(learner, learner_rw), rsmp("cv", folds=3))
66 | bmr = benchmark(grd)
67 | ```
68 | 
69 | We can now compute the metrics for our benchmark and see if reweighing actually improved fairness, measured
70 | via True Positive Rate (TPR) and classification accuracy (ACC):
71 | 
72 | ```{r}
73 | bmr$aggregate(msrs(c("fairness.tpr", "fairness.acc")))
74 | ```
75 | ```{r}
76 | fairness_accuracy_tradeoff(bmr, msr("fairness.tpr"))
77 | ```
78 | 
79 | Our model became considerably fairer with respect to TPR, but minimally worse with respect to accuracy!
80 | 


--------------------------------------------------------------------------------
/vignettes/reports-vignette.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Reports"
 3 | output: rmarkdown::html_vignette
 4 | vignette: >
 5 |   %\VignetteIndexEntry{Reports}
 6 |   %\VignetteEngine{knitr::rmarkdown}
 7 |   %\VignetteEncoding{UTF-8}
 8 | ---
 9 | 
10 | ```{r setup-knitr, include = FALSE}
11 | knitr::opts_chunk$set(
12 |   collapse = TRUE,
13 |   comment = "#>"
14 | )
15 | ```
16 | 
17 | ```{r setup-load, echo = FALSE, message = FALSE}
18 | library(mlr3)
19 | library(mlr3fairness)
20 | td = "../docs/articles"
21 | if (!dir.exists(td)) dir.create(td, recursive = TRUE)
22 | report_dirs = c("datasheet", "modelcard", "fairness")
23 | unlink(paste0(td, "/", report_dirs), recursive = TRUE)
24 | ```
25 | 
26 | `mlr3fairness` contains several templates that allow for creating reports based on `RMarkdown` files.
27 | The `report_*` functions instantiate a new `.Rmd` file that can be further adapted by the user.
28 | 
29 | The following reports are currently available in `mlr3fairness`.
30 | 
31 | | Report             | Description             | Reference             |
32 | | ------------------ | ----------------------- | --------------------- |
33 | | `report_modelcard` | Modelcard for ML models | Mitchell et al., 2018 |
34 | | `report_datasheet` | Datasheet for data sets | Gebru et al., 2018    |
35 | | `report_fairness`  | Fairness Report         | –                     |
36 | 
37 | **Usage:**
38 | 
39 | Templates contain a set of pre-defined questions which can be used for reporting as well as
40 | initial graphics. The created `.Rmd` file can then be extended by the
41 | user. It can later be converted into an `html` report using `rmarkdown::render()`.
42 | 
43 | 
44 | ```{r eval_false_example_for_vignette, eval = FALSE, results = 'hide', message = FALSE}
45 | library(mlr3fairness)
46 | rmdfile = report_datasheet()
47 | rmarkdown::render(rmdfile)
48 | ```
49 | 
50 | 
51 | ```{r build_modelcard_example_for_vignette, echo = FALSE, results = 'hide', message = FALSE}
52 | rmdfile = report_modelcard(paste0(td, "/modelcard"))
53 | rmarkdown::render(rmdfile)
54 | ```
55 | 
56 | ## Examples
57 | 
58 | #### [Example: Model Card](https://mlr3fairness.mlr-org.com/articles/modelcard/modelcard.html)
59 | 
60 | 
61 | ```{r build_datasheet_example_for_vignette, echo = FALSE, results = 'hide', message = FALSE}
62 | rmdfile = report_datasheet(paste0(td, "/datasheet"))
63 | rmarkdown::render(rmdfile)
64 | ```
65 | 
66 | #### [Example: Data Sheet](https://mlr3fairness.mlr-org.com/articles/datasheet/datasheet.html)
67 | 
68 | 
69 | ```{r build_fairness_example_for_vignette, echo = FALSE, results = 'hide', message = FALSE, warning = FALSE, error = FALSE}
70 | task = tsk("adult_train")$filter(1:700)$select(c("age", "education", "marital_status", "sex", "race"))
71 | learner = lrn("classif.rpart", predict_type = "prob")
72 | rr = resample(task, learner, rsmp("cv", folds = 5))
73 | rmdfile = report_fairness(paste0(td, "/fairness"), list(task = task, resample_result = rr))
74 | rmarkdown::render(rmdfile)
75 | ```
76 | 
77 | #### [Example: Fairness Report](https://mlr3fairness.mlr-org.com/articles/fairness/fairness.html)
78 | 


--------------------------------------------------------------------------------