├── .Rbuildignore ├── .github ├── ISSUE_TEMPLATE │ ├── api_docu.yml │ ├── bug_report.yml │ ├── config.yml │ └── feature_request.yml ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── check_styler.yml │ ├── deploy_docu.yml │ ├── deploy_docu_dev.yml │ ├── deploy_pkg.yml │ └── rcheck.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── DESCRIPTION ├── DoubleML.Rproj ├── LICENSE ├── NAMESPACE ├── R ├── datasets.R ├── double_ml.R ├── double_ml_data.R ├── double_ml_iivm.R ├── double_ml_irm.R ├── double_ml_pliv.R ├── double_ml_plr.R ├── double_ml_ssm.R ├── helper.R └── zzz.R ├── README.Rmd ├── README.md ├── _pkgdown.yml ├── codecov.yml ├── inst └── CITATION ├── man ├── DoubleML.Rd ├── DoubleMLClusterData.Rd ├── DoubleMLData.Rd ├── DoubleMLIIVM.Rd ├── DoubleMLIRM.Rd ├── DoubleMLPLIV.Rd ├── DoubleMLPLR.Rd ├── DoubleMLSSM.Rd ├── double_ml_data_from_data_frame.Rd ├── double_ml_data_from_matrix.Rd ├── fetch_401k.Rd ├── fetch_bonus.Rd ├── figures │ ├── logo.png │ └── oop.svg ├── make_iivm_data.Rd ├── make_irm_data.Rd ├── make_pliv_CHS2015.Rd ├── make_pliv_multiway_cluster_CKMS2021.Rd ├── make_plr_CCDDHNR2018.Rd ├── make_plr_turrell2018.Rd └── make_ssm_data.Rd ├── pkgdown └── favicon │ ├── apple-touch-icon-120x120.png │ ├── apple-touch-icon-152x152.png │ ├── apple-touch-icon-180x180.png │ ├── apple-touch-icon-60x60.png │ ├── apple-touch-icon-76x76.png │ ├── apple-touch-icon.png │ ├── favicon-16x16.png │ ├── favicon-32x32.png │ └── favicon.ico ├── tests ├── testthat │ ├── helper-01-helper_functions.R │ ├── helper-03-dgp.R │ ├── helper-04-simdata.R │ ├── helper-05-ml-learner.R │ ├── helper-08-dml_plr.R │ ├── helper-09-dml_pliv.R │ ├── helper-10-dml_irm.R │ ├── helper-11-dml_iivm.R │ ├── helper-12-p_adjust.R │ ├── helper-13-dml_pliv_partial_x.R │ ├── helper-14-dml_pliv_partial_z.R │ ├── helper-15-dml_pliv_partial_xz.R │ ├── helper-16-dml_cluster.R │ ├── helper-17-dml_ssm.R │ ├── print_outputs │ │ ├── dml_cluster_data.txt │ │ ├── dml_data.txt │ │ ├── dml_pliv.txt │ │ └── 
dml_plr.txt │ ├── test-double_ml_active_bindings.R │ ├── test-double_ml_cluster_not_implemented.R │ ├── test-double_ml_data.R │ ├── test-double_ml_data_active_bindings.R │ ├── test-double_ml_data_cluster.R │ ├── test-double_ml_datasets.R │ ├── test-double_ml_iivm.R │ ├── test-double_ml_iivm_binary_outcome.R │ ├── test-double_ml_iivm_parameter_passing.R │ ├── test-double_ml_iivm_trim.R │ ├── test-double_ml_iivm_tuning.R │ ├── test-double_ml_iivm_user_score.R │ ├── test-double_ml_irm.R │ ├── test-double_ml_irm_binary_outcome.R │ ├── test-double_ml_irm_loaded_mlr3learner.R │ ├── test-double_ml_irm_parameter_passing.R │ ├── test-double_ml_irm_trim.R │ ├── test-double_ml_irm_tuning.R │ ├── test-double_ml_irm_user_score.R │ ├── test-double_ml_pliv.R │ ├── test-double_ml_pliv_exception_handling.R │ ├── test-double_ml_pliv_multi_z_parameter_passing.R │ ├── test-double_ml_pliv_one_way_cluster.R │ ├── test-double_ml_pliv_parameter_passing.R │ ├── test-double_ml_pliv_partial_functional_initializer.R │ ├── test-double_ml_pliv_partial_functional_initializer_IVtype.R │ ├── test-double_ml_pliv_partial_x.R │ ├── test-double_ml_pliv_partial_xz.R │ ├── test-double_ml_pliv_partial_xz_parameter_passing.R │ ├── test-double_ml_pliv_partial_z.R │ ├── test-double_ml_pliv_partial_z_parameter_passing.R │ ├── test-double_ml_pliv_tuning.R │ ├── test-double_ml_pliv_two_way_cluster.R │ ├── test-double_ml_pliv_user_score.R │ ├── test-double_ml_plr.R │ ├── test-double_ml_plr_classifier.R │ ├── test-double_ml_plr_exception_handling.R │ ├── test-double_ml_plr_export_preds.R │ ├── test-double_ml_plr_loaded_mlr3learner.R │ ├── test-double_ml_plr_multitreat.R │ ├── test-double_ml_plr_nocrossfit.R │ ├── test-double_ml_plr_nonorth.R │ ├── test-double_ml_plr_p_adjust.R │ ├── test-double_ml_plr_parameter_passing.R │ ├── test-double_ml_plr_rep_cross_fit.R │ ├── test-double_ml_plr_set_samples.R │ ├── test-double_ml_plr_tuning.R │ ├── test-double_ml_plr_user_score.R │ ├── test-double_ml_print.R │ ├── 
test-double_ml_set_sample_splitting.R │ ├── test-double_ml_ssm_mar.R │ ├── test-double_ml_ssm_nonignorable.R │ └── test-double_ml_ssm_tuning.R └── testthat_regression_tests.R └── vignettes ├── .gitignore ├── Introduction_to_DoubleML.Rmd ├── getstarted.Rmd └── install.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^_pkgdown\.yml$ 4 | ^docs$ 5 | ^pkgdown$ 6 | ^README.Rmd 7 | ^CONTRIBUTING.md 8 | ^CODE_OF_CONDUCT.md 9 | ^\.github$ 10 | ^codecov\.yml$ 11 | ^doc$ 12 | ^Meta$ 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/api_docu.yml: -------------------------------------------------------------------------------- 1 | name: API Documentation Improvement 2 | description: Suggest an improvement for the API documentation. 3 | title: "[API Documentation]: " 4 | labels: ["documentation"] 5 | 6 | body: 7 | - type: textarea 8 | attributes: 9 | label: Describe the issue related to the API documentation 10 | validations: 11 | required: true 12 | - type: textarea 13 | attributes: 14 | label: Suggested alternative or fix -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- 1 | name: Bug Report 2 | description: File a bug report 3 | title: "[Bug]: " 4 | labels: ["bug"] 5 | assignees: 6 | - PhilippBach 7 | 8 | body: 9 | - type: markdown 10 | attributes: 11 | value: | 12 | Thanks for taking the time to fill out this bug report! 13 | - type: textarea 14 | id: description 15 | attributes: 16 | label: Describe the bug 17 | description: | 18 | Please provide a clear and concise description of the bug. 
19 | validations: 20 | required: true 21 | - type: textarea 22 | id: mwe 23 | attributes: 24 | label: Minimum reproducible code snippet 25 | description: | 26 | Please provide a short reproducible code snippet. Example: 27 | 28 | ```R 29 | library(DoubleML) 30 | library(mlr3) 31 | library(mlr3learners) 32 | library(data.table) 33 | set.seed(2) 34 | ml_g = lrn("regr.ranger", num.trees = 10, max.depth = 2) 35 | ml_m = ml_g$clone() 36 | obj_dml_data = make_plr_CCDDHNR2018(alpha = 0.5) 37 | dml_plr_obj = DoubleMLPLR$new(obj_dml_data, ml_g, ml_m) 38 | dml_plr_obj$fit() 39 | dml_plr_obj$summary() 40 | ``` 41 | placeholder: | 42 | 43 | ```R 44 | Sample code here 45 | ``` 46 | validations: 47 | required: true 48 | - type: textarea 49 | id: expected 50 | attributes: 51 | label: Expected Result 52 | description: | 53 | State the result you would have expected 54 | validations: 55 | required: true 56 | - type: textarea 57 | id: actual 58 | attributes: 59 | label: Actual Result 60 | description: | 61 | State the result you get (for example an exception including the full traceback) 62 | validations: 63 | required: true 64 | - type: textarea 65 | id: versions 66 | attributes: 67 | label: Versions 68 | description: | 69 | Please run the following code and copy-paste the result 70 | 71 | ```R 72 | sessionInfo() 73 | packageVersion('DoubleML') 74 | packageVersion('mlr3') 75 | ``` 76 | validations: 77 | required: true -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | contact_links: 3 | - name: Discussions 4 | url: https://github.com/DoubleML/doubleml-for-r/discussions/new 5 | about: Ask questions about the DoubleML package or start related discussions 6 | - name: Documentation and User Guide 7 | url: https://github.com/DoubleML/doubleml-docs/issues/new/choose 8 | about: Issues related to the 
documentation and user guide on docs.doubleml.org should be reported in the doubleml-docs repo 9 | - name: Blank Issue 10 | url: https://github.com/DoubleML/doubleml-for-r/issues/new 11 | about: Please note that often Discussions is the better choice 12 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yml: -------------------------------------------------------------------------------- 1 | name: Feature Request 2 | description: Suggest an enhancement or extension for the DoubleML package 3 | title: "[Feature Request]: " 4 | labels: ["new feature", "enhancement"] 5 | 6 | body: 7 | - type: textarea 8 | attributes: 9 | label: Describe the feature you want to propose or implement 10 | validations: 11 | required: true 12 | - type: textarea 13 | attributes: 14 | label: Propose a possible solution or implementation 15 | - type: textarea 16 | attributes: 17 | label: Did you consider alternatives to the proposed solution? If yes, please describe 18 | - type: textarea 19 | attributes: 20 | label: Comments, context or references -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | Thanks for contributing to DoubleML. 2 | Before submitting a PR, please take a look at our [contribution guidelines](https://github.com/DoubleML/doubleml-for-r/blob/main/CONTRIBUTING.md). 3 | Additionally, please fill out the PR checklist below. 4 | 5 | ### Description 6 | Please describe all changes and additions. 7 | In addition, you may want to comment on the diff in GitHub. 8 | 9 | ### Reference to Issues or PRs 10 | Add references to related issues or PRs here. 11 | 12 | ### Comments 13 | Here you can add further comments. 14 | You can also delete this section if it is not necessary. 
15 | 16 | ### PR Checklist 17 | Please fill out this PR checklist (see our [contributing guidelines](https://github.com/DoubleML/doubleml-for-r/blob/main/CONTRIBUTING.md#checklist-for-pull-requests-pr) for details). 18 | 19 | - [ ] The title of the pull request summarizes the changes made. 20 | - [ ] The PR contains a detailed description of all changes and additions. 21 | - [ ] References to related issues or PRs are added. 22 | - [ ] The code passes `R CMD check` and all (unit) tests (see our [contributing guidelines](https://github.com/DoubleML/doubleml-for-r/blob/main/CONTRIBUTING.md#checklist-for-pull-requests-pr) for details). 23 | - [ ] Enhancements or new features are equipped with unit tests. 24 | - [ ] The changes adhere to the "mlr-style" standards (see our [contributing guidelines](https://github.com/DoubleML/doubleml-for-r/blob/main/CONTRIBUTING.md#checklist-for-pull-requests-pr) for details). 25 | -------------------------------------------------------------------------------- /.github/workflows/check_styler.yml: -------------------------------------------------------------------------------- 1 | # Workflow based on https://github.com/r-lib/actions/tree/master/examples#standard-ci-workflow 2 | name: Check style guidelines 3 | 4 | on: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | schedule: 12 | - cron: "0 9 * * 1,3,5" 13 | workflow_dispatch: 14 | 15 | jobs: 16 | R-CMD-check: 17 | runs-on: ${{ matrix.config.os }} 18 | 19 | name: ${{ matrix.config.os }} (R ${{ matrix.config.r }}; mlr3 ${{ matrix.config.mlr3 }}) 20 | 21 | strategy: 22 | fail-fast: false 23 | matrix: 24 | config: 25 | - {os: ubuntu-22.04, r: 'release', mlr3: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} 26 | 27 | env: 28 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true 29 | RSPM: ${{ matrix.config.rspm }} 30 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 31 | 32 | steps: 33 | - uses: actions/checkout@v4 34 | with: 35 | 
fetch-depth: 2 36 | 37 | - uses: r-lib/actions/setup-r@v2 38 | with: 39 | r-version: ${{ matrix.config.r }} 40 | 41 | - uses: r-lib/actions/setup-pandoc@v2 42 | 43 | - name: Install dependencies 44 | run: | 45 | install.packages('remotes') 46 | remotes::install_github("mlr-org/styler.mlr", dependencies=TRUE) 47 | install.packages("styler") 48 | shell: Rscript {0} 49 | 50 | - name: Install system dependencies 51 | run: | 52 | while read -r cmd 53 | do 54 | eval sudo $cmd 55 | done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))') 56 | 57 | - name: Check styler 58 | run: | 59 | res = styler::style_pkg(style = styler.mlr::mlr_style, include_roxygen_examples = FALSE) 60 | if (!all(res$changed==FALSE)) stop("Code is not in line with the style guidelines (see https://github.com/DoubleML/doubleml-for-r/wiki/Style-Guidelines#use-styler-mlr-style)") 61 | shell: Rscript {0} 62 | -------------------------------------------------------------------------------- /.github/workflows/deploy_docu.yml: -------------------------------------------------------------------------------- 1 | # Workflow based on https://github.com/r-lib/actions/blob/master/examples/pkgdown.yaml 2 | 3 | name: CI deploy documentation (stable) 4 | 5 | on: 6 | push: 7 | branches: 8 | - main 9 | workflow_dispatch: 10 | release: 11 | types: 12 | - published 13 | 14 | jobs: 15 | build: 16 | 17 | runs-on: ubuntu-22.04 18 | 19 | steps: 20 | - uses: actions/checkout@v4 21 | with: 22 | persist-credentials: false 23 | - name: Install SSH Client 24 | uses: webfactory/ssh-agent@v0.7.0 25 | with: 26 | ssh-private-key: ${{ secrets.DEPLOY_KEY }} 27 | 28 | - name: Install R 29 | uses: r-lib/actions/setup-r@v2 30 | with: 31 | r-version: 'release' 32 | 33 | - uses: r-lib/actions/setup-pandoc@v2 34 | 35 | - name: Query dependencies 36 | run: | 37 | install.packages('remotes') 38 | saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) 39 | 
writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") 40 | shell: Rscript {0} 41 | 42 | - name: Cache R packages 43 | uses: actions/cache@v4 44 | with: 45 | path: ${{ env.R_LIBS_USER }} 46 | key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} 47 | restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- 48 | 49 | - name: Install system dependencies package 50 | run: | 51 | while read -r cmd 52 | do 53 | eval sudo $cmd 54 | done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))') 55 | 56 | - name: Install system dependencies pkgdown 57 | run: | 58 | while read -r cmd 59 | do 60 | eval sudo $cmd 61 | done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04", package="pkgdown"))') 62 | 63 | - name: Install additional system dependencies for pkgdown 64 | run: sudo apt-get install libharfbuzz-dev libfribidi-dev 65 | 66 | - name: Install dependencies 67 | run: | 68 | remotes::install_deps(dependencies = TRUE) 69 | remotes::install_cran("pkgdown") 70 | shell: Rscript {0} 71 | 72 | - name: Install package 73 | run: R CMD INSTALL . 
74 | 75 | - name: Build docu with pkgdown 76 | run: Rscript -e 'pkgdown::build_site(new_process = FALSE)' 77 | 78 | - name: Deploy to stable 79 | if: ${{ github.event_name == 'workflow_dispatch' }} 80 | uses: JamesIves/github-pages-deploy-action@v4 81 | with: 82 | repository-name: DoubleML/doubleml.github.io 83 | branch: main 84 | folder: docs/dev 85 | target-folder: r/stable 86 | git-config-name: DoubleML Deploy Bot 87 | git-config-email: DoubleML@users.noreply.github.com 88 | clean: true 89 | ssh-key: ${{ secrets.DEPLOY_KEY }} 90 | 91 | - name: Get tag 92 | run: echo RELEASE_VERSION=${GITHUB_REF:10} >> $GITHUB_ENV 93 | 94 | - name: Deploy to stable (release) 95 | if: ${{ github.event_name == 'release' }} 96 | uses: JamesIves/github-pages-deploy-action@v4 97 | with: 98 | repository-name: DoubleML/doubleml.github.io 99 | branch: main 100 | folder: docs/dev 101 | target-folder: r/stable 102 | git-config-name: DoubleML Deploy Bot 103 | git-config-email: DoubleML@users.noreply.github.com 104 | clean: true 105 | ssh-key: ${{ secrets.DEPLOY_KEY }} 106 | -------------------------------------------------------------------------------- /.github/workflows/deploy_docu_dev.yml: -------------------------------------------------------------------------------- 1 | # Workflow based on https://github.com/r-lib/actions/blob/master/examples/pkgdown.yaml 2 | 3 | name: CI deploy documentation (dev) 4 | 5 | on: 6 | push: 7 | branches: 8 | - main 9 | workflow_dispatch: 10 | release: 11 | types: 12 | - published 13 | 14 | jobs: 15 | build: 16 | runs-on: ubuntu-22.04 17 | 18 | steps: 19 | - uses: actions/checkout@v4 20 | with: 21 | persist-credentials: false 22 | - name: Install SSH Client 23 | uses: webfactory/ssh-agent@v0.7.0 24 | with: 25 | ssh-private-key: ${{ secrets.DEPLOY_KEY }} 26 | 27 | - name: Install R 28 | uses: r-lib/actions/setup-r@v2 29 | with: 30 | r-version: 'release' 31 | 32 | - uses: r-lib/actions/setup-pandoc@v2 33 | 34 | - name: Query dependencies 35 | run: | 36 | 
install.packages('remotes') 37 | saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) 38 | writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") 39 | shell: Rscript {0} 40 | 41 | - name: Cache R packages 42 | uses: actions/cache@v4 43 | with: 44 | path: ${{ env.R_LIBS_USER }} 45 | key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} 46 | restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- 47 | 48 | - name: Install system dependencies package 49 | run: | 50 | while read -r cmd 51 | do 52 | eval sudo $cmd 53 | done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))') 54 | 55 | - name: Install system dependencies pkgdown 56 | run: | 57 | while read -r cmd 58 | do 59 | eval sudo $cmd 60 | done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04", package="pkgdown"))') 61 | 62 | - name: Install additional system dependencies for pkgdown 63 | run: sudo apt-get install libharfbuzz-dev libfribidi-dev 64 | 65 | - name: Install dependencies 66 | run: | 67 | remotes::install_deps(dependencies = TRUE) 68 | remotes::install_cran("pkgdown") 69 | shell: Rscript {0} 70 | 71 | - name: Install package 72 | run: R CMD INSTALL . 
73 | 74 | - name: Build docu with pkgdown 75 | run: Rscript -e 'pkgdown::build_site(new_process = FALSE)' 76 | 77 | - name: Deploy to dev 78 | if: ${{ github.event_name == 'workflow_dispatch' || github.event_name == 'push' }} 79 | uses: JamesIves/github-pages-deploy-action@v4 80 | with: 81 | repository-name: DoubleML/doubleml.github.io 82 | branch: main 83 | folder: docs/dev 84 | target-folder: r/dev 85 | git-config-name: DoubleML Deploy Bot 86 | git-config-email: DoubleML@users.noreply.github.com 87 | clean: true 88 | ssh-key: ${{ secrets.DEPLOY_KEY }} 89 | 90 | - name: Get tag 91 | run: echo RELEASE_VERSION=${GITHUB_REF:10} >> $GITHUB_ENV 92 | 93 | - name: Deploy to version 94 | if: ${{ github.event_name == 'release' }} 95 | uses: JamesIves/github-pages-deploy-action@v4 96 | with: 97 | repository-name: DoubleML/doubleml.github.io 98 | branch: main 99 | folder: docs/dev 100 | target-folder: r/${{env.RELEASE_VERSION}} 101 | git-config-name: DoubleML Deploy Bot 102 | git-config-email: DoubleML@users.noreply.github.com 103 | clean: true 104 | ssh-key: ${{ secrets.DEPLOY_KEY }} 105 | -------------------------------------------------------------------------------- /.github/workflows/deploy_pkg.yml: -------------------------------------------------------------------------------- 1 | # Workflow based on https://github.com/r-lib/actions/tree/master/examples#standard-ci-workflow 2 | name: CI deploy package 3 | 4 | on: 5 | release: 6 | types: 7 | - published 8 | workflow_dispatch: 9 | 10 | jobs: 11 | build: 12 | runs-on: ubuntu-22.04 13 | 14 | steps: 15 | - uses: actions/checkout@v4 16 | 17 | - uses: r-lib/actions/setup-r@v2 18 | with: 19 | r-version: 'release' 20 | 21 | - uses: r-lib/actions/setup-pandoc@v2 22 | 23 | - name: Query dependencies 24 | run: | 25 | install.packages('remotes') 26 | saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) 27 | writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), 
".github/R-version") 28 | shell: Rscript {0} 29 | 30 | - name: Cache R packages 31 | uses: actions/cache@v4 32 | with: 33 | path: ${{ env.R_LIBS_USER }} 34 | key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} 35 | restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- 36 | 37 | - name: Install system dependencies 38 | run: | 39 | while read -r cmd 40 | do 41 | eval sudo $cmd 42 | done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))') 43 | 44 | - name: Install dependencies 45 | run: | 46 | remotes::install_deps(dependencies = TRUE) 47 | shell: Rscript {0} 48 | 49 | - name: Build package 50 | run: | 51 | R CMD build . 52 | 53 | - uses: actions/upload-artifact@v4 54 | with: 55 | name: DoubleML-pkg 56 | path: DoubleML_*.tar.gz 57 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | Meta 2 | # History files 3 | .Rhistory 4 | .Rapp.history 5 | 6 | # Session Data files 7 | .RData 8 | # User-specific files 9 | .Ruserdata 10 | # Example code in package build process 11 | *-Ex.R 12 | # Output files from R CMD build 13 | /*.tar.gz 14 | # Output files from R CMD check 15 | /*.Rcheck/ 16 | # RStudio files 17 | .Rproj.user/ 18 | 19 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 20 | .httr-oauth 21 | # knitr and R markdown default cache directories 22 | /*_cache/ 23 | /cache/ 24 | # Temporary files created by R markdown 25 | *.utf8.md 26 | *.knit.md 27 | 28 | DoubleML.Rproj 29 | .Rproj.user 30 | 31 | *.tar.gz 32 | *.Rcheck 33 | 34 | # documentation files 35 | docs/ 36 | docs 37 | doc 38 | inst/doc 39 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: DoubleML 2 | Type: Package 3 | Title: Double 
Machine Learning in R 4 | Version: 1.0.1.9000 5 | Authors@R: c( 6 | person("Philipp", "Bach", email = "philipp.bach@uni-hamburg.de", role=c("aut", "cre")), 7 | person("Victor", "Chernozhukov", role="aut"), 8 | person("Malte S.", "Kurz", email = "mkurz-software@gmx.de", role="aut"), 9 | person("Martin", "Spindler", email="martin.spindler@gmx.de", role="aut"), 10 | person("Sven", "Klaassen", email="sven.klaassen@uni-hamburg.de", role="aut")) 11 | Description: Implementation of the double/debiased machine learning framework of 12 | Chernozhukov et al. (2018) for partially linear 13 | regression models, partially linear instrumental variable regression models, 14 | interactive regression models and interactive instrumental variable 15 | regression models. 'DoubleML' allows estimation of the nuisance parts in 16 | these models by machine learning methods and computation of the Neyman 17 | orthogonal score functions. 'DoubleML' is built on top of 'mlr3' and the 18 | 'mlr3' ecosystem. The object-oriented implementation of 'DoubleML' based on 19 | the 'R6' package is very flexible. More information is available in the 20 | publication in the Journal of Statistical Software: <doi:10.18637/jss.v108.i03>. 
21 | License: MIT + file LICENSE 22 | URL: https://docs.doubleml.org/stable/index.html, https://github.com/DoubleML/doubleml-for-r/ 23 | BugReports: https://github.com/DoubleML/doubleml-for-r/issues 24 | Encoding: UTF-8 25 | Depends: 26 | R (>= 3.5.0) 27 | Imports: 28 | R6 (>= 2.4.1), 29 | data.table (>= 1.12.8), 30 | stats, 31 | checkmate, 32 | mlr3 (>= 0.5.0), 33 | mlr3tuning (>= 0.3.0), 34 | mvtnorm, 35 | utils, 36 | clusterGeneration, 37 | readstata13, 38 | mlr3learners (>= 0.3.0), 39 | mlr3misc 40 | Roxygen: list(markdown = TRUE, r6 = TRUE) 41 | RoxygenNote: 7.3.2 42 | Suggests: 43 | knitr, 44 | rmarkdown, 45 | testthat, 46 | covr, 47 | patrick (>= 0.1.0), 48 | paradox (>= 0.4.0), 49 | dplyr, 50 | glmnet, 51 | lgr, 52 | ranger, 53 | sandwich, 54 | AER, 55 | rpart, 56 | bbotk, 57 | mlr3pipelines 58 | VignetteBuilder: knitr 59 | Collate: 60 | 'double_ml.R' 61 | 'double_ml_data.R' 62 | 'double_ml_ssm.R' 63 | 'double_ml_iivm.R' 64 | 'double_ml_irm.R' 65 | 'double_ml_pliv.R' 66 | 'double_ml_plr.R' 67 | 'helper.R' 68 | 'datasets.R' 69 | 'zzz.R' 70 | -------------------------------------------------------------------------------- /DoubleML.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: knitr 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Package 16 | PackageUseDevtools: Yes 17 | PackageInstallArgs: --no-multiarch --with-keep.source 18 | PackageCheckArgs: --as-cran --no-tests --run-donttest 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2019-2023 2 | COPYRIGHT HOLDER: Philipp Bach, Victor Chernozhukov, Malte S. 
Kurz, Martin Spindler, Sven Klaassen -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(DoubleMLClusterData) 4 | export(DoubleMLData) 5 | export(DoubleMLIIVM) 6 | export(DoubleMLIRM) 7 | export(DoubleMLPLIV) 8 | export(DoubleMLPLR) 9 | export(DoubleMLSSM) 10 | export(double_ml_data_from_data_frame) 11 | export(double_ml_data_from_matrix) 12 | export(fetch_401k) 13 | export(fetch_bonus) 14 | export(make_iivm_data) 15 | export(make_irm_data) 16 | export(make_pliv_CHS2015) 17 | export(make_pliv_multiway_cluster_CKMS2021) 18 | export(make_plr_CCDDHNR2018) 19 | export(make_plr_turrell2018) 20 | export(make_ssm_data) 21 | import(checkmate) 22 | importFrom(R6,R6Class) 23 | importFrom(clusterGeneration,genPositiveDefMat) 24 | importFrom(data.table,as.data.table) 25 | importFrom(data.table,data.table) 26 | importFrom(data.table,setnafill) 27 | importFrom(mlr3,Task) 28 | importFrom(mlr3,TaskClassif) 29 | importFrom(mlr3,TaskRegr) 30 | importFrom(mlr3,default_measures) 31 | importFrom(mlr3,lrn) 32 | importFrom(mlr3,msr) 33 | importFrom(mlr3,resample) 34 | importFrom(mlr3,rsmp) 35 | importFrom(mlr3learners,LearnerRegrLM) 36 | importFrom(mlr3misc,insert_named) 37 | importFrom(mlr3tuning,TuningInstanceBatchSingleCrit) 38 | importFrom(mlr3tuning,tnr) 39 | importFrom(mlr3tuning,trm) 40 | importFrom(mvtnorm,rmvnorm) 41 | importFrom(readstata13,read.dta13) 42 | importFrom(stats,formula) 43 | importFrom(stats,median) 44 | importFrom(stats,model.matrix) 45 | importFrom(stats,p.adjust) 46 | importFrom(stats,p.adjust.methods) 47 | importFrom(stats,pnorm) 48 | importFrom(stats,printCoefmat) 49 | importFrom(stats,qnorm) 50 | importFrom(stats,quantile) 51 | importFrom(stats,rexp) 52 | importFrom(stats,rnorm) 53 | importFrom(stats,runif) 54 | importFrom(stats,toeplitz) 55 | importFrom(utils,compareVersion) 
56 | importFrom(utils,packageVersion) 57 | importFrom(utils,read.table) 58 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | #' @import checkmate 2 | #' @importFrom R6 R6Class 3 | #' @importFrom mlr3 lrn rsmp msr resample default_measures Task TaskRegr 4 | #' TaskClassif 5 | #' @importFrom mlr3tuning TuningInstanceBatchSingleCrit tnr trm 6 | #' @importFrom mlr3learners LearnerRegrLM 7 | #' @importFrom mlr3misc insert_named 8 | #' @importFrom data.table data.table as.data.table setnafill 9 | #' @importFrom readstata13 read.dta13 10 | #' @importFrom stats formula model.matrix rnorm runif rexp toeplitz pnorm qnorm 11 | #' printCoefmat quantile p.adjust.methods p.adjust median 12 | #' @importFrom mvtnorm rmvnorm 13 | #' @importFrom clusterGeneration genPositiveDefMat 14 | #' @importFrom utils read.table compareVersion packageVersion 15 | NULL 16 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | destination: docs 2 | 3 | home: 4 | title: DoubleML - Double Machine Learning with R 5 | links: 6 | - text: Download 7 | href: https://github.com/DoubleML/doubleml-for-r/ 8 | 9 | development: 10 | mode: devel 11 | 12 | toc: 13 | depth: 3 14 | 15 | template: 16 | bootstrap: 5 17 | 18 | navbar: 19 | structure: 20 | left: [home, userguide, getstarted, reference, articles] 21 | right: [github] 22 | components: 23 | home: ~ 24 | userguide: 25 | text: User Guide 26 | icon: fa fa-file-alt 27 | href: https://docs.doubleml.org/ 28 | reference: 29 | text: Reference 30 | href: reference/index.html 31 | getstarted: 32 | text: Get started 33 | href: articles/getstarted.html 34 | articles: 35 | text: Articles 36 | menu: 37 | - text: Install DoubleML 38 | href: articles/install.html 39 | - text: Introduction to Double Machine Learning 40 
| href: articles/Introduction_to_DoubleML.html 41 | news: 42 | text: Release Notes 43 | href: https://docs.doubleml.org/stable/release/release.html 44 | 45 | reference: 46 | - title: Double machine learning data class 47 | contents: 48 | - DoubleMLData 49 | - DoubleMLClusterData 50 | - double_ml_data_from_data_frame 51 | - double_ml_data_from_matrix 52 | - title: Abstract base class for double machine learning models 53 | contents: 54 | - DoubleML 55 | - title: Double machine learning models 56 | contents: 57 | - DoubleMLPLR 58 | - DoubleMLPLIV 59 | - DoubleMLIRM 60 | - DoubleMLIIVM 61 | - DoubleMLSSM 62 | - title: Datasets module 63 | contents: 64 | - fetch_401k 65 | - fetch_bonus 66 | - title: Datasets generators 67 | contents: 68 | - make_plr_CCDDHNR2018 69 | - make_pliv_CHS2015 70 | - make_irm_data 71 | - make_iivm_data 72 | - make_plr_turrell2018 73 | - make_pliv_multiway_cluster_CKMS2021 74 | - make_ssm_data 75 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | coverage: 3 | status: 4 | project: off 5 | patch: off 6 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | bibentry(bibtype = "Article", 2 | title = "{DoubleML}: An Object-Oriented Implementation of Double Machine Learning in {R}", 3 | author = c(person(given = "Philipp", 4 | family = "Bach", 5 | email = "philipp.bach@uni-hamburg.de"), 6 | person(given = c("Malte", "S."), 7 | family = "Kurz", 8 | email = "malte.kurz@tum.de"), 9 | person(given = "Victor", 10 | family = "Chernozhukov", 11 | email = "vchern@mit.edu"), 12 | person(given = "Martin", 13 | family = "Spindler", 14 | email = "martin.spindler@uni-hamburg.de"), 15 | person(given = "Sven", 16 | family = "Klaassen", 17 | email = "sven.klaassen@uni-hamburg.de")), 18 | 
journal = "Journal of Statistical Software", 19 | year = "2024", 20 | volume = "108", 21 | number = "3", 22 | pages = "1--56", 23 | doi = "10.18637/jss.v108.i03", 24 | header = "To cite DoubleML in publications use:" 25 | ) 26 | -------------------------------------------------------------------------------- /man/double_ml_data_from_data_frame.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/double_ml_data.R 3 | \name{double_ml_data_from_data_frame} 4 | \alias{double_ml_data_from_data_frame} 5 | \title{Wrapper for Double machine learning data-backend initialization from 6 | data.frame.} 7 | \usage{ 8 | double_ml_data_from_data_frame( 9 | df, 10 | x_cols = NULL, 11 | y_col = NULL, 12 | d_cols = NULL, 13 | z_cols = NULL, 14 | s_col = NULL, 15 | cluster_cols = NULL, 16 | use_other_treat_as_covariate = TRUE 17 | ) 18 | } 19 | \arguments{ 20 | \item{df}{(\code{data.frame()})\cr 21 | Data object.} 22 | 23 | \item{x_cols}{(\code{NULL}, \code{character()}) \cr 24 | The covariates. If \code{NULL}, all variables (columns of \code{data}) which are 25 | neither specified as outcome variable \code{y_col}, nor as treatment variables 26 | \code{d_cols}, nor as instrumental variables \code{z_cols} are used as covariates. 27 | Default is \code{NULL}.} 28 | 29 | \item{y_col}{(\code{character(1)}) \cr 30 | The outcome variable.} 31 | 32 | \item{d_cols}{(\code{character()}) \cr 33 | The treatment variable(s).} 34 | 35 | \item{z_cols}{(\code{NULL}, \code{character()}) \cr 36 | The instrumental variables. Default is \code{NULL}.} 37 | 38 | \item{s_col}{(\code{NULL}, \code{character()}) \cr 39 | The score or selection variable (only relevant/used for SSM Estimators). Default is \code{NULL}.} 40 | 41 | \item{cluster_cols}{(\code{NULL}, \code{character()}) \cr 42 | The cluster variables. 
Default is \code{NULL}.} 43 | 44 | \item{use_other_treat_as_covariate}{(\code{logical(1)}) \cr 45 | Indicates whether in the multiple-treatment case the other treatment 46 | variables should be added as covariates. Default is \code{TRUE}.} 47 | } 48 | \value{ 49 | Creates a new instance of class \code{DoubleMLData}. 50 | } 51 | \description{ 52 | Initialization of DoubleMLData from \code{data.frame}. 53 | } 54 | \examples{ 55 | df = make_plr_CCDDHNR2018(return_type = "data.frame") 56 | x_names = names(df)[grepl("X", names(df))] 57 | obj_dml_data = double_ml_data_from_data_frame( 58 | df = df, x_cols = x_names, 59 | y_col = "y", d_cols = "d") 60 | # Input: Data frame, Output: DoubleMLData object 61 | } 62 | -------------------------------------------------------------------------------- /man/double_ml_data_from_matrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/double_ml_data.R 3 | \name{double_ml_data_from_matrix} 4 | \alias{double_ml_data_from_matrix} 5 | \title{Wrapper for Double machine learning data-backend initialization 6 | from matrix.} 7 | \usage{ 8 | double_ml_data_from_matrix( 9 | X = NULL, 10 | y, 11 | d, 12 | z = NULL, 13 | s = NULL, 14 | cluster_vars = NULL, 15 | data_class = "DoubleMLData", 16 | use_other_treat_as_covariate = TRUE 17 | ) 18 | } 19 | \arguments{ 20 | \item{X}{(\code{matrix()}) \cr 21 | Matrix of covariates.} 22 | 23 | \item{y}{(\code{numeric()}) \cr 24 | Vector of outcome variable.} 25 | 26 | \item{d}{(\code{matrix()}) \cr 27 | Matrix of treatment variables.} 28 | 29 | \item{z}{(\code{matrix()}) \cr 30 | Matrix of instruments.} 31 | 32 | \item{s}{(\code{numeric()}) \cr 33 | Vector of the score or selection variable (only relevant for SSM models).} 34 | 35 | \item{cluster_vars}{(\code{matrix()}) \cr 36 | Matrix of cluster variables.} 37 | 38 | \item{data_class}{(\code{character(1)}) \cr 39 | Class of returned 
object. By default, an object of class \code{DoubleMLData} is 40 | returned. Setting \code{data_class = "data.table"} returns an object of class 41 | \code{data.table}.} 42 | 43 | \item{use_other_treat_as_covariate}{(\code{logical(1)}) \cr 44 | Indicates whether in the multiple-treatment case the other treatment 45 | variables should be added as covariates. Default is \code{TRUE}.} 46 | } 47 | \value{ 48 | Creates a new instance of class \code{DoubleMLData}. 49 | } 50 | \description{ 51 | Initialization of DoubleMLData from \code{matrix()} objects. 52 | } 53 | \examples{ 54 | matrix_list = make_plr_CCDDHNR2018(return_type = "matrix") 55 | obj_dml_data = double_ml_data_from_matrix( 56 | X = matrix_list$X, 57 | y = matrix_list$y, 58 | d = matrix_list$d) 59 | } 60 | -------------------------------------------------------------------------------- /man/fetch_401k.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/datasets.R 3 | \name{fetch_401k} 4 | \alias{fetch_401k} 5 | \title{Data set on financial wealth and 401(k) plan participation.} 6 | \usage{ 7 | fetch_401k( 8 | return_type = "DoubleMLData", 9 | polynomial_features = FALSE, 10 | instrument = FALSE 11 | ) 12 | } 13 | \arguments{ 14 | \item{return_type}{(\code{character(1)}) \cr 15 | If \code{"DoubleMLData"}, returns a \code{DoubleMLData} object. 16 | If \code{"data.frame"} returns a \code{data.frame()}. 17 | If \code{"data.table"} returns a \code{data.table()}. 18 | Default is \code{"DoubleMLData"}.} 19 | 20 | \item{polynomial_features}{(\code{logical(1)}) \cr 21 | If \code{TRUE} polynomial features are added 22 | (see replication file of Chernozhukov et al. (2018)).} 23 | 24 | \item{instrument}{(\code{logical(1)}) \cr 25 | If \code{TRUE}, the returned data object contains the variables \code{e401} and \code{p401}. 
26 | If \code{return_type = "DoubleMLData"}, the variable \code{e401} is used as an 27 | instrument for the endogenous treatment variable \code{p401}. 28 | If \code{FALSE}, \code{p401} is removed from the data set.} 29 | } 30 | \value{ 31 | A data object according to the choice of \code{return_type}. 32 | } 33 | \description{ 34 | Preprocessed data set on financial wealth and 401(k) plan participation. 35 | The raw data files are preprocessed to reproduce the examples in 36 | Chernozhukov et al. (2020). 37 | An internet connection is required to successfully download the data set. 38 | } 39 | \details{ 40 | Variable description, based on the supplementary material of 41 | Chernozhukov et al. (2020): 42 | \itemize{ 43 | \item net_tfa: net total financial assets 44 | \item e401: = 1 if employer offers 401(k) 45 | \item p401: = 1 if individual participates in a 401(k) plan 46 | \item age: age 47 | \item inc: income 48 | \item fsize: family size 49 | \item educ: years of education 50 | \item db: = 1 if individual has defined benefit pension 51 | \item marr: = 1 if married 52 | \item twoearn: = 1 if two-earner household 53 | \item pira: = 1 if individual participates in IRA plan 54 | \item hown: = 1 if home owner 55 | } 56 | 57 | The supplementary data of the study by Chernozhukov et al. (2018) is 58 | available at 59 | \url{https://academic.oup.com/ectj/article/21/1/C1/5056401#supplementary-data}. 60 | } 61 | \references{ 62 | Abadie, A. (2003), Semiparametric instrumental variable 63 | estimation of treatment response models. 64 | Journal of Econometrics, 113(2): 231-263. 65 | 66 | Chernozhukov, V., Chetverikov, D., Demirer, M., Duflo, E., 67 | Hansen, C., Newey, W. and Robins, J. (2018), Double/debiased machine learning 68 | for treatment and structural parameters. 69 | The Econometrics Journal, 21: C1-C68. \doi{10.1111/ectj.12097}. 
70 | } 71 | -------------------------------------------------------------------------------- /man/fetch_bonus.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/datasets.R 3 | \name{fetch_bonus} 4 | \alias{fetch_bonus} 5 | \title{Data set on the Pennsylvania Reemployment Bonus experiment.} 6 | \usage{ 7 | fetch_bonus(return_type = "DoubleMLData", polynomial_features = FALSE) 8 | } 9 | \arguments{ 10 | \item{return_type}{(\code{character(1)}) \cr 11 | If \code{"DoubleMLData"}, returns a \code{DoubleMLData} object. 12 | If \code{"data.frame"} returns a \code{data.frame()}. 13 | If \code{"data.table"} returns a \code{data.table()}. Default is \code{"DoubleMLData"}.} 14 | 15 | \item{polynomial_features}{(\code{logical(1)}) \cr 16 | If \code{TRUE} polynomial features are added (see replication file of 17 | Chernozhukov et al. (2018)).} 18 | } 19 | \value{ 20 | A data object according to the choice of \code{return_type}. 21 | } 22 | \description{ 23 | Preprocessed data set on the Pennsylvania Reemployment Bonus experiment. 24 | The raw data files are preprocessed to reproduce the examples in 25 | Chernozhukov et al. (2020). 26 | An internet connection is required to successfully download the data set. 27 | } 28 | \details{ 29 | Variable description, based on the supplementary material of 30 | Chernozhukov et al. (2020): 31 | \itemize{ 32 | \item abdt: chronological time of enrollment of each claimant in the 33 | Pennsylvania reemployment bonus experiment. 34 | \item tg: indicates the treatment group (bonus amount - qualification period) 35 | of each claimant. 36 | \item inuidur1: a measure of length (in weeks) of the first spell of 37 | unemployment. 38 | \item inuidur2: a second measure for the length (in weeks) of the first spell of unemployment. 39 | \item female: dummy variable; it indicates if the claimant's sex is 40 | female (=1) or male (=0). 
41 | \item black: dummy variable; it indicates a person of black race (=1). 42 | \item hispanic: dummy variable; it indicates a person of hispanic race (=1). 43 | \item othrace: dummy variable; it indicates a non-white, non-black, 44 | not-hispanic person (=1). 45 | \item dep1: dummy variable; indicates if the number of dependents of each 46 | claimant is equal to 1 (=1). 47 | \item dep2: dummy variable; indicates if the number of dependents of each 48 | claimant is equal to 2 (=1). 49 | \item q1-q6: six dummy variables indicating the quarter of experiment during 50 | which each claimant enrolled. 51 | \item recall: takes the value of 1 if the claimant answered ``yes'' when 52 | he/she was asked if he/she had any expectation to be recalled. 53 | \item agelt35: takes the value of 1 if the claimant's age is less than 35 54 | and 0 otherwise. 55 | \item agegt54: takes the value of 1 if the claimant's age is more than 54 56 | and 0 otherwise. 57 | \item durable: it takes the value of 1 if the occupation of the claimant was in 58 | the sector of durable manufacturing and 0 otherwise. 59 | \item nondurable: it takes the value of 1 if the occupation of the claimant was 60 | in the sector of nondurable manufacturing and 0 otherwise. 61 | \item lusd: it takes the value of 1 if the claimant filed in Coatesville, 62 | Reading, or Lancaster and 0 otherwise. 63 | \item These three sites were considered to be located in areas characterized by 64 | low unemployment rate and short duration of unemployment. 65 | \item husd: it takes the value of 1 if the claimant filed in Lewistown, 66 | Pittston, or Scranton and 0 otherwise. 67 | \item These three sites were considered to be located in areas characterized by 68 | high unemployment rate and short duration of unemployment. 69 | \item muld: it takes the value of 1 if the claimant filed in Philadelphia-North, 70 | Philadelphia-Uptown, McKeesport, Erie, or Butler and 0 otherwise. 
71 | \item These five sites were considered to be located in areas characterized by 72 | moderate unemployment rate and long duration of unemployment. 73 | } 74 | 75 | The supplementary data of the study by Chernozhukov et al. (2018) is 76 | available at \url{https://academic.oup.com/ectj/article/21/1/C1/5056401#supplementary-data}. 77 | 78 | The supplementary data of the study by Bilias (2000) is available at 79 | \url{https://www.journaldata.zbw.eu/dataset/sequential-testing-of-duration-data-the-case-of-the-pennsylvania-reemployment-bonus-experiment}. 80 | } 81 | \examples{ 82 | library(DoubleML) 83 | df_bonus = fetch_bonus(return_type = "data.table") 84 | obj_dml_data_bonus = DoubleMLData$new(df_bonus, 85 | y_col = "inuidur1", 86 | d_cols = "tg", 87 | x_cols = c( 88 | "female", "black", "othrace", "dep1", "dep2", 89 | "q2", "q3", "q4", "q5", "q6", "agelt35", "agegt54", 90 | "durable", "lusd", "husd" 91 | ) 92 | ) 93 | obj_dml_data_bonus 94 | } 95 | \references{ 96 | Bilias, Y. (2000), Sequential Testing of Duration Data: 97 | The Case of Pennsylvania ‘Reemployment Bonus’ Experiment. Journal of Applied 98 | Econometrics, 15(6): 575-594. 99 | 100 | Chernozhukov, V., Chetverikov, D., Demirer, M., Duflo, E., 101 | Hansen, C., Newey, W. and Robins, J. (2018), Double/debiased machine learning 102 | for treatment and structural parameters. 103 | The Econometrics Journal, 21: C1-C68. \doi{10.1111/ectj.12097}. 
104 | } 105 | -------------------------------------------------------------------------------- /man/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DoubleML/doubleml-for-r/e8227c8ac99f8f888af1759028219fb604bcfa14/man/figures/logo.png -------------------------------------------------------------------------------- /man/make_iivm_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/datasets.R 3 | \name{make_iivm_data} 4 | \alias{make_iivm_data} 5 | \title{Generates data from an interactive IV regression (IIVM) model.} 6 | \usage{ 7 | make_iivm_data( 8 | n_obs = 500, 9 | dim_x = 20, 10 | theta = 1, 11 | alpha_x = 0.2, 12 | return_type = "DoubleMLData" 13 | ) 14 | } 15 | \arguments{ 16 | \item{n_obs}{(\code{integer(1)}) \cr 17 | The number of observations to simulate.} 18 | 19 | \item{dim_x}{(\code{integer(1)}) \cr 20 | The number of covariates.} 21 | 22 | \item{theta}{(\code{numeric(1)}) \cr 23 | The value of the causal parameter.} 24 | 25 | \item{alpha_x}{(\code{numeric(1)}) \cr 26 | The value of the parameter \eqn{\alpha_x}.} 27 | 28 | \item{return_type}{(\code{character(1)}) \cr 29 | If \code{"DoubleMLData"}, returns a \code{DoubleMLData} object. 30 | If \code{"data.frame"} returns a \code{data.frame()}. 31 | If \code{"data.table"} returns a \code{data.table()}. 32 | If \code{"matrix"} a named \code{list()} with entries \code{X}, \code{y}, \code{d} and \code{z} 33 | is returned. 34 | Every entry in the list is a \code{matrix()} object. Default is \code{"DoubleMLData"}.} 35 | } 36 | \description{ 37 | Generates data from an interactive IV regression (IIVM) model. 
38 | The data generating process is defined as 39 | 40 | \eqn{d_i = 1\left\lbrace \alpha_x Z + v_i > 0 \right\rbrace,} 41 | 42 | \eqn{y_i = \theta d_i + x_i' \beta + u_i,} 43 | 44 | \eqn{Z \sim \textstyle{Bernoulli} (0.5)} and 45 | 46 | \eqn{\left(\begin{array}{c} u_i \\ v_i \end{array} \right) \sim 47 | \mathcal{N}\left(0, \left(\begin{array}{cc} 1 & 0.3 \\ 0.3 & 1 48 | \end{array} \right) \right).} 49 | 50 | The covariates are \eqn{x_i \sim \mathcal{N}(0, \Sigma)}, where \eqn{\Sigma} 51 | is a matrix with entries 52 | \eqn{\Sigma_{kj} = 0.5^{|j-k|}} and \eqn{\beta} is a \code{dim_x}-vector with 53 | entries \eqn{\beta_j=\frac{1}{j^2}}. 54 | 55 | The data generating process is inspired by a process used in the 56 | simulation experiment of Farbmacher, Guber and Klaaßen (2020). 57 | } 58 | \references{ 59 | Farbmacher, H., Guber, R. and Klaaßen, S. (2020). 60 | Instrument Validity Tests with Causal Forests. 61 | MEA Discussion Paper No. 13-2020. 62 | Available at SSRN: \doi{10.2139/ssrn.3619201}. 
63 | } 64 | -------------------------------------------------------------------------------- /man/make_irm_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/datasets.R 3 | \name{make_irm_data} 4 | \alias{make_irm_data} 5 | \title{Generates data from an interactive regression (IRM) model.} 6 | \usage{ 7 | make_irm_data( 8 | n_obs = 500, 9 | dim_x = 20, 10 | theta = 0, 11 | R2_d = 0.5, 12 | R2_y = 0.5, 13 | return_type = "DoubleMLData" 14 | ) 15 | } 16 | \arguments{ 17 | \item{n_obs}{(\code{integer(1)}) \cr 18 | The number of observations to simulate.} 19 | 20 | \item{dim_x}{(\code{integer(1)}) \cr 21 | The number of covariates.} 22 | 23 | \item{theta}{(\code{numeric(1)}) \cr 24 | The value of the causal parameter.} 25 | 26 | \item{R2_d}{(\code{numeric(1)}) \cr 27 | The value of the parameter \eqn{R_d^2}.} 28 | 29 | \item{R2_y}{(\code{numeric(1)}) \cr 30 | The value of the parameter \eqn{R_y^2}.} 31 | 32 | \item{return_type}{(\code{character(1)}) \cr 33 | If \code{"DoubleMLData"}, returns a \code{DoubleMLData} object. 34 | If \code{"data.frame"} returns a \code{data.frame()}. 35 | If \code{"data.table"} returns a \code{data.table()}. 36 | If \code{"matrix"} a named \code{list()} with entries \code{X}, \code{y}, \code{d} and \code{z} 37 | is returned. 38 | Every entry in the list is a \code{matrix()} object. Default is \code{"DoubleMLData"}.} 39 | } 40 | \description{ 41 | Generates data from an interactive regression (IRM) model. 
42 | The data generating process is defined as 43 | 44 | \eqn{d_i = 1\left\lbrace \frac{\exp(c_d x_i' \beta)}{1+\exp(c_d x_i' \beta)} 45 | > v_i \right\rbrace,} 46 | 47 | \eqn{ y_i = \theta d_i + c_y x_i' \beta d_i + \zeta_i,} 48 | 49 | with \eqn{v_i \sim \mathcal{U}(0,1)}, \eqn{\zeta_i \sim \mathcal{N}(0,1)} 50 | and covariates \eqn{x_i \sim \mathcal{N}(0, \Sigma)}, where \eqn{\Sigma} 51 | is a matrix with entries \eqn{\Sigma_{kj} = 0.5^{|j-k|}}. 52 | \eqn{\beta} is a \code{dim_x}-vector with entries \eqn{\beta_j = \frac{1}{j^2}} 53 | and the constants \eqn{c_y} and \eqn{c_d} are given by 54 | 55 | \eqn{ c_y = \sqrt{\frac{R_y^2}{(1-R_y^2) \beta' \Sigma \beta}},} 56 | 57 | \eqn{c_d = \sqrt{\frac{(\pi^2 /3) R_d^2}{(1-R_d^2) \beta' \Sigma \beta}}.} 58 | 59 | The data generating process is inspired by a process used in the simulation 60 | experiment (see Appendix P) of Belloni et al. (2017). 61 | } 62 | \references{ 63 | Belloni, A., Chernozhukov, V., Fernández-Val, I. and 64 | Hansen, C. (2017). Program Evaluation and Causal Inference With 65 | High-Dimensional Data. Econometrica, 85: 233-298. 
66 | } 67 | -------------------------------------------------------------------------------- /man/make_pliv_CHS2015.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/datasets.R 3 | \name{make_pliv_CHS2015} 4 | \alias{make_pliv_CHS2015} 5 | \title{Generates data from a partially linear IV regression model used in 6 | Chernozhukov, Hansen and Spindler (2015).} 7 | \usage{ 8 | make_pliv_CHS2015( 9 | n_obs, 10 | alpha = 1, 11 | dim_x = 200, 12 | dim_z = 150, 13 | return_type = "DoubleMLData" 14 | ) 15 | } 16 | \arguments{ 17 | \item{n_obs}{(\code{integer(1)}) \cr 18 | The number of observations to simulate.} 19 | 20 | \item{alpha}{(\code{numeric(1)}) \cr 21 | The value of the causal parameter.} 22 | 23 | \item{dim_x}{(\code{integer(1)}) \cr 24 | The number of covariates.} 25 | 26 | \item{dim_z}{(\code{integer(1)}) \cr 27 | The number of instruments.} 28 | 29 | \item{return_type}{(\code{character(1)}) \cr 30 | If \code{"DoubleMLData"}, returns a \code{DoubleMLData} object. 31 | If \code{"data.frame"} returns a \code{data.frame()}. 32 | If \code{"data.table"} returns a \code{data.table()}. 33 | If \code{"matrix"} a named \code{list()} with entries \code{X}, \code{y}, \code{d} and 34 | \code{z} is returned. 35 | Every entry in the list is a \code{matrix()} object. Default is \code{"DoubleMLData"}.} 36 | } 37 | \value{ 38 | A data object according to the choice of \code{return_type}. 39 | } 40 | \description{ 41 | Generates data from a partially linear IV regression model used in 42 | Chernozhukov, Hansen and Spindler (2015). 
The data generating process 43 | is defined as 44 | 45 | \eqn{z_i = \Pi x_i + \zeta_i,} 46 | 47 | \eqn{d_i = x_i'\gamma + z_i'\delta + u_i,} 48 | 49 | \eqn{y_i = \alpha d_i + x_i'\beta + \varepsilon_i,} 50 | 51 | with 52 | 53 | \eqn{\left(\begin{array}{c} \varepsilon_i \\ u_i \\ \zeta_i \\ x_i 54 | \end{array} \right) 55 | \sim \mathcal{N}\left(0, 56 | \left(\begin{array}{cccc} 1 & 0.6 & 0 & 0 \\ 0.6 & 1 & 0 & 0 57 | \\ 0 & 0 & 0.25 I_{p_n^z} & 0 \\ 0 & 0 & 0 & \Sigma \end{array} 58 | \right) \right)} 59 | 60 | where \eqn{\Sigma} is a \eqn{p_n^x \times p_n^x} matrix with entries 61 | \eqn{\Sigma_{kj} = 0.5^{|j-k|}} and 62 | \eqn{I_{p_n^z}} is the \eqn{p^z_n \times p^z_n} 63 | identity matrix. \eqn{\beta=\gamma} is a \eqn{p^x_n}-vector with entries 64 | \eqn{\beta_j = \frac{1}{j^2}}, \eqn{\delta} is a \eqn{p^z_n}-vector with 65 | entries \eqn{\delta_j = \frac{1}{j^2}} and 66 | \eqn{\Pi = (I_{p_n^z}, O_{p_n^z \times (p_n^x - p_n^z)})}. 67 | } 68 | \references{ 69 | Chernozhukov, V., Hansen, C. and Spindler, M. (2015), 70 | Post-Selection and Post-Regularization Inference in Linear Models with 71 | Many Controls and Instruments. 72 | American Economic Review: Papers and Proceedings, 105 (5): 486-90. 73 | } 74 | -------------------------------------------------------------------------------- /man/make_pliv_multiway_cluster_CKMS2021.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/datasets.R 3 | \name{make_pliv_multiway_cluster_CKMS2021} 4 | \alias{make_pliv_multiway_cluster_CKMS2021} 5 | \title{Generates data from a partially linear IV regression model with 6 | multiway cluster sample used in Chiang et al. (2021).} 7 | \usage{ 8 | make_pliv_multiway_cluster_CKMS2021( 9 | N = 25, 10 | M = 25, 11 | dim_X = 100, 12 | theta = 1, 13 | return_type = "DoubleMLClusterData", 14 | ... 
15 | ) 16 | } 17 | \arguments{ 18 | \item{N}{(\code{integer(1)}) \cr 19 | The number of observations (first dimension).} 20 | 21 | \item{M}{(\code{integer(1)}) \cr 22 | The number of observations (second dimension).} 23 | 24 | \item{dim_X}{(\code{integer(1)}) \cr 25 | The number of covariates.} 26 | 27 | \item{theta}{(\code{numeric(1)}) \cr 28 | The value of the causal parameter.} 29 | 30 | \item{return_type}{(\code{character(1)}) \cr 31 | If \code{"DoubleMLClusterData"}, returns a \code{DoubleMLClusterData} object. 32 | If \code{"data.frame"} returns a \code{data.frame()}. 33 | If \code{"data.table"} returns a \code{data.table()}. 34 | If \code{"matrix"} a named \code{list()} with entries \code{X}, \code{y}, \code{d}, \code{z} and 35 | \code{cluster_vars} is returned. 36 | Every entry in the list is a \code{matrix()} object. Default is \code{"DoubleMLClusterData"}.} 37 | 38 | \item{...}{Additional keyword arguments to set non-default values for the parameters 39 | \eqn{\pi_{10}=1.0}, 40 | \eqn{\omega_X = \omega_{\varepsilon} = \omega_V = \omega_v = (0.25, 0.25)}, 41 | \eqn{s_X = s_{\varepsilon v} = 0.25}, or the \eqn{p_x}-vectors 42 | \eqn{\zeta_0 = \pi_{20} = \xi_0} with default entries 43 | \eqn{(\zeta_{0})_j = 0.5^j}.} 44 | } 45 | \value{ 46 | A data object according to the choice of \code{return_type}. 47 | } 48 | \description{ 49 | Generates data from a partially linear IV regression model with multiway 50 | cluster sample used in Chiang et al. (2021). 
The data generating process 51 | is defined as 52 | 53 | \eqn{Z_{ij} = X_{ij}' \xi_0 + V_{ij},} 54 | 55 | \eqn{D_{ij} = Z_{ij}' \pi_{10} + X_{ij}' \pi_{20} + v_{ij},} 56 | 57 | \eqn{Y_{ij} = D_{ij} \theta + X_{ij}' \zeta_0 + \varepsilon_{ij},} 58 | 59 | with 60 | 61 | \eqn{X_{ij} = (1 - \omega_1^X - \omega_2^X) \alpha_{ij}^X 62 | + \omega_1^X \alpha_{i}^X + \omega_2^X \alpha_{j}^X,} 63 | 64 | \eqn{\varepsilon_{ij} = (1 - \omega_1^\varepsilon - \omega_2^\varepsilon) \alpha_{ij}^\varepsilon 65 | + \omega_1^\varepsilon \alpha_{i}^\varepsilon + \omega_2^\varepsilon \alpha_{j}^\varepsilon,} 66 | 67 | \eqn{v_{ij} = (1 - \omega_1^v - \omega_2^v) \alpha_{ij}^v 68 | + \omega_1^v \alpha_{i}^v + \omega_2^v \alpha_{j}^v,} 69 | 70 | \eqn{V_{ij} = (1 - \omega_1^V - \omega_2^V) \alpha_{ij}^V 71 | + \omega_1^V \alpha_{i}^V + \omega_2^V \alpha_{j}^V,} 72 | 73 | and \eqn{\alpha_{ij}^X, \alpha_{i}^X, \alpha_{j}^X \sim \mathcal{N}(0, \Sigma)} 74 | where \eqn{\Sigma} is a \eqn{p_x \times p_x} matrix with entries 75 | \eqn{\Sigma_{kj} = s_X^{|j-k|}}. 76 | 77 | Further 78 | 79 | \eqn{\left(\begin{array}{c} \alpha_{ij}^\varepsilon \\ \alpha_{ij}^v \end{array}\right), 80 | \left(\begin{array}{c} \alpha_{i}^\varepsilon \\ \alpha_{i}^v \end{array}\right), 81 | \left(\begin{array}{c} \alpha_{j}^\varepsilon \\ \alpha_{j}^v \end{array}\right) 82 | \sim \mathcal{N}\left(0, \left(\begin{array}{cc} 1 & s_{\varepsilon v} \\ 83 | s_{\varepsilon v} & 1 \end{array}\right) \right)} 84 | 85 | and \eqn{\alpha_{ij}^V, \alpha_{i}^V, \alpha_{j}^V \sim \mathcal{N}(0, 1)}. 86 | } 87 | \references{ 88 | Chiang, H. D., Kato K., Ma, Y. and Sasaki, Y. (2021), 89 | Multiway Cluster Robust Double/Debiased Machine Learning, 90 | Journal of Business & Economic Statistics, 91 | \doi{10.1080/07350015.2021.1895815}, https://arxiv.org/abs/1909.03489. 
92 | } 93 | -------------------------------------------------------------------------------- /man/make_plr_CCDDHNR2018.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/datasets.R 3 | \name{make_plr_CCDDHNR2018} 4 | \alias{make_plr_CCDDHNR2018} 5 | \title{Generates data from a partially linear regression model used in 6 | Chernozhukov et al. (2018)} 7 | \usage{ 8 | make_plr_CCDDHNR2018( 9 | n_obs = 500, 10 | dim_x = 20, 11 | alpha = 0.5, 12 | return_type = "DoubleMLData" 13 | ) 14 | } 15 | \arguments{ 16 | \item{n_obs}{(\code{integer(1)}) \cr 17 | The number of observations to simulate.} 18 | 19 | \item{dim_x}{(\code{integer(1)}) \cr 20 | The number of covariates.} 21 | 22 | \item{alpha}{(\code{numeric(1)}) \cr 23 | The value of the causal parameter.} 24 | 25 | \item{return_type}{(\code{character(1)}) \cr 26 | If \code{"DoubleMLData"}, returns a \code{DoubleMLData} object. 27 | If \code{"data.frame"} returns a \code{data.frame()}. 28 | If \code{"data.table"} returns a \code{data.table()}. 29 | If \code{"matrix"} a named \code{list()} with entries \code{X}, \code{y} and \code{d} is returned. 30 | Every entry in the list is a \code{matrix()} object. Default is \code{"DoubleMLData"}.} 31 | } 32 | \value{ 33 | A data object according to the choice of \code{return_type}. 34 | } 35 | \description{ 36 | Generates data from a partially linear regression model used in 37 | Chernozhukov et al. (2018) for Figure 1. 38 | The data generating process is defined as 39 | 40 | \eqn{d_i = m_0(x_i) + s_1 v_i,} 41 | 42 | \eqn{y_i = \alpha d_i + g_0(x_i) + s_2 \zeta_i,} 43 | 44 | with \eqn{v_i \sim \mathcal{N}(0,1)} and 45 | \eqn{\zeta_i \sim \mathcal{N}(0,1)}. 46 | The covariates are distributed as \eqn{x_i \sim \mathcal{N}(0, \Sigma)}, 47 | where \eqn{\Sigma} is a matrix with entries \eqn{\Sigma_{kj} = 0.7^{|j-k|}}. 
48 | The nuisance functions are given by 49 | 50 | \eqn{m_0(x_i) = a_0 x_{i,1} + a_1 \frac{\exp(x_{i,3})}{1+\exp(x_{i,3})},} 51 | 52 | \eqn{g_0(x_i) = b_0 \frac{\exp(x_{i,1})}{1+\exp(x_{i,1})} + b_1 x_{i,3},} 53 | 54 | with \eqn{a_0=1}, \eqn{a_1=0.25}, \eqn{s_1=1}, \eqn{b_0=1}, \eqn{b_1=0.25}, 55 | \eqn{s_2=1}. 56 | } 57 | \references{ 58 | Chernozhukov, V., Chetverikov, D., Demirer, M., Duflo, E., 59 | Hansen, C., Newey, W. and Robins, J. (2018), Double/debiased machine learning 60 | for treatment and structural parameters. 61 | The Econometrics Journal, 21: C1-C68. \doi{10.1111/ectj.12097}. 62 | } 63 | -------------------------------------------------------------------------------- /man/make_plr_turrell2018.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/datasets.R 3 | \name{make_plr_turrell2018} 4 | \alias{make_plr_turrell2018} 5 | \title{Generates data from a partially linear regression model used in a blog 6 | article by Turrell (2018).} 7 | \usage{ 8 | make_plr_turrell2018( 9 | n_obs = 100, 10 | dim_x = 20, 11 | theta = 0.5, 12 | return_type = "DoubleMLData", 13 | nu = 0, 14 | gamma = 1 15 | ) 16 | } 17 | \arguments{ 18 | \item{n_obs}{(\code{integer(1)}) \cr 19 | The number of observations to simulate.} 20 | 21 | \item{dim_x}{(\code{integer(1)}) \cr 22 | The number of covariates.} 23 | 24 | \item{theta}{(\code{numeric(1)}) \cr 25 | The value of the causal parameter.} 26 | 27 | \item{return_type}{(\code{character(1)}) \cr 28 | If \code{"DoubleMLData"}, returns a \code{DoubleMLData} object. 29 | If \code{"data.frame"} returns a \code{data.frame()}. 30 | If \code{"data.table"} returns a \code{data.table()}. 31 | If \code{"matrix"} a named \code{list()} with entries \code{X}, \code{y} and \code{d} is returned. 32 | Every entry in the list is a \code{matrix()} object. 
Default is \code{"DoubleMLData"}.} 33 | 34 | \item{nu}{(\code{numeric(1)}) \cr 35 | The value of the parameter \eqn{\nu}. Default is \code{0}.} 36 | 37 | \item{gamma}{(\code{numeric(1)}) \cr 38 | The value of the parameter \eqn{\gamma}. Default is \code{1}.} 39 | } 40 | \value{ 41 | A data object according to the choice of \code{return_type}. 42 | } 43 | \description{ 44 | Generates data from a partially linear regression model used in a blog 45 | article by Turrell (2018). The data generating process is defined as 46 | 47 | \eqn{d_i = m_0(x_i' b) + v_i,} 48 | 49 | \eqn{y_i = \theta d_i + g_0(x_i' b) + u_i,} 50 | 51 | with \eqn{v_i \sim \mathcal{N}(0,1)}, \eqn{u_i \sim \mathcal{N}(0,1)}, and 52 | covariates \eqn{x_i \sim \mathcal{N}(0, \Sigma)}, where \eqn{\Sigma} 53 | is a random symmetric, positive-definite matrix generated with 54 | \code{\link[clusterGeneration:genPositiveDefMat]{clusterGeneration::genPositiveDefMat()}}. \eqn{b} is a vector with entries 55 | \eqn{b_j=\frac{1}{j}} and the nuisance functions are given by 56 | 57 | \eqn{m_0(x_i) = \frac{1}{2 \pi} 58 | \frac{\sinh(\gamma)}{\cosh(\gamma) - \cos(x_i-\nu)},} 59 | 60 | \eqn{g_0(x_i) = \sin(x_i)^2.} 61 | } 62 | \references{ 63 | Turrell, A. (2018), Econometrics in Python part I - Double 64 | machine learning, Markov Wanderer: A blog on economics, science, coding and 65 | data. 66 | \url{https://aeturrell.com/blog/posts/econometrics-in-python-parti-ml/}. 
67 | } 68 | -------------------------------------------------------------------------------- /man/make_ssm_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/datasets.R 3 | \name{make_ssm_data} 4 | \alias{make_ssm_data} 5 | \title{Generates data from a sample selection model (SSM).} 6 | \usage{ 7 | make_ssm_data( 8 | n_obs = 8000, 9 | dim_x = 100, 10 | theta = 1, 11 | mar = TRUE, 12 | return_type = "DoubleMLData" 13 | ) 14 | } 15 | \arguments{ 16 | \item{n_obs}{(\code{integer(1)}) \cr 17 | The number of observations to simulate.} 18 | 19 | \item{dim_x}{(\code{integer(1)}) \cr 20 | The number of covariates.} 21 | 22 | \item{theta}{(\code{numeric(1)}) \cr 23 | The value of the causal parameter.} 24 | 25 | \item{mar}{(\code{logical(1)}) \cr 26 | Indicates whether missingness at random holds.} 27 | 28 | \item{return_type}{(\code{character(1)}) \cr 29 | If \code{"DoubleMLData"}, returns a \code{DoubleMLData} object. 30 | If \code{"data.frame"} returns a \code{data.frame()}. 31 | If \code{"data.table"} returns a \code{data.table()}. 32 | Default is \code{"DoubleMLData"}.} 33 | } 34 | \value{ 35 | Depending on the \code{return_type}, returns an object or set of objects as specified. 36 | } 37 | \description{ 38 | The data generating process is defined as: 39 | } 40 | \details{ 41 | \deqn{ 42 | y_i = \theta d_i + x_i' \beta + u_i,} 43 | 44 | \deqn{s_i = 1\lbrace d_i + \gamma z_i + x_i' \beta + v_i > 0 \rbrace,} 45 | 46 | \deqn{d_i = 1\lbrace x_i' \beta + w_i > 0 \rbrace,} 47 | 48 | with \eqn{y_i} being observed if \eqn{s_i = 1} and covariates \eqn{x_i \sim \mathcal{N}(0, \Sigma^2_x)}, where 49 | \eqn{\Sigma^2_x} is a matrix with entries 50 | \eqn{\Sigma_{kj} = 0.5^{|j-k|}}. 
51 | \eqn{\beta} is a \code{dim_x}-vector with entries \eqn{\beta_j=\frac{0.4}{j^2}}, 52 | \eqn{z_i \sim \mathcal{N}(0, 1)}, 53 | \eqn{(u_i,v_i) \sim \mathcal{N}(0, \Sigma^2_{u,v})}, 54 | \eqn{w_i \sim \mathcal{N}(0, 1)}. 55 | 56 | The data generating process is inspired by a process used in the simulation study (see Appendix E) of Bia, 57 | Huber and Lafférs (2023). 58 | } 59 | \references{ 60 | Bia, M., Huber, M. and Lafférs, L. (2023), Double Machine Learning for Sample Selection Models. 61 | Journal of Business & Economic Statistics. \doi{10.1080/07350015.2023.2271071}. 62 | } 63 | -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-120x120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DoubleML/doubleml-for-r/e8227c8ac99f8f888af1759028219fb604bcfa14/pkgdown/favicon/apple-touch-icon-120x120.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-152x152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DoubleML/doubleml-for-r/e8227c8ac99f8f888af1759028219fb604bcfa14/pkgdown/favicon/apple-touch-icon-152x152.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-180x180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DoubleML/doubleml-for-r/e8227c8ac99f8f888af1759028219fb604bcfa14/pkgdown/favicon/apple-touch-icon-180x180.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-60x60.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DoubleML/doubleml-for-r/e8227c8ac99f8f888af1759028219fb604bcfa14/pkgdown/favicon/apple-touch-icon-60x60.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-76x76.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DoubleML/doubleml-for-r/e8227c8ac99f8f888af1759028219fb604bcfa14/pkgdown/favicon/apple-touch-icon-76x76.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DoubleML/doubleml-for-r/e8227c8ac99f8f888af1759028219fb604bcfa14/pkgdown/favicon/apple-touch-icon.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DoubleML/doubleml-for-r/e8227c8ac99f8f888af1759028219fb604bcfa14/pkgdown/favicon/favicon-16x16.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DoubleML/doubleml-for-r/e8227c8ac99f8f888af1759028219fb604bcfa14/pkgdown/favicon/favicon-32x32.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DoubleML/doubleml-for-r/e8227c8ac99f8f888af1759028219fb604bcfa14/pkgdown/favicon/favicon.ico -------------------------------------------------------------------------------- /tests/testthat/helper-01-helper_functions.R: 
-------------------------------------------------------------------------------- 1 | se_repeated = function(se_s, coefficients, theta_s) { 2 | se = sqrt(stats::median(se_s^2 + (theta_s - coefficients)^2)) 3 | return(se) 4 | } 5 | 6 | 7 | sample_splitting = function(k, data) { 8 | 9 | resampling = mlr3::ResamplingCV$new() 10 | resampling$param_set$values$folds = k 11 | 12 | dummy_task = mlr3::Task$new("dummy_resampling", "regr", data) 13 | resampling = resampling$instantiate(dummy_task) 14 | 15 | n_iters = resampling$iters 16 | train_ids = lapply(1:n_iters, function(x) resampling$train_set(x)) 17 | test_ids = lapply(1:n_iters, function(x) resampling$test_set(x)) 18 | 19 | return(list(train_ids = train_ids, test_ids = test_ids)) 20 | } 21 | 22 | 23 | draw_bootstrap_weights = function(bootstrap, n_rep_boot, n_obs) { 24 | if (bootstrap == "Bayes") { 25 | weights = stats::rexp(n_rep_boot * n_obs, rate = 1) - 1 26 | } else if (bootstrap == "normal") { 27 | weights = stats::rnorm(n_rep_boot * n_obs) 28 | } else if (bootstrap == "wild") { 29 | weights = stats::rnorm(n_rep_boot * n_obs) / sqrt(2) + (stats::rnorm(n_rep_boot * n_obs)^2 - 1) / 2 30 | } else { 31 | stop("invalid boot method") 32 | } 33 | weights = matrix(weights, nrow = n_rep_boot, ncol = n_obs, byrow = TRUE) 34 | 35 | return(weights) 36 | } 37 | 38 | 39 | functional_bootstrap = function(theta, se, psi, psi_a, k, smpls, 40 | n_rep_boot, weights) { 41 | score = psi 42 | J = mean(psi_a) 43 | boot_coef = matrix(NA_real_, nrow = 1, ncol = n_rep_boot) 44 | boot_t_stat = matrix(NA_real_, nrow = 1, ncol = n_rep_boot) 45 | for (i in seq(n_rep_boot)) { 46 | boot_coef[1, i] = mean(weights[i, ] * 1 / J * score) 47 | boot_t_stat[1, i] = boot_coef[1, i] / se 48 | } 49 | 50 | res = list(boot_coef = boot_coef, boot_t_stat = boot_t_stat) 51 | return(res) 52 | } 53 | 54 | trim_vec = function(values, trimming_threshold) { 55 | if (trimming_threshold > 0) { 56 | values[values < trimming_threshold] = trimming_threshold 57 | 
values[values > 1 - trimming_threshold] = 1 - trimming_threshold 58 | } 59 | return(values) 60 | } 61 | -------------------------------------------------------------------------------- /tests/testthat/helper-04-simdata.R: -------------------------------------------------------------------------------- 1 | # simulate data sets 2 | setting = list(theta = 0.5, n = 1000, p = 20) 3 | 4 | on_cran = !identical(Sys.getenv("NOT_CRAN"), "true") 5 | if (on_cran) { 6 | setting_irm = list(theta = 0.5, n = 1000, p = 20) 7 | } else { 8 | setting_irm = list(theta = 0.5, n = 5000, p = 20) 9 | } 10 | 11 | setting_pliv_partial = list(theta = 1.0, n = 500) 12 | 13 | set.seed(1282) 14 | df = dgp1_plr( 15 | setting$theta, 16 | setting$n, 17 | setting$p 18 | ) 19 | Xnames = names(df)[names(df) %in% c("y", "d", "z") == FALSE] 20 | dml_data = double_ml_data_from_data_frame(df, 21 | y_col = "y", 22 | d_cols = "d", x_cols = Xnames 23 | ) 24 | data_plr = list( 25 | df = df, 26 | dml_data = dml_data 27 | ) 28 | 29 | set.seed(1282) 30 | df = dgp1_iv( 31 | setting$theta, 32 | setting$n, 33 | setting$p 34 | ) 35 | Xnames = names(df)[names(df) %in% c("y", "d", "z") == FALSE] # note that Xnames includes z2 36 | dml_data = double_ml_data_from_data_frame(df, 37 | y_col = "y", 38 | d_cols = "d", x_cols = Xnames, z_cols = "z" 39 | ) 40 | data_pliv = list( 41 | df = df, 42 | dml_data = dml_data 43 | ) 44 | 45 | set.seed(1282) 46 | df = dgp1_irm( 47 | setting_irm$theta, 48 | setting_irm$n, 49 | setting_irm$p 50 | ) 51 | Xnames = names(df)[names(df) %in% c("y", "d", "z") == FALSE] 52 | dml_data = double_ml_data_from_data_frame(df, 53 | y_col = "y", 54 | d_cols = "d", x_cols = Xnames 55 | ) 56 | data_irm = list( 57 | df = df, 58 | dml_data = dml_data 59 | ) 60 | 61 | set.seed(1282) 62 | df = dgp1_irm_binary( 63 | setting_irm$theta, 64 | setting_irm$n, 65 | setting_irm$p 66 | ) 67 | Xnames = names(df)[names(df) %in% c("y", "d", "z") == FALSE] 68 | dml_data = double_ml_data_from_data_frame(df, 69 | y_col = 
"y", 70 | d_cols = "d", x_cols = Xnames 71 | ) 72 | data_irm_binary = list( 73 | df = df, 74 | dml_data = dml_data 75 | ) 76 | 77 | set.seed(1282) 78 | df = dgp1_irmiv( 79 | setting$theta, 80 | setting$n, 81 | setting$p 82 | ) 83 | Xnames = names(df)[names(df) %in% c("y", "d", "z") == FALSE] 84 | dml_data = double_ml_data_from_data_frame(df, 85 | y_col = "y", 86 | d_cols = "d", x_cols = Xnames, z_col = "z" 87 | ) 88 | data_iivm = list( 89 | df = df, 90 | dml_data = dml_data 91 | ) 92 | 93 | set.seed(1282) 94 | df = dgp1_irmiv_binary( 95 | setting$theta, 96 | setting$n, 97 | setting$p 98 | ) 99 | Xnames = names(df)[names(df) %in% c("y", "d", "z") == FALSE] 100 | dml_data = double_ml_data_from_data_frame(df, 101 | y_col = "y", 102 | d_cols = "d", x_cols = Xnames, z_col = "z" 103 | ) 104 | data_iivm_binary = list( 105 | df = df, 106 | dml_data = dml_data 107 | ) 108 | 109 | set.seed(1282) 110 | data_plr_multi = dgp1_toeplitz( 111 | setting$n, 112 | setting$p 113 | ) 114 | 115 | set.seed(1282) 116 | dim_z = 150 117 | df = make_pliv_CHS2015( 118 | setting$n, 119 | alpha = setting$theta, 120 | dim_z = dim_z, 121 | return_type = "data.frame" 122 | ) 123 | Xnames = names(df)[names(df) %in% c("y", "d", paste0("Z", 1:dim_z)) == FALSE] 124 | dml_data = double_ml_data_from_data_frame(df, 125 | y_col = "y", 126 | d_cols = "d", x_cols = Xnames, z_cols = paste0("Z", 1:dim_z) 127 | ) 128 | data_pliv_partialXZ = list( 129 | df = df, 130 | dml_data = dml_data 131 | ) 132 | 133 | set.seed(1282) 134 | dim_z = 5 135 | df = make_pliv_CHS2015( 136 | setting$n, 137 | alpha = setting$theta, 138 | dim_z = dim_z, 139 | return_type = "data.frame" 140 | ) 141 | Xnames = names(df)[names(df) %in% c("y", "d", paste0("Z", 1:dim_z)) == FALSE] 142 | dml_data = double_ml_data_from_data_frame(df, 143 | y_col = "y", 144 | d_cols = "d", x_cols = Xnames, z_cols = paste0("Z", 1:dim_z) 145 | ) 146 | data_pliv_partialX = list( 147 | df = df, 148 | dml_data = dml_data 149 | ) 150 | 151 | set.seed(1282) 152 | 
dim_z = 150 153 | df = make_data_pliv_partialZ( 154 | setting$n, 155 | alpha = setting$theta, 156 | dim_x = 5 157 | ) 158 | Xnames = names(df)[names(df) %in% c("y", "d", paste0("Z", 1:dim_z)) == FALSE] 159 | dml_data = double_ml_data_from_data_frame(df, 160 | y_col = "y", 161 | d_cols = "d", x_cols = Xnames, z_cols = paste0("Z", 1:dim_z) 162 | ) 163 | data_pliv_partialZ = list( 164 | df = df, 165 | dml_data = dml_data 166 | ) 167 | 168 | set.seed(1282) 169 | df = make_data_ssm( 170 | setting$n, 171 | setting$p, 172 | setting$theta, 173 | mar = TRUE) 174 | Xnames = names(df)[names(df) %in% c("y", "d", "s") == FALSE] 175 | dml_data = double_ml_data_from_data_frame(df, 176 | y_col = "y", 177 | d_cols = "d", s_col = "s", x_cols = Xnames 178 | ) 179 | data_ssm_mar = list( 180 | df = df, 181 | dml_data = dml_data 182 | ) 183 | 184 | set.seed(1282) 185 | df = make_data_ssm( 186 | setting$n, 187 | setting$p, 188 | setting$theta, 189 | mar = FALSE) 190 | Xnames = names(df)[names(df) %in% c("y", "d", "z", "s") == FALSE] 191 | dml_data = double_ml_data_from_data_frame(df, 192 | y_col = "y", 193 | d_cols = "d", z_cols = "z", s_col = "s", x_cols = Xnames 194 | ) 195 | data_ssm_nonignorable = list( 196 | df = df, 197 | dml_data = dml_data 198 | ) 199 | -------------------------------------------------------------------------------- /tests/testthat/helper-12-p_adjust.R: -------------------------------------------------------------------------------- 1 | #' Multiple Testing Adjustment of p-values for S3 objects \code{DML} 2 | #' 3 | #' Multiple hypotheses testing adjustment of p-values for double machine learning. 4 | #' 5 | #' Multiple testing adjustment is performed for S3 objects of class 6 | #' \code{DML}. 
Implemented methods for multiple testing 7 | #' adjustment are Romano-Wolf stepdown '\code{RW}' (default) and the adjustment 8 | #' methods available in the \code{p.adjust} function of the \code{stats} package, 9 | #' including the Bonferroni, Bonferroni-Holm, and Benjamini-Hochberg corrections, 10 | #' see \code{\link{p.adjust.methods}}. 11 | #' 12 | #' Objects of class \code{DML} are constructed by 13 | #' \code{\link{DML}}. 14 | #' 15 | #' @param x an object of S3 class \code{DML}. 16 | #' @param method the method of p-value adjustment for multiple testing. 17 | #' Romano-Wolf stepdown ('\code{RW}') is chosen by default. 18 | #' @param ... further arguments passed on to methods. 19 | #' @rdname p_adjust 20 | #' @aliases p_adjust.DML 21 | #' @return A matrix with the estimated coefficients and the p-values that are 22 | #' adjusted according to the specified method. 23 | #' @references J.P. Romano, M. Wolf (2005). Exact and approximate stepdown 24 | #' methods for multiple hypothesis testing. Journal of the American Statistical 25 | #' Association, 100(469), 94-108. 26 | #' @references J.P. Romano, M. Wolf (2016). Efficient computation of adjusted 27 | #' p-values for resampling-based stepdown multiple testing. Statistics and 28 | #' Probability Letters, (113), 38-40. 29 | #' 30 | #' @export 31 | 32 | p_adjust = function(x, ...) { 33 | UseMethod("p_adjust") 34 | } 35 | 36 | 37 | #' @describeIn p_adjust 38 | #' @export 39 | #' 40 | p_adjust.DML = function(x, method = "RW", ...) 
{ 41 | 42 | checkmate::checkClass(x, "DML") 43 | checkmate::checkChoice(method, c("RW", stats::p.adjust.methods)) 44 | 45 | if (all(is.na(x$boot_theta))) { 46 | message("Note: Multiplier bootstrap is not active in DML and cannot be used 47 | for p-value adjustment.") 48 | } 49 | 50 | # n = x$samplesize 51 | B = ncol(x$boot_theta) 52 | k = length(x$coefficients) 53 | cf = x$coefficients 54 | se = x$se 55 | n = x$samplesize 56 | 57 | pinit = corr.padj = pval = vector(mode = "numeric", length = k) 58 | 59 | if (is.element(method, stats::p.adjust.methods)) { 60 | pval = stats::p.adjust(x$pval, method = method, n = k) 61 | } 62 | 63 | if (method == "RW") { 64 | 65 | 66 | # e = x$residuals$e 67 | # v = x$residuals$v 68 | # ev = e * v 69 | # Ev2 = colMeans(v^2) 70 | # Omegahat = matrix(NA_real_, ncol = k, nrow = k) 71 | # for (j in 1:k) { 72 | # for (l in 1:k) { 73 | # Omegahat[j, l] = Omegahat[l, j] = 1/(Ev2[j] * Ev2[l]) * mean(ev[, j] * ev[, l]) 74 | # } 75 | # } 76 | # se = sqrt(diag(Omegahat)) 77 | # 78 | # Beta_i = matrix(NA_real_, ncol = k, nrow = B) 79 | # for (i in 1:B) { 80 | # Beta_i[i, ] = MASS::mvrnorm(mu = rep(0, k), Sigma = Omegahat/n) 81 | # } 82 | 83 | tstats = cf / se 84 | stepdown.index = order(abs(tstats), decreasing = TRUE) 85 | ro = order(stepdown.index) 86 | Beta_i = x$boot_theta 87 | 88 | for (s in 1:k) { 89 | if (s == 1) { 90 | sim = apply(abs(Beta_i), 2, max) 91 | pinit[s] = pmin(1, (sum(sim >= abs(tstats[stepdown.index][s]))) / B) 92 | } 93 | if (s > 1) { 94 | sim = apply(abs(Beta_i[-stepdown.index[1:(s - 1)], , drop = F]), 2, max) 95 | 96 | pinit[s] = pmin(1, (sum(sim >= abs(tstats[stepdown.index][s]))) / B) 97 | } 98 | 99 | for (j in 1:k) { 100 | if (j == 1) { 101 | corr.padj[j] = pinit[j] 102 | } 103 | 104 | if (j > 1) { 105 | corr.padj[j] = max(pinit[j], corr.padj[j - 1]) 106 | } 107 | } 108 | pval = corr.padj[ro] 109 | 110 | } 111 | } 112 | 113 | res = as.matrix(cbind(cf, pval)) 114 | colnames(res) = c("Estimate.", "pval") 115 | 116 | 
return(res) 117 | } 118 | -------------------------------------------------------------------------------- /tests/testthat/helper-14-dml_pliv_partial_z.R: -------------------------------------------------------------------------------- 1 | dml_pliv_partial_z = function(data, y, d, z, 2 | n_folds, 3 | ml_r, 4 | dml_procedure, score, 5 | n_rep = 1, smpls = NULL, 6 | params_r = NULL) { 7 | 8 | if (is.null(smpls)) { 9 | smpls = lapply(1:n_rep, function(x) sample_splitting(n_folds, data)) 10 | } 11 | 12 | all_thetas = all_ses = rep(NA_real_, n_rep) 13 | all_preds = list() 14 | 15 | for (i_rep in 1:n_rep) { 16 | this_smpl = smpls[[i_rep]] 17 | 18 | all_preds[[i_rep]] = fit_nuisance_pliv_partial_z( 19 | data, y, d, z, 20 | ml_r, 21 | this_smpl, 22 | params_r) 23 | 24 | residuals = compute_pliv_partial_z_residuals( 25 | data, y, d, z, n_folds, 26 | this_smpl, 27 | all_preds[[i_rep]]) 28 | r_hat = residuals$r_hat 29 | D = data[, d] 30 | Y = data[, y] 31 | 32 | # DML 1 33 | if (dml_procedure == "dml1") { 34 | thetas = vars = rep(NA_real_, n_folds) 35 | for (i in 1:n_folds) { 36 | test_index = this_smpl$test_ids[[i]] 37 | orth_est = orth_pliv_partial_z_dml( 38 | r_hat = r_hat[test_index], 39 | y = Y[test_index], 40 | d = D[test_index], 41 | score = score) 42 | thetas[i] = orth_est$theta 43 | } 44 | all_thetas[i_rep] = mean(thetas, na.rm = TRUE) 45 | if (length(this_smpl$train_ids) == 1) { 46 | r_hat = r_hat[test_index] 47 | Y = Y[test_index] 48 | D = D[test_index] 49 | } 50 | } 51 | if (dml_procedure == "dml2") { 52 | orth_est = orth_pliv_partial_z_dml( 53 | r_hat = r_hat, y = Y, d = D, 54 | score = score) 55 | all_thetas[i_rep] = orth_est$theta 56 | } 57 | 58 | all_ses[i_rep] = sqrt(var_pliv_partial_z( 59 | theta = all_thetas[i_rep], r_hat = r_hat, y = Y, d = D, 60 | score = score)) 61 | } 62 | 63 | theta = stats::median(all_thetas) 64 | if (length(this_smpl$train_ids) > 1) { 65 | n = nrow(data) 66 | } else { 67 | n = length(this_smpl$test_ids[[1]]) 68 | } 69 | se = 
se_repeated(all_ses * sqrt(n), all_thetas, theta) / sqrt(n) 70 | 71 | t = theta / se 72 | pval = 2 * stats::pnorm(-abs(t)) 73 | 74 | names(theta) = names(se) = d 75 | res = list( 76 | coef = theta, se = se, t = t, pval = pval, 77 | thetas = all_thetas, ses = all_ses, 78 | all_preds = all_preds, smpls = smpls) 79 | 80 | return(res) 81 | } 82 | 83 | fit_nuisance_pliv_partial_z = function(data, y, d, z, 84 | ml_r, 85 | smpls, 86 | params_r) { 87 | 88 | train_ids = smpls$train_ids 89 | test_ids = smpls$test_ids 90 | 91 | # nuisance r: E[D|X] 92 | r_indx = names(data) != y 93 | data_r = data[, r_indx, drop = FALSE] 94 | task_r = mlr3::TaskRegr$new(id = paste0("nuis_r_", d), backend = data_r, target = d) 95 | if (!is.null(params_r)) { 96 | ml_r$param_set$values = params_r 97 | } 98 | 99 | resampling_r = mlr3::rsmp("custom") 100 | resampling_r$instantiate(task_r, train_ids, test_ids) 101 | 102 | r_r = mlr3::resample(task_r, ml_r, resampling_r, store_models = TRUE) 103 | r_hat_list = lapply(r_r$predictions(), function(x) x$response) 104 | 105 | all_preds = list( 106 | r_hat_list = r_hat_list) 107 | 108 | return(all_preds) 109 | } 110 | 111 | compute_pliv_partial_z_residuals = function(data, y, d, z, n_folds, smpls, 112 | all_preds) { 113 | 114 | test_ids = smpls$test_ids 115 | 116 | r_hat_list = all_preds$r_hat_list 117 | n = nrow(data) 118 | r_hat = rep(NA_real_, n) 119 | 120 | for (i in 1:n_folds) { 121 | test_index = test_ids[[i]] 122 | 123 | r_hat[test_index] = r_hat_list[[i]] 124 | } 125 | residuals = list(r_hat = r_hat) 126 | 127 | return(residuals) 128 | } 129 | 130 | 131 | orth_pliv_partial_z_dml = function(r_hat, y, d, score) { 132 | stopifnot(score == "partialling out") 133 | theta = mean(r_hat * y) / mean(r_hat * d) 134 | res = list(theta = theta) 135 | return(res) 136 | } 137 | 138 | 139 | var_pliv_partial_z = function(theta, r_hat, y, d, score) { 140 | stopifnot(score == "partialling out") 141 | var = mean(1 / length(r_hat) * 1 / (mean(r_hat * d))^2 * 142 | 
mean(((y - d * theta) * r_hat)^2)) 143 | return(c(var)) 144 | } 145 | 146 | 147 | bootstrap_pliv_partial_z = function(theta, se, data, y, d, z, n_folds, smpls, 148 | all_preds, bootstrap, 149 | n_rep_boot, n_rep = 1) { 150 | for (i_rep in 1:n_rep) { 151 | residuals = compute_pliv_partial_z_residuals( 152 | data, y, d, z, n_folds, 153 | smpls[[i_rep]], 154 | all_preds[[i_rep]]) 155 | r_hat = residuals$r_hat 156 | D = data[, d] 157 | Y = data[, y] 158 | 159 | psi = (Y - D * theta[i_rep]) * r_hat 160 | psi_a = -r_hat * D 161 | 162 | n = length(psi) 163 | weights = draw_bootstrap_weights(bootstrap, n_rep_boot, n) 164 | this_res = functional_bootstrap( 165 | theta[i_rep], se[i_rep], psi, psi_a, n_folds, 166 | smpls[[i_rep]], 167 | n_rep_boot, weights) 168 | if (i_rep == 1) { 169 | boot_res = this_res 170 | } else { 171 | boot_res$boot_coef = cbind(boot_res$boot_coef, this_res$boot_coef) 172 | boot_res$boot_t_stat = cbind(boot_res$boot_t_stat, this_res$boot_t_stat) 173 | } 174 | } 175 | return(boot_res) 176 | } 177 | -------------------------------------------------------------------------------- /tests/testthat/helper-16-dml_cluster.R: -------------------------------------------------------------------------------- 1 | est_one_way_cluster_dml2 = function(psi_a, psi_b, 2 | cluster_var, 3 | smpls_one_split) { 4 | test_ids = smpls_one_split$test_ids 5 | n_folds = length(test_ids) 6 | psi_a_subsample = 0 7 | psi_b_subsample = 0 8 | for (i in 1:n_folds) { 9 | test_index = test_ids[[i]] 10 | I_k = unique(cluster_var[test_index]) 11 | const = 1 / length(I_k) 12 | psi_a_subsample = psi_a_subsample + const * sum(psi_a[test_index]) 13 | psi_b_subsample = psi_b_subsample + const * sum(psi_b[test_index]) 14 | } 15 | theta = -psi_b_subsample / psi_a_subsample 16 | return(theta) 17 | } 18 | 19 | var_one_way_cluster = function(psi, psi_a, 20 | cluster_var, 21 | smpls_one_split) { 22 | 23 | test_ids = smpls_one_split$test_ids 24 | n_folds = length(test_ids) 25 | gamma_hat = 0 26 | 
j_hat = 0 27 | for (i_fold in 1:n_folds) { 28 | test_index = test_ids[[i_fold]] 29 | I_k = unique(cluster_var[test_index]) 30 | const = 1 / length(I_k) 31 | for (i in I_k) { 32 | ind = (cluster_var == i) 33 | for (val_i in psi[ind]) { 34 | for (val_j in psi[ind]) { 35 | gamma_hat = gamma_hat + const * val_i * val_j 36 | } 37 | } 38 | j_hat = j_hat + const * sum(psi_a[ind]) 39 | } 40 | } 41 | gamma_hat = gamma_hat / n_folds 42 | j_hat = j_hat / n_folds 43 | var = gamma_hat / (j_hat^2) / length(unique(cluster_var)) 44 | return(var) 45 | } 46 | 47 | est_two_way_cluster_dml2 = function(psi_a, psi_b, 48 | cluster_var1, 49 | cluster_var2, 50 | smpls_one_split) { 51 | 52 | test_ids = smpls_one_split$test_ids 53 | n_folds = length(test_ids) 54 | psi_a_subsample = 0 55 | psi_b_subsample = 0 56 | for (i in 1:n_folds) { 57 | test_index = test_ids[[i]] 58 | I_k = unique(cluster_var1[test_index]) 59 | J_l = unique(cluster_var2[test_index]) 60 | const = 1 / (length(I_k) * length(J_l)) 61 | psi_a_subsample = psi_a_subsample + const * sum(psi_a[test_index]) 62 | psi_b_subsample = psi_b_subsample + const * sum(psi_b[test_index]) 63 | } 64 | theta = -psi_b_subsample / psi_a_subsample 65 | return(theta) 66 | } 67 | 68 | var_two_way_cluster = function(psi, psi_a, 69 | cluster_var1, 70 | cluster_var2, 71 | smpls_one_split) { 72 | 73 | test_ids = smpls_one_split$test_ids 74 | n_folds = length(test_ids) 75 | gamma_hat = 0 76 | j_hat = 0 77 | for (i_fold in 1:n_folds) { 78 | test_index = test_ids[[i_fold]] 79 | I_k = unique(cluster_var1[test_index]) 80 | J_l = unique(cluster_var2[test_index]) 81 | const = min(length(I_k), length(J_l)) / (length(I_k) * length(J_l))^2 82 | for (i in I_k) { 83 | for (j in J_l) { 84 | for (j_ in J_l) { 85 | ind1 = (cluster_var1 == i) & (cluster_var2 == j) 86 | ind2 = (cluster_var1 == i) & (cluster_var2 == j_) 87 | gamma_hat = gamma_hat + const * psi[ind1] * psi[ind2] 88 | } 89 | } 90 | } 91 | for (j in J_l) { 92 | for (i in I_k) { 93 | for (i_ in I_k) { 94 | 
ind1 = (cluster_var1 == i) & (cluster_var2 == j) 95 | ind2 = (cluster_var1 == i_) & (cluster_var2 == j) 96 | gamma_hat = gamma_hat + const * psi[ind1] * psi[ind2] 97 | } 98 | } 99 | } 100 | j_hat = j_hat + sum(psi_a[test_index]) / (length(I_k) * length(J_l)) 101 | } 102 | gamma_hat = gamma_hat / n_folds 103 | j_hat = j_hat / n_folds 104 | n_clusters1 = length(unique(cluster_var1)) 105 | n_clusters2 = length(unique(cluster_var2)) 106 | var_scaling_factor = min(n_clusters1, n_clusters2) 107 | var = gamma_hat / (j_hat^2) / var_scaling_factor 108 | return(var) 109 | } 110 | -------------------------------------------------------------------------------- /tests/testthat/print_outputs/dml_cluster_data.txt: -------------------------------------------------------------------------------- 1 | > print(dml_cluster_data) 2 | ================= DoubleMLClusterData Object ================== 3 | 4 | 5 | ------------------ Data summary ------------------ 6 | Outcome variable: Y 7 | Treatment variable(s): D 8 | Cluster variable(s): cluster_var_i, cluster_var_j 9 | Covariates: X1, X2, X3, X4, X5 10 | Instrument(s): Z 11 | Selection variable: 12 | No. Observations: 100 13 | 14 | -------------------------------------------------------------------------------- /tests/testthat/print_outputs/dml_data.txt: -------------------------------------------------------------------------------- 1 | > print(dml_data) 2 | ================= DoubleMLData Object ================== 3 | 4 | 5 | ------------------ Data summary ------------------ 6 | Outcome variable: y 7 | Treatment variable(s): d 8 | Covariates: X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X12, X13, X14, X15, X16, X17, X18, X19, X20 9 | Instrument(s): 10 | Selection variable: 11 | No. 
Observations: 100 12 | 13 | -------------------------------------------------------------------------------- /tests/testthat/print_outputs/dml_pliv.txt: -------------------------------------------------------------------------------- 1 | > print(dml_pliv) 2 | ================= DoubleMLPLIV Object ================== 3 | 4 | 5 | ------------------ Data summary ------------------ 6 | Outcome variable: Y 7 | Treatment variable(s): D 8 | Covariates: X1, X2, X3, X4, X5 9 | Instrument(s): Z 10 | Selection variable: 11 | Cluster variable(s): cluster_var_i, cluster_var_j 12 | No. Observations: 100 13 | 14 | ------------------ Score & algorithm ------------------ 15 | Score function: partialling out 16 | DML algorithm: dml2 17 | 18 | ------------------ Machine learner ------------------ 19 | ml_l: regr.rpart 20 | ml_m: regr.rpart 21 | ml_r: regr.rpart 22 | 23 | ------------------ Resampling ------------------ 24 | No. folds per cluster: 2 25 | No. folds: 4 26 | No. repeated sample splits: 1 27 | Apply cross-fitting: TRUE 28 | 29 | ------------------ Fit summary ------------------ 30 | Estimates and significance testing of the effect of target variables 31 | Estimate. Std. Error t value Pr(>|t|) 32 | D 1.0138 0.1667 6.08 1.2e-09 *** 33 | --- 34 | Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /tests/testthat/print_outputs/dml_plr.txt: -------------------------------------------------------------------------------- 1 | > print(dml_plr) 2 | ================= DoubleMLPLR Object ================== 3 | 4 | 5 | ------------------ Data summary ------------------ 6 | Outcome variable: y 7 | Treatment variable(s): d 8 | Covariates: X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X12, X13, X14, X15, X16, X17, X18, X19, X20 9 | Instrument(s): 10 | Selection variable: 11 | No. 
Observations: 100 12 | 13 | ------------------ Score & algorithm ------------------ 14 | Score function: partialling out 15 | DML algorithm: dml2 16 | 17 | ------------------ Machine learner ------------------ 18 | ml_l: regr.rpart 19 | ml_m: regr.rpart 20 | 21 | ------------------ Resampling ------------------ 22 | No. folds: 2 23 | No. repeated sample splits: 1 24 | Apply cross-fitting: TRUE 25 | 26 | ------------------ Fit summary ------------------ 27 | Estimates and significance testing of the effect of target variables 28 | Estimate. Std. Error t value Pr(>|t|) 29 | d 0.4067 0.1035 3.929 8.54e-05 *** 30 | --- 31 | Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /tests/testthat/test-double_ml_active_bindings.R: -------------------------------------------------------------------------------- 1 | context("Unit tests for active bindings of class DoubleML") 2 | 3 | test_that("Not setable fields", { 4 | set.seed(3141) 5 | dml_data = make_plr_CCDDHNR2018(n_obs = 101) 6 | ml_g = lrn("regr.ranger") 7 | ml_m = ml_g$clone() 8 | dml_plr = DoubleMLPLR$new(dml_data, ml_g, ml_m) 9 | 10 | msg = "can't set field all_coef" 11 | expect_error(dml_plr$all_coef <- 5, 12 | regexp = msg) 13 | msg = "can't set field all_dml1_coef" 14 | expect_error(dml_plr$all_dml1_coef <- 5, 15 | regexp = msg) 16 | msg = "can't set field all_se" 17 | expect_error(dml_plr$all_se <- 5, 18 | regexp = msg) 19 | msg = "can't set field apply_cross_fitting" 20 | expect_error(dml_plr$apply_cross_fitting <- FALSE, 21 | regexp = msg) 22 | msg = "can't set field boot_coef" 23 | expect_error(dml_plr$boot_coef <- 5, 24 | regexp = msg) 25 | msg = "can't set field boot_t_stat" 26 | expect_error(dml_plr$boot_t_stat <- 5, 27 | regexp = msg) 28 | msg = "can't set field coef" 29 | expect_error(dml_plr$coef <- 5, 30 | regexp = msg) 31 | msg = "can't set field data" 32 | expect_error(dml_plr$data <- "abc", 
33 | regexp = msg) 34 | msg = "can't set field dml_procedure" 35 | expect_error(dml_plr$dml_procedure <- "abc", 36 | regexp = msg) 37 | msg = "can't set field draw_sample_splitting" 38 | expect_error(dml_plr$draw_sample_splitting <- FALSE, 39 | regexp = msg) 40 | msg = "can't set field learner" 41 | expect_error(dml_plr$learner <- "abc", 42 | regexp = msg) 43 | msg = "can't set field models" 44 | expect_error(dml_plr$models <- 5, 45 | regexp = msg) 46 | msg = "can't set field n_folds" 47 | expect_error(dml_plr$n_folds <- 5, 48 | regexp = msg) 49 | msg = "can't set field n_rep" 50 | expect_error(dml_plr$n_rep <- 5, 51 | regexp = msg) 52 | msg = "can't set field params" 53 | expect_error(dml_plr$params <- 5, 54 | regexp = msg) 55 | msg = "can't set field psi" 56 | expect_error(dml_plr$psi <- 5, 57 | regexp = msg) 58 | msg = "can't set field psi_a" 59 | expect_error(dml_plr$psi_a <- 5, 60 | regexp = msg) 61 | msg = "can't set field psi_b" 62 | expect_error(dml_plr$psi_b <- 5, 63 | regexp = msg) 64 | msg = "can't set field predictions" 65 | expect_error(dml_plr$predictions <- 5, 66 | regexp = msg) 67 | msg = "can't set field pval" 68 | expect_error(dml_plr$pval <- 5, 69 | regexp = msg) 70 | msg = "can't set field score" 71 | expect_error(dml_plr$score <- "abc", 72 | regexp = msg) 73 | msg = "can't set field se" 74 | expect_error(dml_plr$se <- 5, 75 | regexp = msg) 76 | msg = "can't set field smpls" 77 | expect_error(dml_plr$smpls <- 5, 78 | regexp = msg) 79 | msg = "can't set field smpls_cluster" 80 | expect_error(dml_plr$smpls_cluster <- 5, 81 | regexp = msg) 82 | msg = "can't set field t_stat" 83 | expect_error(dml_plr$t_stat <- 5, 84 | regexp = msg) 85 | msg = "can't set field tuning_res" 86 | expect_error(dml_plr$tuning_res <- list(a = 5), 87 | regexp = msg) 88 | 89 | dml_data = make_pliv_CHS2015(n_obs = 101) 90 | ml_g = lrn("regr.ranger") 91 | ml_m = ml_g$clone() 92 | ml_r = ml_g$clone() 93 | dml_pliv = DoubleMLPLIV$new(dml_data, ml_g, ml_m, ml_r) 94 | 95 | msg = 
"can't set field partialX" 96 | expect_error(dml_pliv$partialX <- FALSE, 97 | regexp = msg) 98 | msg = "can't set field partialZ" 99 | expect_error(dml_pliv$partialZ <- FALSE, 100 | regexp = msg) 101 | 102 | dml_data = make_irm_data(n_obs = 101) 103 | ml_g = lrn("regr.ranger") 104 | ml_m = lrn("classif.ranger") 105 | dml_irm = DoubleMLIRM$new(dml_data, ml_g, ml_m) 106 | 107 | msg = "can't set field trimming_rule" 108 | expect_error(dml_irm$trimming_rule <- "abc", 109 | regexp = msg) 110 | msg = "can't set field trimming_threshold" 111 | expect_error(dml_irm$trimming_threshold <- 0.1, 112 | regexp = msg) 113 | 114 | dml_data = make_iivm_data(n_obs = 101) 115 | ml_g = lrn("regr.ranger") 116 | ml_m = lrn("classif.ranger") 117 | ml_r = ml_m$clone() 118 | dml_iivm = DoubleMLIIVM$new(dml_data, ml_g, ml_m, ml_r) 119 | 120 | msg = "can't set field subgroups" 121 | expect_error(dml_iivm$subgroups <- "abc", 122 | regexp = msg) 123 | msg = "can't set field trimming_rule" 124 | expect_error(dml_iivm$trimming_rule <- "abc", 125 | regexp = msg) 126 | msg = "can't set field trimming_threshold" 127 | expect_error(dml_iivm$trimming_threshold <- 0.1, 128 | regexp = msg) 129 | }) 130 | -------------------------------------------------------------------------------- /tests/testthat/test-double_ml_cluster_not_implemented.R: -------------------------------------------------------------------------------- 1 | context("Unit tests for not yet implemented cluster features") 2 | 3 | lgr::get_logger("mlr3")$set_threshold("warn") 4 | 5 | test_that("Not yet implemented cluster features", { 6 | ml_g = ml_m = ml_r = "regr.rpart" 7 | 8 | set.seed(3141) 9 | dml_cluster_data_pliv = make_pliv_multiway_cluster_CKMS2021(N = 10, M = 10) 10 | dml_pliv_cluster = DoubleMLPLIV$new(dml_cluster_data_pliv, 11 | ml_g, ml_m, ml_r, 12 | n_folds = 2) 13 | dml_pliv_cluster$fit() 14 | 15 | msg = "bootstrap not yet implemented with clustering." 
16 | expect_error(dml_pliv_cluster$bootstrap(), regexp = msg) 17 | 18 | dml_data = make_plr_CCDDHNR2018(n_obs = 100) 19 | dml_plr = DoubleMLPLR$new( 20 | dml_data, 21 | ml_g, ml_m) 22 | smpls = dml_plr$smpls 23 | msg = paste( 24 | "Externally setting the sample splitting for DoubleML is not", 25 | "yet implemented with clustering.") 26 | expect_error(dml_pliv_cluster$set_sample_splitting(smpls), regexp = msg) 27 | 28 | dt = data.table::copy(dml_cluster_data_pliv$data) 29 | dt$cluster_var_k = dt$cluster_var_i + dt$cluster_var_j - 2 30 | data_multiway = DoubleMLClusterData$new(dt, 31 | y_col = "Y", d_cols = "D", 32 | x_cols = c("X1", "X5"), z_cols = "Z", 33 | cluster_cols = c( 34 | "cluster_var_i", 35 | "cluster_var_j", 36 | "cluster_var_k")) 37 | expect_equal(data_multiway$n_cluster_vars, 3) 38 | msg = "Multi-way \\(n_ways > 2\\) clustering not yet implemented." 39 | expect_error(DoubleMLPLIV$new( 40 | data_multiway, 41 | ml_g, ml_m, ml_r), regexp = msg) 42 | 43 | 44 | msg = paste( 45 | "No cross-fitting \\(`apply_cross_fitting = False`\\) is not yet", 46 | "implemented with clustering.") 47 | expect_error(DoubleMLPLIV$new(dml_cluster_data_pliv, 48 | ml_g, ml_m, ml_r, 49 | n_folds = 1), regexp = msg) 50 | expect_error(DoubleMLPLIV$new(dml_cluster_data_pliv, 51 | ml_g, ml_m, ml_r, 52 | apply_cross_fitting = FALSE, n_folds = 2), 53 | regexp = msg) 54 | }) 55 | -------------------------------------------------------------------------------- /tests/testthat/test-double_ml_data_cluster.R: -------------------------------------------------------------------------------- 1 | context("Unit tests for DoubleMLCluster (Additional tests)") 2 | 3 | test_that("Unit tests for DoubleMLData", { 4 | 5 | set.seed(1234) 6 | N = 25 # number of observations (first dimension) 7 | M = 25 # number of observations (second dimension) 8 | dim_x = 10 # dimension of x 9 | data_one_way = make_pliv_multiway_cluster_CKMS2021(N, M, dim_x, 10 | omega_X = c(0.25, 0), 11 | omega_epsilon = c(0.25, 0), 12 
| omega_v = c(0.25, 0), 13 | omega_V = c(0.25, 0)) 14 | data_one_way$cluster_cols = "cluster_var_i" 15 | 16 | data_model = data_one_way$data_model 17 | data_model$S = rbinom(nrow(data_model), prob = 0.5, 1) 18 | 19 | # z_cols but no s_col and x_cols 20 | dml_data = DoubleMLClusterData$new(data_model, 21 | y_col = "Y", 22 | d_cols = "D", 23 | z_cols = "Z", 24 | cluster_cols = "cluster_var_i") 25 | expect_null(dml_data$s_col) 26 | expect_data_table(dml_data$data_model) 27 | x_cols_exp = c(paste0("X", 1:10), "S") 28 | expect_identical(dml_data$x_cols, x_cols_exp) 29 | 30 | # z_cols and s_col, but no x_cols (L. 502) 31 | dml_data = DoubleMLClusterData$new(data_model, 32 | y_col = "Y", 33 | d_cols = "D", 34 | s_col = "S", 35 | cluster_cols = "cluster_var_i") 36 | expect_data_table(dml_data$data_model) 37 | x_cols_exp = c(paste0("X", 1:10), "Z") 38 | expect_identical(dml_data$x_cols, x_cols_exp) 39 | 40 | msg = paste("At least one variable/column is set as selection variable", 41 | "\\('s_col'\\) and as a cluster variable \\('cluster_cols'\\).") 42 | 43 | expect_error(DoubleMLClusterData$new(data_model, 44 | x_cols = c("X1", "X2"), 45 | y_col = "Y", 46 | d_cols = "D", 47 | z_cols = "Z", 48 | s_col = "S", 49 | cluster_cols = "S"), 50 | regexp = msg) 51 | }) 52 | -------------------------------------------------------------------------------- /tests/testthat/test-double_ml_datasets.R: -------------------------------------------------------------------------------- 1 | context("Unit tests for datasets functionalities") 2 | 3 | test_cases = expand.grid( 4 | return_type = c( 5 | "data.frame", "data.table", 6 | "matrix", "DoubleMLData" 7 | ), 8 | polynomial_features = c(TRUE, FALSE), 9 | instrument = c(TRUE, FALSE), 10 | stringsAsFactors = FALSE 11 | ) 12 | 13 | test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 14 | 15 | testthat::skip_on_cran() 16 | patrick::with_parameters_test_that("Unit tests for datasets functionalities:", 17 | .cases = test_cases, 18 
| { 19 | n_obs = 100 20 | 21 | # Test CCDDHNR2018 22 | if (return_type != "matrix") { 23 | df = make_plr_CCDDHNR2018(return_type = return_type) 24 | expect_is(df, paste0(return_type)) 25 | } else { 26 | df = make_plr_CCDDHNR2018(return_type = return_type) 27 | expect_is(df, "list") 28 | expect_is(df$X, "matrix") 29 | expect_is(df$y, "matrix") 30 | expect_is(df$d, "matrix") 31 | } 32 | 33 | # Test CHS2015 34 | if (return_type != "matrix") { 35 | df = make_pliv_CHS2015(n_obs, return_type = return_type) 36 | expect_is(df, paste0(return_type)) 37 | } else { 38 | df = make_pliv_CHS2015(n_obs, return_type = return_type) 39 | expect_is(df, "list") 40 | expect_is(df$X, "matrix") 41 | expect_is(df$y, "matrix") 42 | expect_is(df$d, "matrix") 43 | expect_is(df$z, "matrix") 44 | } 45 | 46 | # Test CKMS2021 47 | N = 10 48 | M = 10 49 | if (return_type == "DoubleMLData") { 50 | df = make_pliv_multiway_cluster_CKMS2021(N, M, 51 | return_type = "DoubleMLClusterData") 52 | expect_is(df, "DoubleMLClusterData") 53 | } else if (return_type != "matrix") { 54 | df = make_pliv_multiway_cluster_CKMS2021(N, M, return_type = return_type) 55 | expect_is(df, paste0(return_type)) 56 | } else { 57 | df = make_pliv_multiway_cluster_CKMS2021(N, M, return_type = return_type) 58 | expect_is(df, "list") 59 | expect_is(df$X, "matrix") 60 | expect_is(df$y, "matrix") 61 | expect_is(df$d, "matrix") 62 | expect_is(df$z, "matrix") 63 | } 64 | 65 | # Test IRM 66 | if (return_type != "matrix") { 67 | df = make_irm_data(return_type = return_type) 68 | expect_is(df, paste0(return_type)) 69 | } else { 70 | df = make_irm_data(return_type = return_type) 71 | expect_is(df, "list") 72 | expect_is(df$X, "matrix") 73 | expect_is(df$y, "matrix") 74 | expect_is(df$d, "matrix") 75 | } 76 | 77 | # Test IIVM 78 | if (return_type != "matrix") { 79 | df = make_iivm_data(return_type = return_type) 80 | expect_is(df, paste0(return_type)) 81 | } else { 82 | df = make_iivm_data(return_type = return_type) 83 | expect_is(df, 
"list") 84 | expect_is(df$X, "matrix") 85 | expect_is(df$y, "matrix") 86 | expect_is(df$d, "matrix") 87 | expect_is(df$z, "matrix") 88 | } 89 | 90 | # Test PLR (Turrell) 91 | if (return_type != "matrix") { 92 | df = make_plr_turrell2018(return_type = return_type) 93 | expect_is(df, paste0(return_type)) 94 | } else { 95 | df = make_plr_turrell2018(return_type = return_type) 96 | expect_is(df, "list") 97 | expect_is(df$X, "matrix") 98 | expect_is(df$y, "matrix") 99 | expect_is(df$d, "matrix") 100 | } 101 | 102 | # Test fetch_401k 103 | if (return_type != "matrix") { 104 | df = fetch_401k( 105 | return_type = return_type, polynomial_features = polynomial_features, 106 | instrument = instrument 107 | ) 108 | expect_is(df, paste0(return_type)) 109 | } 110 | 111 | # Test fetch_bonus 112 | if (return_type != "matrix") { 113 | df = fetch_bonus(return_type = return_type, polynomial_features = polynomial_features) 114 | expect_is(df, paste0(return_type)) 115 | } 116 | 117 | # Test ssm, mar=TRUE 118 | mar = TRUE 119 | if (return_type != "matrix") { 120 | df = make_ssm_data(mar = mar, return_type = return_type) 121 | expect_is(df, paste0(return_type)) 122 | } else { 123 | df = make_ssm_data(mar = mar, return_type = return_type) 124 | expect_is(df, "list") 125 | expect_is(df$X, "matrix") 126 | expect_is(df$y, "matrix") 127 | expect_is(df$d, "matrix") 128 | expect_is(df$s, "matrix") 129 | } 130 | 131 | # Test ssm, mar=FALSE 132 | mar = FALSE 133 | if (return_type != "matrix") { 134 | df = make_ssm_data(mar = mar, return_type = return_type) 135 | expect_is(df, paste0(return_type)) 136 | } else { 137 | df = make_ssm_data(mar = mar, return_type = return_type) 138 | expect_is(df, "list") 139 | expect_is(df$X, "matrix") 140 | expect_is(df$y, "matrix") 141 | expect_is(df$d, "matrix") 142 | expect_is(df$z, "matrix") 143 | expect_is(df$s, "matrix") 144 | } 145 | } 146 | ) 147 | -------------------------------------------------------------------------------- 
/tests/testthat/test-double_ml_iivm.R: -------------------------------------------------------------------------------- 1 | context("Unit tests for IIVM") 2 | 3 | library("mlr3learners") 4 | 5 | lgr::get_logger("mlr3")$set_threshold("warn") 6 | 7 | on_cran = !identical(Sys.getenv("NOT_CRAN"), "true") 8 | if (on_cran) { 9 | test_cases = expand.grid( 10 | learner = "rpart", 11 | dml_procedure = "dml2", 12 | score = "LATE", 13 | trimming_threshold = c(1e-5), 14 | stringsAsFactors = FALSE) 15 | } else { 16 | test_cases = expand.grid( 17 | learner = c("cv_glmnet", "graph_learner"), 18 | dml_procedure = c("dml1", "dml2"), 19 | score = "LATE", 20 | trimming_threshold = c(1e-5), 21 | stringsAsFactors = FALSE) 22 | } 23 | 24 | test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 25 | 26 | patrick::with_parameters_test_that("Unit tests for IIVM:", 27 | .cases = test_cases, { 28 | learner_pars = get_default_mlmethod_iivm(learner) 29 | n_rep_boot = 498 30 | 31 | set.seed(3141) 32 | iivm_hat = dml_irmiv(data_iivm$df, 33 | y = "y", d = "d", z = "z", 34 | n_folds = 5, 35 | ml_g = learner_pars$ml_g$clone(), 36 | ml_m = learner_pars$ml_m$clone(), 37 | ml_r = learner_pars$ml_r$clone(), 38 | dml_procedure = dml_procedure, score = score, 39 | trimming_threshold = trimming_threshold) 40 | theta = iivm_hat$coef 41 | se = iivm_hat$se 42 | 43 | boot_theta = bootstrap_irmiv(iivm_hat$thetas, iivm_hat$ses, 44 | data_iivm$df, 45 | y = "y", d = "d", z = "z", 46 | n_folds = 5, smpls = iivm_hat$smpls, 47 | all_preds = iivm_hat$all_preds, 48 | score = score, 49 | bootstrap = "normal", n_rep_boot = n_rep_boot, 50 | trimming_threshold = trimming_threshold)$boot_coef 51 | 52 | set.seed(3141) 53 | double_mliivm_obj = DoubleMLIIVM$new( 54 | data = data_iivm$dml_data, 55 | n_folds = 5, 56 | ml_g = learner_pars$ml_g$clone(), 57 | ml_m = learner_pars$ml_m$clone(), 58 | ml_r = learner_pars$ml_r$clone(), 59 | dml_procedure = dml_procedure, 60 | trimming_threshold = trimming_threshold, 61 
| score = score) 62 | double_mliivm_obj$fit() 63 | theta_obj = double_mliivm_obj$coef 64 | se_obj = double_mliivm_obj$se 65 | 66 | # bootstrap 67 | double_mliivm_obj$bootstrap(method = "normal", n_rep = n_rep_boot) 68 | boot_theta_obj = double_mliivm_obj$boot_coef 69 | 70 | # at the moment the object result comes without a name 71 | expect_equal(theta, theta_obj, tolerance = 1e-8) 72 | expect_equal(se, se_obj, tolerance = 1e-8) 73 | expect_equal(as.vector(boot_theta), as.vector(boot_theta_obj), tolerance = 1e-8) 74 | } 75 | ) 76 | -------------------------------------------------------------------------------- /tests/testthat/test-double_ml_iivm_binary_outcome.R: -------------------------------------------------------------------------------- 1 | context("Unit tests for IIVM, binary outcome") 2 | 3 | library("mlr3learners") 4 | 5 | lgr::get_logger("mlr3")$set_threshold("warn") 6 | 7 | on_cran = !identical(Sys.getenv("NOT_CRAN"), "true") 8 | if (on_cran) { 9 | test_cases = expand.grid( 10 | learner = "log_reg", 11 | dml_procedure = "dml2", 12 | score = "LATE", 13 | trimming_threshold = 0.025, 14 | stringsAsFactors = FALSE) 15 | } else { 16 | test_cases = expand.grid( 17 | learner = "cv_glmnet", 18 | dml_procedure = c("dml1", "dml2"), 19 | score = "LATE", 20 | trimming_threshold = 0.025, 21 | stringsAsFactors = FALSE) 22 | } 23 | 24 | test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 25 | 26 | patrick::with_parameters_test_that("Unit tests for IIVM:", 27 | .cases = test_cases, { 28 | learner_pars = get_default_mlmethod_iivm_binary(learner) 29 | n_rep_boot = 498 30 | 31 | set.seed(3141) 32 | iivm_hat = dml_irmiv(data_iivm_binary$df, 33 | y = "y", d = "d", z = "z", 34 | n_folds = 5, 35 | ml_g = learner_pars$ml_g$clone(), 36 | ml_m = learner_pars$ml_m$clone(), 37 | ml_r = learner_pars$ml_r$clone(), 38 | dml_procedure = dml_procedure, 39 | trimming_threshold = trimming_threshold, 40 | score = score) 41 | theta = iivm_hat$coef 42 | se = iivm_hat$se 43 
| 44 | boot_theta = bootstrap_irmiv(iivm_hat$thetas, iivm_hat$ses, 45 | data_iivm_binary$df, 46 | y = "y", d = "d", z = "z", 47 | n_folds = 5, smpls = iivm_hat$smpls, 48 | all_preds = iivm_hat$all_preds, 49 | trimming_threshold = trimming_threshold, 50 | score = score, 51 | bootstrap = "normal", n_rep_boot = n_rep_boot)$boot_coef 52 | 53 | set.seed(3141) 54 | double_mliivm_obj = DoubleMLIIVM$new( 55 | data = data_iivm_binary$dml_data, 56 | n_folds = 5, 57 | ml_g = learner_pars$ml_g$clone(), 58 | ml_m = learner_pars$ml_m$clone(), 59 | ml_r = learner_pars$ml_r$clone(), 60 | dml_procedure = dml_procedure, 61 | trimming_threshold = trimming_threshold, 62 | score = score) 63 | double_mliivm_obj$fit() 64 | theta_obj = double_mliivm_obj$coef 65 | se_obj = double_mliivm_obj$se 66 | 67 | # bootstrap 68 | double_mliivm_obj$bootstrap(method = "normal", n_rep = n_rep_boot) 69 | boot_theta_obj = double_mliivm_obj$boot_coef 70 | 71 | # at the moment the object result comes without a name 72 | expect_equal(theta, theta_obj, tolerance = 1e-8) 73 | expect_equal(se, se_obj, tolerance = 1e-8) 74 | expect_equal(as.vector(boot_theta), as.vector(boot_theta_obj), tolerance = 1e-8) 75 | } 76 | ) 77 | -------------------------------------------------------------------------------- /tests/testthat/test-double_ml_iivm_trim.R: -------------------------------------------------------------------------------- 1 | context("Unit tests for IIVM") 2 | 3 | lgr::get_logger("mlr3")$set_threshold("warn") 4 | 5 | on_cran = !identical(Sys.getenv("NOT_CRAN"), "true") 6 | if (on_cran) { 7 | test_cases = expand.grid( 8 | learner = "rpart", 9 | dml_procedure = "dml2", 10 | score = "LATE", 11 | trimming_rule = c("truncate"), 12 | trimming_threshold = c(0.05), 13 | stringsAsFactors = FALSE) 14 | } else { 15 | test_cases = expand.grid( 16 | learner = "rpart", 17 | dml_procedure = c("dml1", "dml2"), 18 | score = "LATE", 19 | trimming_rule = c("truncate"), 20 | trimming_threshold = c(1e-12, 0.05), 21 | 
stringsAsFactors = FALSE) 22 | } 23 | test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 24 | 25 | patrick::with_parameters_test_that("Unit tests for IIVM:", 26 | .cases = test_cases, { 27 | learner_pars = get_default_mlmethod_iivm(learner) 28 | n_rep_boot = 498 29 | 30 | set.seed(3141) 31 | iivm_hat = dml_irmiv(data_iivm$df, 32 | y = "y", d = "d", z = "z", 33 | n_folds = 5, 34 | ml_g = learner_pars$ml_g$clone(), 35 | ml_m = learner_pars$ml_m$clone(), 36 | ml_r = learner_pars$ml_r$clone(), 37 | dml_procedure = dml_procedure, score = score, 38 | trimming_threshold = trimming_threshold) 39 | theta = iivm_hat$coef 40 | se = iivm_hat$se 41 | 42 | boot_theta = bootstrap_irmiv(iivm_hat$thetas, iivm_hat$ses, 43 | data_iivm$df, 44 | y = "y", d = "d", z = "z", 45 | n_folds = 5, smpls = iivm_hat$smpls, 46 | all_preds = iivm_hat$all_preds, 47 | score = score, 48 | bootstrap = "normal", n_rep_boot = n_rep_boot, 49 | trimming_threshold = trimming_threshold)$boot_coef 50 | 51 | set.seed(3141) 52 | 53 | # we rename the z variable to have non-default names in the unit tests 54 | data = data_iivm$df 55 | names(data)[names(data) == "z"] = "Z_IV" 56 | 57 | Xnames = names(data)[names(data) %in% c("y", "d", "Z_IV") == FALSE] 58 | 59 | data_ml = double_ml_data_from_data_frame(data, 60 | y_col = "y", 61 | d_cols = "d", x_cols = Xnames, z_cols = "Z_IV") 62 | 63 | double_mliivm_obj = DoubleMLIIVM$new(data_ml, 64 | n_folds = 5, 65 | ml_g = learner_pars$ml_g$clone(), 66 | ml_m = learner_pars$ml_m$clone(), 67 | ml_r = learner_pars$ml_r$clone(), 68 | dml_procedure = dml_procedure, 69 | trimming_threshold = trimming_threshold, 70 | score = score) 71 | 72 | double_mliivm_obj$fit() 73 | theta_obj = double_mliivm_obj$coef 74 | se_obj = double_mliivm_obj$se 75 | 76 | # bootstrap 77 | double_mliivm_obj$bootstrap(method = "normal", n_rep = n_rep_boot) 78 | boot_theta_obj = double_mliivm_obj$boot_coef 79 | 80 | expect_equal(theta, theta_obj, tolerance = 1e-8) 81 | expect_equal(se, 
se_obj, tolerance = 1e-8) 82 | expect_equal(as.vector(boot_theta), as.vector(boot_theta_obj), tolerance = 1e-8) 83 | } 84 | ) 85 | -------------------------------------------------------------------------------- /tests/testthat/test-double_ml_iivm_tuning.R: -------------------------------------------------------------------------------- 1 | context("Unit tests for tuning of IIVM") 2 | 3 | requireNamespace("lgr") 4 | 5 | logger = lgr::get_logger("bbotk") 6 | logger$set_threshold("warn") 7 | lgr::get_logger("mlr3")$set_threshold("warn") 8 | 9 | tune_settings = list( 10 | rsmp_tune = rsmp("cv", folds = 3), 11 | measure = list( 12 | "ml_m" = "classif.ce", 13 | "ml_g" = "regr.mse", 14 | "ml_r" = "classif.ce"), 15 | terminator = mlr3tuning::trm("evals", n_evals = 5), 16 | algorithm = tnr("random_search")) 17 | 18 | learner = "rpart" 19 | 20 | on_cran = !identical(Sys.getenv("NOT_CRAN"), "true") 21 | if (on_cran) { 22 | test_cases = expand.grid( 23 | learner_list = learner, 24 | dml_procedure = "dml2", 25 | score = "LATE", 26 | AT = c(TRUE), 27 | NT = c(TRUE), 28 | n_rep = c(1), 29 | tune_on_folds = FALSE, 30 | stringsAsFactors = FALSE) 31 | } else { 32 | test_cases = expand.grid( 33 | learner_list = learner, 34 | dml_procedure = c("dml1", "dml2"), 35 | score = "LATE", 36 | AT = c(TRUE, FALSE), 37 | NT = c(TRUE, FALSE), 38 | n_rep = c(1, 3), 39 | tune_on_folds = c(FALSE, TRUE), 40 | stringsAsFactors = FALSE) 41 | } 42 | 43 | 44 | test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 45 | 46 | patrick::with_parameters_test_that("Unit tests for tuning of IIVM:", 47 | .cases = test_cases, { 48 | n_rep_boot = 498 49 | n_folds = 2 50 | 51 | set.seed(3141) 52 | double_mliivm_obj_tuned = DoubleMLIIVM$new( 53 | data = data_iivm$dml_data, 54 | n_folds = n_folds, 55 | ml_g = "regr.rpart", 56 | ml_m = "classif.rpart", 57 | ml_r = "classif.rpart", 58 | subgroups = list( 59 | always_takers = AT, 60 | never_takers = NT), 61 | dml_procedure = dml_procedure, 62 | score = 
score, 63 | n_rep = n_rep) 64 | 65 | param_grid = list( 66 | "ml_m" = paradox::ps( 67 | cp = paradox::p_dbl(lower = 0.01, upper = 0.02), 68 | minsplit = paradox::p_int(lower = 1, upper = 2)), 69 | "ml_g" = paradox::ps( 70 | cp = paradox::p_dbl(lower = 0.01, upper = 0.02), 71 | minsplit = paradox::p_int(lower = 1, upper = 2)), 72 | "ml_r" = paradox::ps( 73 | cp = paradox::p_dbl(lower = 0.01, upper = 0.02), 74 | minsplit = paradox::p_int(lower = 1, upper = 2))) 75 | 76 | double_mliivm_obj_tuned$tune(param_set = param_grid, tune_on_folds = tune_on_folds, tune_settings = tune_settings) 77 | double_mliivm_obj_tuned$fit() 78 | 79 | theta_obj_tuned = double_mliivm_obj_tuned$coef 80 | se_obj_tuned = double_mliivm_obj_tuned$se 81 | 82 | # bootstrap 83 | # double_mlirm_obj_tuned$bootstrap(method = 'normal', n_rep = n_rep_boot) 84 | # boot_theta_obj_tuned = double_mlirm_obj_tuned$boot_coef 85 | 86 | 87 | # restrictions to test 88 | # Functional (tbd) vs OOP implementation (handling randomness in param selection!?) 
89 | expect_is(theta_obj_tuned, "numeric") 90 | expect_is(se_obj_tuned, "numeric") 91 | } 92 | ) 93 | -------------------------------------------------------------------------------- /tests/testthat/test-double_ml_iivm_user_score.R: -------------------------------------------------------------------------------- 1 | context("Unit tests for IIVM, callable score") 2 | 3 | library("mlr3learners") 4 | 5 | lgr::get_logger("mlr3")$set_threshold("warn") 6 | 7 | score_fct = function(y, z, d, g0_hat, g1_hat, m_hat, r0_hat, 8 | r1_hat, smpls) { 9 | 10 | u0_hat = y - g0_hat 11 | u1_hat = y - g1_hat 12 | w0_hat = d - r0_hat 13 | w1_hat = d - r1_hat 14 | psi_b = g1_hat - g0_hat + z * (u1_hat) / m_hat - 15 | (1 - z) * u0_hat / (1 - m_hat) 16 | psi_a = -1 * (r1_hat - r0_hat + z * (w1_hat) / m_hat - 17 | (1 - z) * w0_hat / (1 - m_hat)) 18 | 19 | psis = list(psi_a = psi_a, psi_b = psi_b) 20 | return(psis) 21 | } 22 | 23 | on_cran = !identical(Sys.getenv("NOT_CRAN"), "true") 24 | if (on_cran) { 25 | test_cases = expand.grid( 26 | learner = "regr.rpart", 27 | learner_m = "classif.rpart", 28 | dml_procedure = "dml2", 29 | trimming_threshold = c(0), 30 | stringsAsFactors = FALSE) 31 | } else { 32 | test_cases = expand.grid( 33 | learner = "regr.glmnet", 34 | learner_m = "classif.glmnet", 35 | dml_procedure = c("dml1", "dml2"), 36 | trimming_threshold = c(0), 37 | stringsAsFactors = FALSE) 38 | } 39 | 40 | test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 41 | 42 | patrick::with_parameters_test_that("Unit tests for IIVM, callable score:", 43 | .cases = test_cases, { 44 | n_rep_boot = 498 45 | set.seed(3141) 46 | double_mliivm_obj = DoubleMLIIVM$new( 47 | data = data_iivm$dml_data, 48 | n_folds = 5, 49 | ml_m = lrn(learner_m), 50 | ml_g = lrn(learner), 51 | ml_r = lrn(learner_m), 52 | dml_procedure = dml_procedure, 53 | trimming_threshold = trimming_threshold, 54 | score = "LATE") 55 | double_mliivm_obj$fit() 56 | theta_obj = double_mliivm_obj$coef 57 | se_obj = 
double_mliivm_obj$se 58 | 59 | set.seed(3141) 60 | double_mliivm_obj_score = DoubleMLIIVM$new( 61 | data = data_iivm$dml_data, 62 | n_folds = 5, 63 | ml_m = lrn(learner_m), 64 | ml_g = lrn(learner), 65 | ml_r = lrn(learner_m), 66 | dml_procedure = dml_procedure, 67 | trimming_threshold = trimming_threshold, 68 | score = score_fct) 69 | double_mliivm_obj_score$fit() 70 | theta_obj_score = double_mliivm_obj_score$coef 71 | se_obj_score = double_mliivm_obj_score$se 72 | 73 | expect_equal(theta_obj, theta_obj_score, tolerance = 1e-8) 74 | expect_equal(se_obj, se_obj_score, tolerance = 1e-8) 75 | } 76 | ) 77 | -------------------------------------------------------------------------------- /tests/testthat/test-double_ml_irm.R: -------------------------------------------------------------------------------- 1 | context("Unit tests for IRM") 2 | 3 | library("mlr3learners") 4 | 5 | lgr::get_logger("mlr3")$set_threshold("warn") 6 | 7 | on_cran = !identical(Sys.getenv("NOT_CRAN"), "true") 8 | if (on_cran) { 9 | test_cases = expand.grid( 10 | learner = "rpart", 11 | dml_procedure = "dml1", 12 | score = "ATTE", 13 | trimming_threshold = 0, 14 | stringsAsFactors = FALSE) 15 | test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 16 | } else { 17 | test_cases = expand.grid( 18 | learner = c("cv_glmnet", "graph_learner"), 19 | dml_procedure = c("dml1", "dml2"), 20 | score = c("ATE", "ATTE"), 21 | trimming_threshold = 0, 22 | stringsAsFactors = FALSE) 23 | test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 24 | } 25 | 26 | patrick::with_parameters_test_that("Unit tests for IRM:", 27 | .cases = test_cases, { 28 | learner_pars = get_default_mlmethod_irm(learner) 29 | n_rep_boot = 498 30 | 31 | set.seed(3141) 32 | irm_hat = dml_irm(data_irm$df, 33 | y = "y", d = "d", 34 | n_folds = 5, 35 | ml_g = learner_pars$ml_g$clone(), ml_m = learner_pars$ml_m$clone(), 36 | dml_procedure = dml_procedure, score = score) 37 | theta = irm_hat$coef 38 | se = 
irm_hat$se 39 | 40 | boot_theta = bootstrap_irm(irm_hat$thetas, irm_hat$ses, 41 | data_irm$df, 42 | y = "y", d = "d", 43 | n_folds = 5, smpls = irm_hat$smpls, 44 | all_preds = irm_hat$all_preds, 45 | score = score, 46 | bootstrap = "normal", n_rep_boot = n_rep_boot)$boot_coef 47 | 48 | 49 | set.seed(3141) 50 | double_mlirm_obj = DoubleMLIRM$new( 51 | data = data_irm$dml_data, 52 | n_folds = 5, 53 | ml_g = learner_pars$ml_g$clone(), 54 | ml_m = learner_pars$ml_m$clone(), 55 | dml_procedure = dml_procedure, 56 | score = score, 57 | trimming_threshold = trimming_threshold) 58 | 59 | double_mlirm_obj$fit() 60 | theta_obj = double_mlirm_obj$coef 61 | se_obj = double_mlirm_obj$se 62 | 63 | # bootstrap 64 | double_mlirm_obj$bootstrap(method = "normal", n_rep = n_rep_boot) 65 | boot_theta_obj = double_mlirm_obj$boot_coef 66 | 67 | # at the moment the object result comes without a name 68 | expect_equal(theta, theta_obj, tolerance = 1e-8) 69 | expect_equal(se, se_obj, tolerance = 1e-8) 70 | expect_equal(as.vector(boot_theta), as.vector(boot_theta_obj), tolerance = 1e-8) 71 | } 72 | ) 73 | -------------------------------------------------------------------------------- /tests/testthat/test-double_ml_irm_binary_outcome.R: -------------------------------------------------------------------------------- 1 | context("Unit tests for IRM, binary outcome") 2 | 3 | library("mlr3learners") 4 | 5 | lgr::get_logger("mlr3")$set_threshold("warn") 6 | 7 | on_cran = !identical(Sys.getenv("NOT_CRAN"), "true") 8 | if (on_cran) { 9 | test_cases = expand.grid( 10 | learner = "rpart", 11 | dml_procedure = "dml1", 12 | score = "ATTE", 13 | trimming_threshold = 0, 14 | stringsAsFactors = FALSE 15 | ) 16 | test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 17 | } else { 18 | test_cases = expand.grid( 19 | learner = "cv_glmnet", 20 | dml_procedure = c("dml1", "dml2"), 21 | score = c("ATE", "ATTE"), 22 | trimming_threshold = 0, 23 | stringsAsFactors = FALSE 24 | ) 25 | 
test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 26 | } 27 | 28 | patrick::with_parameters_test_that("Unit tests for IRM:", 29 | .cases = test_cases, 30 | { 31 | learner_pars = get_default_mlmethod_irm_binary(learner) 32 | n_rep_boot = 498 33 | 34 | set.seed(3141) 35 | irm_hat = dml_irm(data_irm_binary$df, 36 | y = "y", d = "d", 37 | n_folds = 5, 38 | ml_g = learner_pars$ml_g$clone(), ml_m = learner_pars$ml_m$clone(), 39 | dml_procedure = dml_procedure, score = score 40 | ) 41 | theta = irm_hat$coef 42 | se = irm_hat$se 43 | 44 | boot_theta = bootstrap_irm(irm_hat$thetas, irm_hat$ses, 45 | data_irm_binary$df, 46 | y = "y", d = "d", 47 | n_folds = 5, smpls = irm_hat$smpls, 48 | all_preds = irm_hat$all_preds, 49 | score = score, 50 | bootstrap = "normal", n_rep_boot = n_rep_boot 51 | )$boot_coef 52 | 53 | 54 | set.seed(3141) 55 | double_mlirm_obj = DoubleMLIRM$new( 56 | data = data_irm_binary$dml_data, 57 | n_folds = 5, 58 | ml_g = learner_pars$ml_g$clone(), 59 | ml_m = learner_pars$ml_m$clone(), 60 | dml_procedure = dml_procedure, 61 | score = score, 62 | trimming_threshold = trimming_threshold 63 | ) 64 | 65 | double_mlirm_obj$fit() 66 | theta_obj = double_mlirm_obj$coef 67 | se_obj = double_mlirm_obj$se 68 | 69 | # bootstrap 70 | double_mlirm_obj$bootstrap(method = "normal", n_rep = n_rep_boot) 71 | boot_theta_obj = double_mlirm_obj$boot_coef 72 | 73 | # at the moment the object result comes without a name 74 | expect_equal(theta, theta_obj, tolerance = 1e-8) 75 | expect_equal(se, se_obj, tolerance = 1e-8) 76 | expect_equal(as.vector(boot_theta), as.vector(boot_theta_obj), tolerance = 1e-8) 77 | } 78 | ) 79 | -------------------------------------------------------------------------------- /tests/testthat/test-double_ml_irm_loaded_mlr3learner.R: -------------------------------------------------------------------------------- 1 | context("Unit tests for IRM") 2 | 3 | library("mlr3learners") 4 | 5 | lgr::get_logger("mlr3")$set_threshold("warn") 
6 | 7 | on_cran = !identical(Sys.getenv("NOT_CRAN"), "true") 8 | if (on_cran) { 9 | test_cases = expand.grid( 10 | dml_procedure = "dml1", 11 | score = "ATTE", 12 | trimming_threshold = 0, 13 | stringsAsFactors = FALSE) 14 | } else { 15 | test_cases = expand.grid( 16 | dml_procedure = c("dml1", "dml2"), 17 | score = c("ATE", "ATTE"), 18 | trimming_threshold = 0, 19 | stringsAsFactors = FALSE) 20 | } 21 | test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 22 | 23 | patrick::with_parameters_test_that("Unit tests for IRM:", 24 | .cases = test_cases, { 25 | set.seed(3141) 26 | n_rep_boot = 212 27 | 28 | # unloaded learners (access by name) 29 | learner_regr_name = "regr.ranger" 30 | regr_params = list("num.trees" = 10, "max.depth" = 2) 31 | learner_classif_name = "classif.ranger" 32 | classif_params = list("num.trees" = 10, "max.depth" = 2) 33 | # learner_regr_name = "regr.rpart" 34 | # regr_params = list("cp" = 0.01, "minsplit" = 20) 35 | # learner_classif_name = "classif.rpart" 36 | # classif_params = list("cp" = 0.01, "minsplit" = 20) 37 | # learner_regr_name = "regr.cv_glmnet" 38 | # regr_params = list("s" = "lambda.min", "family" = "gaussian", "nfolds" = 5) 39 | # learner_classif_name = "classif.cv_glmnet" 40 | # classif_params = list("s" = "lambda.min", "nfolds" = 5) 41 | 42 | # loaded learners (mlr3) 43 | loaded_regr_learner = mlr3::lrn("regr.ranger", "num.trees" = 10, "max.depth" = 2) 44 | loaded_classif_learner = mlr3::lrn("classif.ranger", "num.trees" = 10, "max.depth" = 2) 45 | # loaded_regr_learner = mlr3::lrn("regr.rpart", "cp" = 0.1, "minsplit" = 20) 46 | # loaded_classif_learner = mlr3::lrn("classif.rpart", "cp" = 0.1, "minsplit" = 20) 47 | # loaded_regr_learner = mlr3::lrn("regr.cv_glmnet", "s" = "lambda.min", "family" = "gaussian", "nfolds" = 5) 48 | # loaded_classif_learner = mlr3::lrn("classif.cv_glmnet", "s" = "lambda.min", "nfolds" = 5) 49 | 50 | set.seed(2) 51 | double_mlirm = DoubleMLIRM$new( 52 | data = data_irm$dml_data, 53 
| n_folds = 5, 54 | ml_g = learner_regr_name, 55 | ml_m = learner_classif_name, 56 | dml_procedure = dml_procedure, 57 | score = score, 58 | trimming_threshold = trimming_threshold) 59 | # set params for nuisance part m 60 | double_mlirm$set_ml_nuisance_params( 61 | learner = "ml_m", 62 | treat_var = "d", 63 | params = classif_params) 64 | # set params for nuisance part g 65 | double_mlirm$set_ml_nuisance_params( 66 | learner = "ml_g0", 67 | treat_var = "d", 68 | params = regr_params) 69 | double_mlirm$set_ml_nuisance_params( 70 | learner = "ml_g1", 71 | treat_var = "d", 72 | params = regr_params) 73 | double_mlirm$fit() 74 | theta = double_mlirm$coef 75 | se = double_mlirm$se 76 | double_mlirm$bootstrap(method = "normal", n_rep = n_rep_boot) 77 | boot_theta = double_mlirm$boot_coef 78 | 79 | 80 | 81 | set.seed(2) 82 | double_mlirm_loaded = DoubleMLIRM$new( 83 | data = data_irm$dml_data, 84 | n_folds = 5, 85 | ml_g = loaded_regr_learner, 86 | ml_m = loaded_classif_learner, 87 | dml_procedure = dml_procedure, 88 | score = score, 89 | trimming_threshold = trimming_threshold) 90 | double_mlirm_loaded$fit() 91 | theta_loaded = double_mlirm_loaded$coef 92 | se_loaded = double_mlirm_loaded$se 93 | double_mlirm_loaded$bootstrap(method = "normal", n_rep = n_rep_boot) 94 | boot_theta_loaded = double_mlirm_loaded$boot_coef 95 | 96 | expect_equal(theta, theta_loaded, tolerance = 1e-8) 97 | expect_equal(se, se_loaded, tolerance = 1e-8) 98 | expect_equal(as.vector(boot_theta), as.vector(boot_theta_loaded), tolerance = 1e-8) 99 | } 100 | ) 101 | -------------------------------------------------------------------------------- /tests/testthat/test-double_ml_irm_trim.R: -------------------------------------------------------------------------------- 1 | context("Unit tests for IRM propensity score trimming") 2 | 3 | lgr::get_logger("mlr3")$set_threshold("warn") 4 | 5 | on_cran = !identical(Sys.getenv("NOT_CRAN"), "true") 6 | if (on_cran) { 7 | test_cases = expand.grid( 8 | learner 
= "rpart", 9 | dml_procedure = "dml2", 10 | score = "ATTE", 11 | trimming_rule = c("truncate"), 12 | trimming_threshold = c(0.05), 13 | stringsAsFactors = FALSE) 14 | } else { 15 | test_cases = expand.grid( 16 | learner = "rpart", 17 | dml_procedure = c("dml1", "dml2"), 18 | score = c("ATE", "ATTE"), 19 | trimming_rule = c("truncate"), 20 | trimming_threshold = c(1e-12, 0.05), 21 | stringsAsFactors = FALSE) 22 | } 23 | test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 24 | 25 | patrick::with_parameters_test_that("Unit tests for IRM:", 26 | .cases = test_cases, { 27 | learner_pars = get_default_mlmethod_irm(learner) 28 | n_rep_boot = 498 29 | 30 | set.seed(3141) 31 | irm_hat = dml_irm(data_irm$df, 32 | y = "y", d = "d", 33 | n_folds = 5, 34 | ml_g = learner_pars$ml_g$clone(), 35 | ml_m = learner_pars$ml_m$clone(), 36 | dml_procedure = dml_procedure, score = score, 37 | trimming_threshold = trimming_threshold) 38 | theta = irm_hat$coef 39 | se = irm_hat$se 40 | 41 | boot_theta = bootstrap_irm(irm_hat$thetas, irm_hat$ses, 42 | data_irm$df, 43 | y = "y", d = "d", 44 | n_folds = 5, smpls = irm_hat$smpls, 45 | all_preds = irm_hat$all_preds, 46 | score = score, 47 | bootstrap = "normal", n_rep_boot = n_rep_boot, 48 | trimming_threshold = trimming_threshold)$boot_coef 49 | 50 | set.seed(3141) 51 | double_mlirm_obj = DoubleMLIRM$new( 52 | data = data_irm$dml_data, 53 | n_folds = 5, 54 | ml_g = learner_pars$ml_g$clone(), 55 | ml_m = learner_pars$ml_m$clone(), 56 | dml_procedure = dml_procedure, 57 | score = score, 58 | trimming_rule = trimming_rule, 59 | trimming_threshold = trimming_threshold) 60 | 61 | double_mlirm_obj$fit() 62 | theta_obj = double_mlirm_obj$coef 63 | se_obj = double_mlirm_obj$se 64 | 65 | # bootstrap 66 | double_mlirm_obj$bootstrap(method = "normal", n_rep = n_rep_boot) 67 | boot_theta_obj = double_mlirm_obj$boot_coef 68 | 69 | expect_equal(theta, theta_obj, tolerance = 1e-8) 70 | expect_equal(se, se_obj, tolerance = 1e-8) 71 | 
expect_equal(as.vector(boot_theta), as.vector(boot_theta_obj), tolerance = 1e-8) 72 | } 73 | ) 74 | -------------------------------------------------------------------------------- /tests/testthat/test-double_ml_irm_tuning.R: -------------------------------------------------------------------------------- 1 | context("Unit tests for tuning of IRM") 2 | 3 | requireNamespace("lgr") 4 | 5 | logger = lgr::get_logger("bbotk") 6 | logger$set_threshold("warn") 7 | lgr::get_logger("mlr3")$set_threshold("warn") 8 | 9 | # settings for parameter provision 10 | learner = "rpart" 11 | 12 | learner_list = list("mlmethod_m" = learner, "mlmethod_g" = learner) 13 | 14 | # tune_settings = list(n_folds_tune = 3, 15 | # n_rep_tune = 1, 16 | # rsmp_tune = "cv", 17 | # measure = list("ml_g" = "regr.mse", 18 | # "ml_m" = "classif.ce"), 19 | # terminator = mlr3tuning::trm("evals", n_evals = 5), 20 | # algorithm = "grid_search", 21 | # tuner = "grid_search", 22 | # resolution = 5) 23 | 24 | # only minimum amount of input for tuning 25 | tune_settings = list( 26 | terminator = mlr3tuning::trm("evals", n_evals = 5), 27 | resolution = 5) 28 | 29 | on_cran = !identical(Sys.getenv("NOT_CRAN"), "true") 30 | if (on_cran) { 31 | test_cases = expand.grid( 32 | learner = learner, 33 | dml_procedure = "dml2", 34 | score = "ATE", 35 | tune_on_folds = FALSE, 36 | n_rep = c(1), 37 | stringsAsFactors = FALSE) 38 | } else { 39 | test_cases = expand.grid( 40 | learner = learner, 41 | dml_procedure = c("dml1", "dml2"), 42 | score = c("ATE", "ATTE"), 43 | tune_on_folds = c(FALSE, TRUE), 44 | n_rep = c(1, 3), 45 | stringsAsFactors = FALSE) 46 | } 47 | 48 | test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 49 | 50 | # skip('Skip tests for tuning') 51 | patrick::with_parameters_test_that("Unit tests for tuning of IRM:", 52 | .cases = test_cases, { 53 | n_rep_boot = 498 54 | n_folds = 2 55 | 56 | # TODO: Functional Test Case 57 | set.seed(3141) 58 | learner_pars = 
get_default_mlmethod_irm(learner) 59 | 60 | double_mlirm_obj_tuned = DoubleMLIRM$new( 61 | data = data_irm$dml_data, 62 | n_folds = n_folds, 63 | ml_g = learner_pars$mlmethod$mlmethod_g, 64 | ml_m = learner_pars$mlmethod$mlmethod_m, 65 | dml_procedure = dml_procedure, 66 | score = score) 67 | 68 | param_grid = list( 69 | "ml_g" = paradox::ps( 70 | cp = paradox::p_dbl(lower = 0.01, upper = 0.02), 71 | minsplit = paradox::p_int(lower = 1, upper = 2)), 72 | "ml_m" = paradox::ps( 73 | cp = paradox::p_dbl(lower = 0.01, upper = 0.02), 74 | minsplit = paradox::p_int(lower = 1, upper = 2))) 75 | 76 | double_mlirm_obj_tuned$tune(param_set = param_grid, tune_on_folds = tune_on_folds, tune_settings) 77 | double_mlirm_obj_tuned$fit() 78 | 79 | theta_obj_tuned = double_mlirm_obj_tuned$coef 80 | se_obj_tuned = double_mlirm_obj_tuned$se 81 | 82 | # TODO: bootstrap 83 | # double_mlirm_obj_tuned$bootstrap(method = 'normal', n_rep = n_rep_boot) 84 | # boot_theta_obj_tuned = double_mlirm_obj_tuned$boot_coef 85 | 86 | 87 | # restrictions to test 88 | # Functional (tbd) vs OOP implementation (handling randomness in param selection!?) 
89 | expect_is(theta_obj_tuned, "numeric") 90 | expect_is(se_obj_tuned, "numeric") 91 | 92 | # loaded learner 93 | loaded_regr_learner = mlr3::lrn("regr.rpart", "cp" = 0.1, "minsplit" = 20) 94 | loaded_classif_learner = mlr3::lrn("classif.rpart", "cp" = 0.1, "minsplit" = 20) 95 | double_mlirm_obj_loaded_tuned = DoubleMLIRM$new( 96 | data = data_irm$dml_data, 97 | n_folds = n_folds, 98 | ml_g = loaded_regr_learner, 99 | ml_m = loaded_classif_learner, 100 | dml_procedure = dml_procedure, 101 | score = score) 102 | double_mlirm_obj_loaded_tuned$tune(param_set = param_grid, tune_on_folds = tune_on_folds, tune_settings) 103 | double_mlirm_obj_loaded_tuned$fit() 104 | 105 | theta_obj_loaded_tuned = double_mlirm_obj_loaded_tuned$coef 106 | se_obj_loaded_tuned = double_mlirm_obj_loaded_tuned$se 107 | 108 | # TODO: bootstrap 109 | # double_mlirm_obj_loaded_tuned$bootstrap(method = 'normal', n_rep = n_rep_boot) 110 | # boot_theta_obj_loaded_tuned = double_mlirm_obj_loaded_tuned$boot_coef 111 | 112 | 113 | # restrictions to test 114 | # Functional (tbd) vs OOP implementation (handling randomness in param selection!?) 
115 | expect_is(theta_obj_loaded_tuned, "numeric") 116 | expect_is(se_obj_loaded_tuned, "numeric") 117 | } 118 | ) 119 | -------------------------------------------------------------------------------- /tests/testthat/test-double_ml_irm_user_score.R: -------------------------------------------------------------------------------- 1 | context("Unit tests for IRM, callable score") 2 | 3 | library("mlr3learners") 4 | 5 | lgr::get_logger("mlr3")$set_threshold("warn") 6 | 7 | # externally provided score function 8 | score_fct = function(y, d, g0_hat, g1_hat, m_hat, smpls) { 9 | n_obs = length(y) 10 | u1_hat = y - g1_hat 11 | u0_hat = y - g0_hat 12 | 13 | psi_b = g1_hat - g0_hat + d * (u1_hat) / m_hat - 14 | (1 - d) * u0_hat / (1 - m_hat) 15 | psi_a = rep(-1, n_obs) 16 | psis = list( 17 | psi_a = psi_a, 18 | psi_b = psi_b) 19 | return(psis) 20 | } 21 | 22 | on_cran = !identical(Sys.getenv("NOT_CRAN"), "true") 23 | if (on_cran) { 24 | test_cases = expand.grid( 25 | learner = "regr.rpart", 26 | learner_m = "classif.rpart", 27 | dml_procedure = "dml2", 28 | trimming_threshold = 1e-5, 29 | stringsAsFactors = FALSE) 30 | test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 31 | } else { 32 | test_cases = expand.grid( 33 | learner = "regr.glmnet", 34 | learner_m = "classif.glmnet", 35 | dml_procedure = c("dml1", "dml2"), 36 | trimming_threshold = c(1e-5, 0.01), 37 | stringsAsFactors = FALSE) 38 | test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 39 | } 40 | 41 | patrick::with_parameters_test_that("Unit tests for IRM, callable score:", 42 | .cases = test_cases, { 43 | n_rep_boot = 498 44 | 45 | set.seed(3141) 46 | double_mlirm_obj = DoubleMLIRM$new( 47 | data = data_irm$dml_data, 48 | n_folds = 5, 49 | ml_g = lrn(learner), 50 | ml_m = lrn(learner_m), 51 | dml_procedure = dml_procedure, 52 | score = "ATE", 53 | trimming_threshold = trimming_threshold) 54 | double_mlirm_obj$fit() 55 | theta_obj = double_mlirm_obj$coef 56 | se_obj = 
double_mlirm_obj$se 57 | double_mlirm_obj$bootstrap(method = "normal", n_rep = n_rep_boot) 58 | boot_theta_obj = double_mlirm_obj$boot_coef 59 | 60 | set.seed(3141) 61 | double_mlirm_obj_score = DoubleMLIRM$new( 62 | data = data_irm$dml_data, 63 | n_folds = 5, 64 | ml_g = lrn(learner), 65 | ml_m = lrn(learner_m), 66 | dml_procedure = dml_procedure, 67 | score = score_fct, 68 | trimming_threshold = trimming_threshold) 69 | double_mlirm_obj_score$fit() 70 | theta_obj_score = double_mlirm_obj_score$coef 71 | se_obj_score = double_mlirm_obj_score$se 72 | 73 | double_mlirm_obj_score$bootstrap(method = "normal", n_rep = n_rep_boot) 74 | boot_theta_score = double_mlirm_obj_score$boot_coef 75 | 76 | expect_equal(theta_obj_score, theta_obj, tolerance = 1e-8) 77 | expect_equal(se_obj_score, se_obj, tolerance = 1e-8) 78 | expect_equal(as.vector(boot_theta_score), as.vector(boot_theta_obj), tolerance = 1e-8) 79 | } 80 | ) 81 | -------------------------------------------------------------------------------- /tests/testthat/test-double_ml_pliv.R: -------------------------------------------------------------------------------- 1 | context("Unit tests for PLIV") 2 | 3 | library("mlr3learners") 4 | 5 | lgr::get_logger("mlr3")$set_threshold("warn") 6 | 7 | on_cran = !identical(Sys.getenv("NOT_CRAN"), "true") 8 | if (on_cran) { 9 | test_cases = expand.grid( 10 | learner = "regr.lm", 11 | dml_procedure = "dml1", 12 | score = "partialling out", 13 | stringsAsFactors = FALSE) 14 | } else { 15 | test_cases = expand.grid( 16 | learner = c("regr.lm", "regr.glmnet", "graph_learner"), 17 | dml_procedure = c("dml1", "dml2"), 18 | score = c("partialling out", "IV-type"), 19 | stringsAsFactors = FALSE) 20 | } 21 | test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 22 | 23 | patrick::with_parameters_test_that("Unit tests for PLIV:", 24 | .cases = test_cases, { 25 | learner_pars = get_default_mlmethod_pliv(learner) 26 | n_rep_boot = 498 27 | 28 | set.seed(3141) 29 | pliv_hat = 
dml_pliv(data_pliv$df, 30 | y = "y", d = "d", z = "z", 31 | n_folds = 5, 32 | ml_l = learner_pars$ml_l$clone(), 33 | ml_m = learner_pars$ml_m$clone(), 34 | ml_r = learner_pars$ml_r$clone(), 35 | ml_g = learner_pars$ml_g$clone(), 36 | dml_procedure = dml_procedure, score = score) 37 | theta = pliv_hat$coef 38 | se = pliv_hat$se 39 | 40 | boot_theta = bootstrap_pliv(pliv_hat$thetas, pliv_hat$ses, 41 | data_pliv$df, 42 | y = "y", d = "d", z = "z", 43 | n_folds = 5, smpls = pliv_hat$smpls, 44 | all_preds = pliv_hat$all_preds, 45 | bootstrap = "normal", n_rep_boot = n_rep_boot, 46 | score = score)$boot_coef 47 | 48 | set.seed(3141) 49 | if (score == "partialling out") { 50 | double_mlpliv_obj = DoubleMLPLIV$new( 51 | data = data_pliv$dml_data, 52 | n_folds = 5, 53 | ml_l = learner_pars$ml_l$clone(), 54 | ml_m = learner_pars$ml_m$clone(), 55 | ml_r = learner_pars$ml_r$clone(), 56 | dml_procedure = dml_procedure, 57 | score = score) 58 | } else { 59 | double_mlpliv_obj = DoubleMLPLIV$new( 60 | data = data_pliv$dml_data, 61 | n_folds = 5, 62 | ml_l = learner_pars$ml_l$clone(), 63 | ml_m = learner_pars$ml_m$clone(), 64 | ml_r = learner_pars$ml_r$clone(), 65 | ml_g = learner_pars$ml_g$clone(), 66 | dml_procedure = dml_procedure, 67 | score = score) 68 | } 69 | 70 | double_mlpliv_obj$fit(store_predictions = TRUE) 71 | theta_obj = double_mlpliv_obj$coef 72 | se_obj = double_mlpliv_obj$se 73 | 74 | # bootstrap 75 | double_mlpliv_obj$bootstrap(method = "normal", n_rep = n_rep_boot) 76 | boot_theta_obj = double_mlpliv_obj$boot_coef 77 | 78 | # at the moment the object result comes without a name 79 | expect_equal(theta, theta_obj, tolerance = 1e-8) 80 | expect_equal(se, se_obj, tolerance = 1e-8) 81 | expect_equal(as.vector(boot_theta), as.vector(boot_theta_obj), tolerance = 1e-8) 82 | } 83 | ) 84 | -------------------------------------------------------------------------------- /tests/testthat/test-double_ml_pliv_exception_handling.R: 
-------------------------------------------------------------------------------- 1 | context("Unit tests for exception handling and deprecation warnings of PLIV") 2 | 3 | library("mlr3learners") 4 | 5 | logger = lgr::get_logger("bbotk") 6 | logger$set_threshold("warn") 7 | lgr::get_logger("mlr3")$set_threshold("warn") 8 | 9 | test_that("Unit tests for deprecation warnings of PLIV", { 10 | set.seed(3141) 11 | dml_data_pliv = make_pliv_CHS2015(n_obs = 51, dim_z = 1) 12 | ml_l = lrn("regr.ranger") 13 | ml_g = lrn("regr.ranger") 14 | ml_m = lrn("regr.ranger") 15 | ml_r = lrn("regr.ranger") 16 | msg = paste0("The argument ml_g was renamed to ml_l.") 17 | expect_warning(DoubleMLPLIV$new(dml_data_pliv, 18 | ml_g = ml_g, ml_m = ml_m, ml_r = ml_r), 19 | regexp = msg) 20 | 21 | msg = paste( 22 | "For score = 'IV-type', learners", 23 | "ml_l, ml_m, ml_r and ml_g need to be specified.") 24 | expect_error(DoubleMLPLIV$new(dml_data_pliv, 25 | ml_l = ml_l, ml_m = ml_m, ml_r = ml_r, 26 | score = "IV-type"), 27 | regexp = msg) 28 | 29 | dml_obj = DoubleMLPLIV$new(dml_data_pliv, 30 | ml_l = ml_g, ml_m = ml_m, ml_r = ml_r) 31 | 32 | msg = paste0("Learner ml_g was renamed to ml_l.") 33 | expect_warning(dml_obj$set_ml_nuisance_params( 34 | "ml_g", "d", list("num.trees" = 10)), 35 | regexp = msg) 36 | 37 | par_grids = list( 38 | "ml_g" = paradox::ps( 39 | num.trees = paradox::p_int(lower = 9, upper = 10)), 40 | "ml_m" = paradox::ps( 41 | num.trees = paradox::p_int(lower = 10, upper = 11)), 42 | "ml_r" = paradox::ps( 43 | num.trees = paradox::p_int(lower = 10, upper = 11))) 44 | 45 | msg = paste0("Learner ml_g was renamed to ml_l.") 46 | expect_warning(dml_obj$tune(par_grids), 47 | regexp = msg) 48 | 49 | tune_settings = list( 50 | n_folds_tune = 5, 51 | rsmp_tune = mlr3::rsmp("cv", folds = 5), 52 | measure = list(ml_g = "regr.mse", ml_m = "regr.mae"), 53 | terminator = mlr3tuning::trm("evals", n_evals = 20), 54 | algorithm = mlr3tuning::tnr("grid_search"), 55 | resolution = 5) 56 | 
expect_warning(dml_obj$tune(par_grids, tune_settings = tune_settings), 57 | regexp = msg) 58 | }) 59 | 60 | test_that("Unit tests of exception handling for DoubleMLPLIV", { 61 | set.seed(3141) 62 | dml_data_pliv = make_pliv_CHS2015(n_obs = 51, dim_z = 1) 63 | ml_l = lrn("regr.ranger") 64 | ml_m = lrn("regr.ranger") 65 | ml_r = lrn("regr.ranger") 66 | ml_g = lrn("regr.ranger") 67 | 68 | 69 | msg = paste0( 70 | "A learner ml_g has been provided for ", 71 | "score = 'partialling out' but will be ignored.") 72 | expect_warning(DoubleMLPLIV$new(dml_data_pliv, 73 | ml_l = ml_l, ml_m = ml_m, ml_r = ml_r, 74 | ml_g = ml_g, 75 | score = "partialling out"), 76 | regexp = msg) 77 | }) 78 | -------------------------------------------------------------------------------- /tests/testthat/test-double_ml_pliv_one_way_cluster.R: -------------------------------------------------------------------------------- 1 | context("Unit tests for PLIV with one-way clustering") 2 | 3 | lgr::get_logger("mlr3")$set_threshold("warn") 4 | 5 | on_cran = !identical(Sys.getenv("NOT_CRAN"), "true") 6 | if (on_cran) { 7 | test_cases = expand.grid( 8 | learner = "regr.lm", 9 | dml_procedure = "dml1", 10 | score = "partialling out", 11 | stringsAsFactors = FALSE) 12 | } else { 13 | test_cases = expand.grid( 14 | learner = c("regr.lm", "regr.glmnet"), 15 | dml_procedure = c("dml1", "dml2"), 16 | score = c("partialling out", "IV-type"), 17 | stringsAsFactors = FALSE) 18 | } 19 | test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 20 | 21 | set.seed(1234) 22 | N = 25 # number of observations (first dimension) 23 | M = 25 # number of observations (second dimension) 24 | dim_x = 100 # dimension of x 25 | data_one_way = make_pliv_multiway_cluster_CKMS2021(N, M, dim_x, 26 | omega_X = c(0.25, 0), 27 | omega_epsilon = c(0.25, 0), 28 | omega_v = c(0.25, 0), 29 | omega_V = c(0.25, 0)) 30 | data_one_way$cluster_cols = "cluster_var_i" 31 | 32 | patrick::with_parameters_test_that("Unit tests for 
PLIV with one-way clustering:", 33 | .cases = test_cases, { 34 | learner_pars = get_default_mlmethod_pliv(learner) 35 | 36 | n_folds = 2 37 | n_rep = 2 38 | set.seed(3141) 39 | if (score == "IV-type") { 40 | ml_g = learner_pars$ml_g$clone() 41 | } else { 42 | ml_g = NULL 43 | } 44 | double_mlpliv_obj = DoubleMLPLIV$new( 45 | data = data_one_way, 46 | n_folds = n_folds, 47 | n_rep = n_rep, 48 | ml_l = learner_pars$ml_l$clone(), 49 | ml_m = learner_pars$ml_m$clone(), 50 | ml_r = learner_pars$ml_r$clone(), 51 | ml_g = ml_g, 52 | dml_procedure = dml_procedure, 53 | score = score) 54 | 55 | set.seed(3141) 56 | double_mlpliv_obj$fit() 57 | theta_obj = double_mlpliv_obj$coef 58 | se_obj = double_mlpliv_obj$se 59 | 60 | set.seed(3141) 61 | if (score == "IV-type") { 62 | ml_g = learner_pars$ml_g$clone() 63 | } else { 64 | ml_g = NULL 65 | } 66 | df = as.data.frame(data_one_way$data) 67 | cluster_var = df$cluster_var_i 68 | # need to drop variables as x is not explicitly set 69 | df = df[, !(names(df) %in% c("cluster_var_i", "cluster_var_j"))] 70 | pliv_hat = dml_pliv(df, 71 | y = "Y", d = "D", z = "Z", 72 | n_folds = n_folds, 73 | ml_l = learner_pars$ml_l$clone(), 74 | ml_m = learner_pars$ml_m$clone(), 75 | ml_r = learner_pars$ml_r$clone(), 76 | ml_g = ml_g, 77 | dml_procedure = dml_procedure, score = score, 78 | smpls = double_mlpliv_obj$smpls, 79 | n_rep = n_rep) 80 | 81 | thetas = rep(NA_real_, n_rep) 82 | ses = rep(NA_real_, n_rep) 83 | for (i_rep in 1:n_rep) { 84 | this_smpl = double_mlpliv_obj$smpls[[i_rep]] 85 | residuals = compute_pliv_residuals(df, 86 | y = "Y", d = "D", z = "Z", 87 | n_folds = n_folds, 88 | smpls = this_smpl, 89 | all_preds = pliv_hat$all_preds[[i_rep]]) 90 | y_minus_l_hat = residuals$y_minus_l_hat 91 | d_minus_r_hat = residuals$d_minus_r_hat 92 | z_minus_m_hat = residuals$z_minus_m_hat 93 | y_minus_g_hat = residuals$y_minus_g_hat 94 | D = df[, "D"] 95 | 96 | if (score == "partialling out") psi_a = -z_minus_m_hat * d_minus_r_hat 97 | if (score == 
"IV-type") psi_a = -D * z_minus_m_hat 98 | if (dml_procedure == "dml2") { 99 | if (score == "partialling out") psi_b = z_minus_m_hat * y_minus_l_hat 100 | if (score == "IV-type") psi_b = z_minus_m_hat * y_minus_g_hat 101 | theta = est_one_way_cluster_dml2( 102 | psi_a, psi_b, 103 | cluster_var, 104 | this_smpl) 105 | } else { 106 | theta = pliv_hat$thetas[i_rep] 107 | } 108 | if (score == "partialling out") psi = (y_minus_l_hat - d_minus_r_hat * theta) * z_minus_m_hat 109 | if (score == "IV-type") psi = (y_minus_g_hat - D * theta) * z_minus_m_hat 110 | var = var_one_way_cluster( 111 | psi, psi_a, 112 | cluster_var, 113 | this_smpl) 114 | ses[i_rep] = sqrt(var) 115 | thetas[i_rep] = theta 116 | } 117 | 118 | 119 | theta = stats::median(thetas) 120 | var_scaling_factor = length(unique(cluster_var)) 121 | se = se_repeated( 122 | ses * sqrt(var_scaling_factor), 123 | thetas, theta) / sqrt(var_scaling_factor) 124 | names(theta) = "D" 125 | names(se) = "D" 126 | 127 | # at the moment the object result comes without a name 128 | expect_equal(theta, theta_obj, tolerance = 1e-8) 129 | expect_equal(se, se_obj, tolerance = 1e-8) 130 | } 131 | ) 132 | -------------------------------------------------------------------------------- /tests/testthat/test-double_ml_pliv_partial_functional_initializer.R: -------------------------------------------------------------------------------- 1 | context("Unit tests for PLIV, partialling out X, Z, XZ") 2 | 3 | lgr::get_logger("mlr3")$set_threshold("warn") 4 | 5 | on_cran = !identical(Sys.getenv("NOT_CRAN"), "true") 6 | if (on_cran) { 7 | test_cases = expand.grid( 8 | learner = "regr.lm", 9 | dml_procedure = "dml2", 10 | score = "partialling out", 11 | stringsAsFactors = FALSE) 12 | } else { 13 | test_cases = expand.grid( 14 | learner = c("regr.lm", "regr.cv_glmnet"), 15 | dml_procedure = c("dml1", "dml2"), 16 | score = "partialling out", 17 | stringsAsFactors = FALSE) 18 | } 19 | test_cases[".test_name"] = apply(test_cases, 1, paste, 
collapse = "_") 20 | 21 | patrick::with_parameters_test_that("Unit tests for PLIV (partialX functional initialization):", 22 | .cases = test_cases, { 23 | learner_pars = get_default_mlmethod_pliv(learner) 24 | df = data_pliv$df 25 | Xnames = names(df)[names(df) %in% c("y", "d", "z", "z2") == FALSE] 26 | data_ml = double_ml_data_from_data_frame(df, 27 | y_col = "y", 28 | d_cols = "d", x_cols = Xnames, z_cols = c("z", "z2")) 29 | 30 | # Partial out X (default PLIV) 31 | set.seed(3141) 32 | double_mlpliv_obj = DoubleMLPLIV$new(data_ml, 33 | n_folds = 5, 34 | ml_l = learner_pars$ml_l$clone(), 35 | ml_m = learner_pars$ml_m$clone(), 36 | ml_r = learner_pars$ml_r$clone(), 37 | dml_procedure = dml_procedure, 38 | score = score) 39 | 40 | double_mlpliv_obj$fit() 41 | theta_obj = double_mlpliv_obj$coef 42 | se_obj = double_mlpliv_obj$se 43 | 44 | # Partial out X 45 | set.seed(3141) 46 | double_mlpliv_partX = DoubleMLPLIV.partialX(data_ml, 47 | n_folds = 5, 48 | ml_l = learner_pars$ml_l$clone(), 49 | ml_m = learner_pars$ml_m$clone(), 50 | ml_r = learner_pars$ml_r$clone(), 51 | dml_procedure = dml_procedure, 52 | score = score) 53 | 54 | double_mlpliv_partX$fit() 55 | theta_partX = double_mlpliv_partX$coef 56 | se_partX = double_mlpliv_partX$se 57 | 58 | expect_equal(theta_partX, theta_obj, tolerance = 1e-8) 59 | expect_equal(se_partX, se_obj, tolerance = 1e-8) 60 | } 61 | ) 62 | 63 | patrick::with_parameters_test_that("Unit tests for PLIV (partialZ functional initialization):", 64 | .cases = test_cases, { 65 | learner_pars = get_default_mlmethod_pliv(learner) 66 | df = data_pliv$df 67 | Xnames = names(df)[names(df) %in% c("y", "d", "z", "z2") == FALSE] 68 | data_ml = double_ml_data_from_data_frame(df, 69 | y_col = "y", 70 | d_cols = "d", x_cols = Xnames, z_cols = c("z", "z2")) 71 | # Partial out Z 72 | set.seed(3141) 73 | double_mlpliv_partZ = DoubleMLPLIV$new(data_ml, 74 | n_folds = 5, 75 | ml_l = NULL, 76 | ml_m = NULL, 77 | ml_r = learner_pars$ml_r$clone(), 78 | 
dml_procedure = dml_procedure, 79 | score = score, 80 | partialX = FALSE, partialZ = TRUE) 81 | 82 | double_mlpliv_partZ$fit() 83 | theta_partZ = double_mlpliv_partZ$coef 84 | se_partZ = double_mlpliv_partZ$se 85 | 86 | set.seed(3141) 87 | double_mlpliv_partZ_fun = DoubleMLPLIV.partialZ(data_ml, 88 | n_folds = 5, 89 | ml_r = learner_pars$ml_r$clone(), 90 | dml_procedure = dml_procedure, 91 | score = score) 92 | 93 | double_mlpliv_partZ_fun$fit() 94 | theta_partZ_fun = double_mlpliv_partZ_fun$coef 95 | se_partZ_fun = double_mlpliv_partZ_fun$se 96 | 97 | expect_equal(theta_partZ, theta_partZ_fun, tolerance = 1e-8) 98 | expect_equal(se_partZ, se_partZ_fun, tolerance = 1e-8) 99 | } 100 | ) 101 | 102 | patrick::with_parameters_test_that("Unit tests for PLIV (partialXZ functional initialization):", 103 | .cases = test_cases, { 104 | learner_pars = get_default_mlmethod_pliv(learner) 105 | df = data_pliv$df 106 | Xnames = names(df)[names(df) %in% c("y", "d", "z", "z2") == FALSE] 107 | data_ml = double_ml_data_from_data_frame(df, 108 | y_col = "y", 109 | d_cols = "d", x_cols = Xnames, z_cols = c("z", "z2")) 110 | 111 | set.seed(3141) 112 | double_mlpliv_partXZ = DoubleMLPLIV$new(data_ml, 113 | n_folds = 5, 114 | ml_l = learner_pars$ml_l$clone(), 115 | ml_m = learner_pars$ml_m$clone(), 116 | ml_r = learner_pars$ml_r$clone(), 117 | dml_procedure = dml_procedure, 118 | score = score, 119 | partialX = TRUE, partialZ = TRUE) 120 | 121 | double_mlpliv_partXZ$fit() 122 | theta_partXZ = double_mlpliv_partXZ$coef 123 | se_partXZ = double_mlpliv_partXZ$se 124 | 125 | set.seed(3141) 126 | double_mlpliv_partXZ_fun = DoubleMLPLIV.partialXZ(data_ml, 127 | n_folds = 5, 128 | ml_l = learner_pars$ml_l$clone(), 129 | ml_m = learner_pars$ml_m$clone(), 130 | ml_r = learner_pars$ml_r$clone(), 131 | dml_procedure = dml_procedure, 132 | score = score) 133 | 134 | double_mlpliv_partXZ_fun$fit() 135 | theta_partXZ_fun = double_mlpliv_partXZ_fun$coef 136 | se_partXZ_fun = double_mlpliv_partXZ_fun$se 
137 | 138 | expect_equal(theta_partXZ, theta_partXZ_fun, tolerance = 1e-8) 139 | expect_equal(se_partXZ, se_partXZ_fun, tolerance = 1e-8) 140 | } 141 | ) 142 | -------------------------------------------------------------------------------- /tests/testthat/test-double_ml_pliv_partial_functional_initializer_IVtype.R: -------------------------------------------------------------------------------- 1 | context("Unit tests for PLIV, partialling out X, Z, XZ") 2 | 3 | lgr::get_logger("mlr3")$set_threshold("warn") 4 | 5 | on_cran = !identical(Sys.getenv("NOT_CRAN"), "true") 6 | if (on_cran) { 7 | test_cases = expand.grid( 8 | learner = "regr.lm", 9 | dml_procedure = "dml2", 10 | score = "IV-type", 11 | stringsAsFactors = FALSE) 12 | } else { 13 | test_cases = expand.grid( 14 | learner = c("regr.lm", "regr.cv_glmnet"), 15 | dml_procedure = c("dml1", "dml2"), 16 | score = "IV-type", 17 | stringsAsFactors = FALSE) 18 | } 19 | test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 20 | 21 | patrick::with_parameters_test_that("Unit tests for PLIV (partialX functional initialization):", 22 | .cases = test_cases, { 23 | learner_pars = get_default_mlmethod_pliv(learner) 24 | df = data_pliv$df 25 | Xnames = names(df)[names(df) %in% c("y", "d", "z", "z2") == FALSE] 26 | data_ml = double_ml_data_from_data_frame(df, 27 | y_col = "y", 28 | d_cols = "d", x_cols = Xnames, z_cols = "z") 29 | 30 | # Partial out X (default PLIV) 31 | set.seed(3141) 32 | double_mlpliv_obj = DoubleMLPLIV$new(data_ml, 33 | n_folds = 5, 34 | ml_l = learner_pars$ml_l$clone(), 35 | ml_m = learner_pars$ml_m$clone(), 36 | ml_r = learner_pars$ml_r$clone(), 37 | ml_g = learner_pars$ml_g$clone(), 38 | dml_procedure = dml_procedure, 39 | score = score) 40 | 41 | double_mlpliv_obj$fit() 42 | theta_obj = double_mlpliv_obj$coef 43 | se_obj = double_mlpliv_obj$se 44 | 45 | # Partial out X 46 | set.seed(3141) 47 | double_mlpliv_partX = DoubleMLPLIV.partialX(data_ml, 48 | n_folds = 5, 49 | ml_l = 
learner_pars$ml_l$clone(), 50 | ml_m = learner_pars$ml_m$clone(), 51 | ml_r = learner_pars$ml_r$clone(), 52 | ml_g = learner_pars$ml_g$clone(), 53 | dml_procedure = dml_procedure, 54 | score = score) 55 | 56 | double_mlpliv_partX$fit() 57 | theta_partX = double_mlpliv_partX$coef 58 | se_partX = double_mlpliv_partX$se 59 | 60 | expect_equal(theta_partX, theta_obj, tolerance = 1e-8) 61 | expect_equal(se_partX, se_obj, tolerance = 1e-8) 62 | } 63 | ) 64 | -------------------------------------------------------------------------------- /tests/testthat/test-double_ml_pliv_partial_x.R: -------------------------------------------------------------------------------- 1 | context("Unit tests for PLIV.partialX") 2 | 3 | lgr::get_logger("mlr3")$set_threshold("warn") 4 | 5 | skip_on_cran() 6 | 7 | test_cases = expand.grid( 8 | learner = c("regr.lm", "regr.glmnet"), 9 | dml_procedure = c("dml1", "dml2"), 10 | score = "partialling out", 11 | stringsAsFactors = FALSE) 12 | test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 13 | 14 | patrick::with_parameters_test_that("Unit tests for PLIV.partialX:", 15 | .cases = test_cases, { 16 | learner_pars = get_default_mlmethod_pliv(learner) 17 | n_rep_boot = 498 18 | 19 | set.seed(3141) 20 | dim_z = 5 21 | pliv_hat = dml_pliv_partial_x(data_pliv_partialX$df, 22 | y = "y", d = "d", z = paste0("Z", 1:dim_z), 23 | n_folds = 5, 24 | ml_l = learner_pars$ml_l$clone(), 25 | ml_m = learner_pars$ml_m$clone(), 26 | ml_r = learner_pars$ml_r$clone(), 27 | dml_procedure = dml_procedure, score = score) 28 | theta = pliv_hat$coef 29 | se = pliv_hat$se 30 | 31 | set.seed(3141) 32 | boot_theta = bootstrap_pliv_partial_x(pliv_hat$thetas, pliv_hat$ses, 33 | data_pliv_partialX$df, 34 | y = "y", d = "d", z = paste0("Z", 1:dim_z), 35 | n_folds = 5, smpls = pliv_hat$smpls, 36 | all_preds = pliv_hat$all_preds, 37 | bootstrap = "normal", n_rep_boot = n_rep_boot)$boot_coef 38 | 39 | set.seed(3141) 40 | double_mlpliv_obj = 
DoubleMLPLIV.partialX(data_pliv_partialX$dml_data, 41 | ml_l = learner_pars$ml_l$clone(), 42 | ml_m = learner_pars$ml_m$clone(), 43 | ml_r = learner_pars$ml_r$clone(), 44 | n_folds = 5, 45 | score = score, 46 | dml_procedure = dml_procedure) 47 | 48 | double_mlpliv_obj$fit() 49 | theta_obj = double_mlpliv_obj$coef 50 | se_obj = double_mlpliv_obj$se 51 | 52 | # bootstrap 53 | set.seed(3141) 54 | double_mlpliv_obj$bootstrap(method = "normal", n_rep = n_rep_boot) 55 | boot_theta_obj = double_mlpliv_obj$boot_coef 56 | 57 | # at the moment the object result comes without a name 58 | expect_equal(theta, theta_obj, tolerance = 1e-8) 59 | expect_equal(se, se_obj, tolerance = 1e-8) 60 | expect_equal(as.vector(boot_theta), as.vector(boot_theta_obj), tolerance = 1e-8) 61 | } 62 | ) 63 | 64 | test_that("Unit tests for PLIV.partialX invalid score", { 65 | msg = paste( 66 | "Callable score not implemented for DoubleMLPLIV with", 67 | "partialX=TRUE and partialZ=FALSE with several instruments.") 68 | double_mlplr_obj = DoubleMLPLIV.partialX( 69 | data_pliv_partialX$dml_data, 70 | ml_l = mlr3::lrn("regr.rpart"), 71 | ml_m = mlr3::lrn("regr.rpart"), 72 | ml_r = mlr3::lrn("regr.rpart"), 73 | score = function(x) { 74 | return(mean(x)) 75 | } 76 | ) 77 | expect_error(double_mlplr_obj$fit(), 78 | regexp = msg) 79 | }) 80 | -------------------------------------------------------------------------------- /tests/testthat/test-double_ml_pliv_partial_xz.R: -------------------------------------------------------------------------------- 1 | context("Unit tests for PLIV.partialXZ") 2 | 3 | library("mlr3learners") 4 | 5 | lgr::get_logger("mlr3")$set_threshold("warn") 6 | 7 | skip_on_cran() 8 | 9 | test_cases = expand.grid( 10 | learner = c("regr.lm", "regr.glmnet"), 11 | dml_procedure = c("dml1", "dml2"), 12 | score = "partialling out", 13 | stringsAsFactors = FALSE) 14 | test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 15 | 16 | patrick::with_parameters_test_that("Unit 
tests for PLIV.partialXZ:", 17 | .cases = test_cases, { 18 | learner_pars = get_default_mlmethod_pliv(learner) 19 | n_rep_boot = 498 20 | 21 | set.seed(3141) 22 | dim_z = 150 23 | pliv_hat = dml_pliv_partial_xz(data_pliv_partialXZ$df, 24 | y = "y", d = "d", z = paste0("Z", 1:dim_z), 25 | n_folds = 5, 26 | ml_l = learner_pars$ml_l$clone(), 27 | ml_m = learner_pars$ml_m$clone(), 28 | ml_r = learner_pars$ml_r$clone(), 29 | dml_procedure = dml_procedure, score = score) 30 | theta = pliv_hat$coef 31 | se = pliv_hat$se 32 | 33 | boot_theta = bootstrap_pliv_partial_xz(pliv_hat$thetas, pliv_hat$ses, 34 | data_pliv_partialXZ$df, 35 | y = "y", d = "d", z = paste0("Z", 1:dim_z), 36 | n_folds = 5, smpls = pliv_hat$smpls, 37 | all_preds = pliv_hat$all_preds, 38 | bootstrap = "normal", n_rep_boot = n_rep_boot)$boot_coef 39 | 40 | set.seed(3141) 41 | double_mlpliv_obj = DoubleMLPLIV.partialXZ(data_pliv_partialXZ$dml_data, 42 | ml_l = learner_pars$ml_l$clone(), 43 | ml_m = learner_pars$ml_m$clone(), 44 | ml_r = learner_pars$ml_r$clone(), 45 | n_folds = 5, 46 | score = score, 47 | dml_procedure = dml_procedure) 48 | 49 | double_mlpliv_obj$fit(store_predictions = TRUE) 50 | theta_obj = double_mlpliv_obj$coef 51 | se_obj = double_mlpliv_obj$se 52 | 53 | # bootstrap 54 | double_mlpliv_obj$bootstrap(method = "normal", n_rep = n_rep_boot) 55 | boot_theta_obj = double_mlpliv_obj$boot_coef 56 | 57 | # at the moment the object result comes without a name 58 | expect_equal(theta, theta_obj, tolerance = 1e-8) 59 | expect_equal(se, se_obj, tolerance = 1e-8) 60 | expect_equal(as.vector(boot_theta), as.vector(boot_theta_obj), tolerance = 1e-8) 61 | } 62 | ) 63 | 64 | test_that("Unit tests for PLIV.partialXZ invalid score", { 65 | msg = paste( 66 | "Callable score not implemented for DoubleMLPLIV with", 67 | "partialX=TRUE and partialZ=TRUE.") 68 | double_mlplr_obj = DoubleMLPLIV.partialXZ( 69 | data_pliv_partialXZ$dml_data, 70 | ml_l = mlr3::lrn("regr.rpart"), 71 | ml_m = 
mlr3::lrn("regr.rpart"), 72 | ml_r = mlr3::lrn("regr.rpart"), 73 | score = function(x) { 74 | return(mean(x)) 75 | } 76 | ) 77 | expect_error(double_mlplr_obj$fit(), 78 | regexp = msg) 79 | }) 80 | -------------------------------------------------------------------------------- /tests/testthat/test-double_ml_pliv_partial_z.R: -------------------------------------------------------------------------------- 1 | context("Unit tests for PLIV.partialZ") 2 | 3 | lgr::get_logger("mlr3")$set_threshold("warn") 4 | 5 | skip_on_cran() 6 | 7 | test_cases = expand.grid( 8 | learner = c("regr.lm", "regr.glmnet"), 9 | dml_procedure = c("dml1", "dml2"), 10 | score = "partialling out", 11 | stringsAsFactors = FALSE) 12 | test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 13 | 14 | patrick::with_parameters_test_that("Unit tests for PLIV.partialZ:", 15 | .cases = test_cases, { 16 | learner_pars = get_default_mlmethod_pliv(learner) 17 | n_rep_boot = 498 18 | 19 | set.seed(3141) 20 | dim_z = 150 21 | pliv_hat = dml_pliv_partial_z(data_pliv_partialZ$df, 22 | y = "y", d = "d", z = paste0("Z", 1:dim_z), 23 | n_folds = 5, 24 | ml_r = learner_pars$ml_r$clone(), 25 | dml_procedure = dml_procedure, score = score) 26 | theta = pliv_hat$coef 27 | se = pliv_hat$se 28 | 29 | boot_theta = bootstrap_pliv_partial_z(pliv_hat$thetas, pliv_hat$ses, 30 | data_pliv_partialZ$df, 31 | y = "y", d = "d", z = paste0("Z", 1:dim_z), 32 | n_folds = 5, smpls = pliv_hat$smpls, 33 | all_preds = pliv_hat$all_preds, 34 | bootstrap = "normal", n_rep_boot = n_rep_boot)$boot_coef 35 | 36 | set.seed(3141) 37 | double_mlpliv_obj = DoubleMLPLIV.partialZ(data_pliv_partialZ$dml_data, 38 | ml_r = learner_pars$ml_r$clone(), 39 | n_folds = 5, 40 | score = score, 41 | dml_procedure = dml_procedure) 42 | 43 | double_mlpliv_obj$fit() 44 | theta_obj = double_mlpliv_obj$coef 45 | se_obj = double_mlpliv_obj$se 46 | 47 | # bootstrap 48 | double_mlpliv_obj$bootstrap(method = "normal", n_rep = n_rep_boot) 49 | 
boot_theta_obj = double_mlpliv_obj$boot_coef 50 | 51 | # at the moment the object result comes without a name 52 | expect_equal(theta, theta_obj, tolerance = 1e-8) 53 | expect_equal(se, se_obj, tolerance = 1e-8) 54 | expect_equal(as.vector(boot_theta), as.vector(boot_theta_obj), tolerance = 1e-8) 55 | } 56 | ) 57 | 58 | test_that("Unit tests for PLIV.partialZ invalid score", { 59 | msg = paste( 60 | "Callable score not implemented for DoubleMLPLIV with", 61 | "partialX=FALSE and partialZ=TRUE.") 62 | double_mlplr_obj = DoubleMLPLIV.partialZ( 63 | data_pliv_partialZ$dml_data, 64 | ml_r = mlr3::lrn("regr.rpart"), 65 | score = function(x) { 66 | return(mean(x)) 67 | } 68 | ) 69 | expect_error(double_mlplr_obj$fit(), 70 | regexp = msg) 71 | }) 72 | -------------------------------------------------------------------------------- /tests/testthat/test-double_ml_pliv_two_way_cluster.R: -------------------------------------------------------------------------------- 1 | context("Unit tests for PLIV with two-way clustering") 2 | 3 | library("mlr3learners") 4 | 5 | lgr::get_logger("mlr3")$set_threshold("warn") 6 | 7 | on_cran = !identical(Sys.getenv("NOT_CRAN"), "true") 8 | if (on_cran) { 9 | test_cases = expand.grid( 10 | learner = "regr.lm", 11 | dml_procedure = "dml1", 12 | score = "partialling out", 13 | stringsAsFactors = FALSE) 14 | } else { 15 | test_cases = expand.grid( 16 | learner = c("regr.lm", "regr.glmnet"), 17 | dml_procedure = c("dml1", "dml2"), 18 | score = c("partialling out", "IV-type"), 19 | stringsAsFactors = FALSE) 20 | } 21 | test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 22 | 23 | set.seed(1234) 24 | N = 25 # number of observations (first dimension) 25 | M = 25 # number of observations (second dimension) 26 | dim_x = 100 # dimension of x 27 | data_two_way = make_pliv_multiway_cluster_CKMS2021(N, M, dim_x) 28 | 29 | patrick::with_parameters_test_that("Unit tests for PLIV with two-way clustering:", 30 | .cases = test_cases, { 31 | 
learner_pars = get_default_mlmethod_pliv(learner) 32 | 33 | set.seed(3141) 34 | if (score == "IV-type") { 35 | ml_g = learner_pars$ml_g$clone() 36 | } else { 37 | ml_g = NULL 38 | } 39 | double_mlpliv_obj = DoubleMLPLIV$new( 40 | data = data_two_way, 41 | n_folds = 2, 42 | ml_l = learner_pars$ml_l$clone(), 43 | ml_m = learner_pars$ml_m$clone(), 44 | ml_r = learner_pars$ml_r$clone(), 45 | ml_g = ml_g, 46 | dml_procedure = dml_procedure, 47 | score = score) 48 | 49 | set.seed(3141) 50 | double_mlpliv_obj$fit() 51 | theta_obj = double_mlpliv_obj$coef 52 | se_obj = double_mlpliv_obj$se 53 | 54 | set.seed(3141) 55 | df = as.data.frame(data_two_way$data) 56 | cluster_var1 = df$cluster_var_i 57 | cluster_var2 = df$cluster_var_j 58 | # need to drop variables as x is not explicitly set 59 | df = df[, !(names(df) %in% c("cluster_var_i", "cluster_var_j"))] 60 | if (score == "IV-type") { 61 | ml_g = learner_pars$ml_g$clone() 62 | } else { 63 | ml_g = NULL 64 | } 65 | pliv_hat = dml_pliv(df, 66 | y = "Y", d = "D", z = "Z", 67 | n_folds = 4, 68 | ml_l = learner_pars$ml_l$clone(), 69 | ml_m = learner_pars$ml_m$clone(), 70 | ml_r = learner_pars$ml_r$clone(), 71 | ml_g = ml_g, 72 | dml_procedure = dml_procedure, score = score, 73 | smpls = double_mlpliv_obj$smpls) 74 | 75 | 76 | this_smpl = double_mlpliv_obj$smpls[[1]] 77 | residuals = compute_pliv_residuals(df, 78 | y = "Y", d = "D", z = "Z", 79 | n_folds = 4, 80 | smpls = this_smpl, 81 | all_preds = pliv_hat$all_preds[[1]]) 82 | y_minus_l_hat = residuals$y_minus_l_hat 83 | d_minus_r_hat = residuals$d_minus_r_hat 84 | z_minus_m_hat = residuals$z_minus_m_hat 85 | y_minus_g_hat = residuals$y_minus_g_hat 86 | D = df[, "D"] 87 | 88 | if (score == "partialling out") psi_a = -z_minus_m_hat * d_minus_r_hat 89 | if (score == "IV-type") psi_a = -D * z_minus_m_hat 90 | if (dml_procedure == "dml2") { 91 | if (score == "partialling out") psi_b = z_minus_m_hat * y_minus_l_hat 92 | if (score == "IV-type") psi_b = z_minus_m_hat * y_minus_g_hat 
93 | theta = est_two_way_cluster_dml2( 94 | psi_a, psi_b, 95 | cluster_var1, 96 | cluster_var2, 97 | this_smpl) 98 | } else { 99 | theta = pliv_hat$coef 100 | } 101 | if (score == "partialling out") psi = (y_minus_l_hat - d_minus_r_hat * theta) * z_minus_m_hat 102 | if (score == "IV-type") psi = (y_minus_g_hat - D * theta) * z_minus_m_hat 103 | var = var_two_way_cluster( 104 | psi, psi_a, 105 | cluster_var1, 106 | cluster_var2, 107 | this_smpl) 108 | se = sqrt(var) 109 | names(theta) = "D" 110 | names(se) = "D" 111 | 112 | # at the moment the object result comes without a name 113 | expect_equal(theta, theta_obj, tolerance = 1e-8) 114 | expect_equal(se, se_obj, tolerance = 1e-8) 115 | } 116 | ) 117 | -------------------------------------------------------------------------------- /tests/testthat/test-double_ml_pliv_user_score.R: -------------------------------------------------------------------------------- 1 | context("Unit tests for PLIV, callable score") 2 | 3 | library("mlr3learners") 4 | 5 | lgr::get_logger("mlr3")$set_threshold("warn") 6 | 7 | score_fct_po = function(y, z, d, l_hat, m_hat, r_hat, g_hat, smpls) { 8 | u_hat = y - l_hat 9 | w_hat = d - r_hat 10 | v_hat = z - m_hat 11 | psi_a = -w_hat * v_hat 12 | psi_b = v_hat * u_hat 13 | psis = list( 14 | psi_a = psi_a, 15 | psi_b = psi_b) 16 | } 17 | 18 | score_fct_iv = function(y, z, d, l_hat, m_hat, r_hat, g_hat, smpls) { 19 | v_hat = z - m_hat 20 | psi_a = -d * v_hat 21 | psi_b = v_hat * (y - g_hat) 22 | psis = list( 23 | psi_a = psi_a, 24 | psi_b = psi_b) 25 | } 26 | 27 | on_cran = !identical(Sys.getenv("NOT_CRAN"), "true") 28 | if (on_cran) { 29 | test_cases = expand.grid( 30 | learner = "regr.lm", 31 | dml_procedure = "dml2", 32 | score = "partialling out", 33 | stringsAsFactors = FALSE) 34 | } else { 35 | test_cases = expand.grid( 36 | learner = c("regr.lm", "regr.glmnet"), 37 | dml_procedure = c("dml1", "dml2"), 38 | score = c("partialling out", "IV-type"), 39 | stringsAsFactors = FALSE) 40 | } 41 | 
test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 42 | 43 | patrick::with_parameters_test_that("Unit tests for PLIV, callable score:", 44 | .cases = test_cases, { 45 | n_rep_boot = 498 46 | 47 | if (score == "partialling out") { 48 | score_fct = score_fct_po 49 | ml_g = NULL 50 | } else if (score == "IV-type") { 51 | score_fct = score_fct_iv 52 | ml_g = lrn(learner) 53 | } 54 | 55 | set.seed(3141) 56 | double_mlpliv_obj = DoubleMLPLIV$new( 57 | data = data_pliv$dml_data, 58 | n_folds = 5, 59 | ml_l = lrn(learner), 60 | ml_m = lrn(learner), 61 | ml_r = lrn(learner), 62 | ml_g = ml_g, 63 | dml_procedure = dml_procedure, 64 | score = score) 65 | 66 | double_mlpliv_obj$fit() 67 | theta_obj = double_mlpliv_obj$coef 68 | se_obj = double_mlpliv_obj$se 69 | 70 | double_mlpliv_obj$bootstrap(method = "normal", n_rep = n_rep_boot) 71 | boot_theta_obj = double_mlpliv_obj$boot_coef 72 | 73 | set.seed(3141) 74 | double_mlpliv_obj_score = DoubleMLPLIV$new( 75 | data = data_pliv$dml_data, 76 | n_folds = 5, 77 | ml_l = lrn(learner), 78 | ml_m = lrn(learner), 79 | ml_r = lrn(learner), 80 | ml_g = ml_g, 81 | dml_procedure = dml_procedure, 82 | score = score_fct) 83 | 84 | double_mlpliv_obj_score$fit() 85 | theta_obj_score = double_mlpliv_obj_score$coef 86 | se_obj_score = double_mlpliv_obj_score$se 87 | 88 | double_mlpliv_obj_score$bootstrap(method = "normal", n_rep = n_rep_boot) 89 | boot_theta_score = double_mlpliv_obj_score$boot_coef 90 | 91 | expect_equal(theta_obj, theta_obj_score, tolerance = 1e-8) 92 | expect_equal(se_obj, se_obj_score, tolerance = 1e-8) 93 | expect_equal(as.vector(boot_theta_score), as.vector(boot_theta_obj), tolerance = 1e-8) 94 | } 95 | ) 96 | -------------------------------------------------------------------------------- /tests/testthat/test-double_ml_plr.R: -------------------------------------------------------------------------------- 1 | context("Unit tests for PLR") 2 | 3 | library("mlr3learners") 4 | 5 | 
lgr::get_logger("mlr3")$set_threshold("warn") 6 | 7 | on_cran = !identical(Sys.getenv("NOT_CRAN"), "true") 8 | if (on_cran) { 9 | test_cases = expand.grid( 10 | learner = "regr.lm", 11 | dml_procedure = "dml2", 12 | score = "partialling out", 13 | stringsAsFactors = FALSE) 14 | } else { 15 | test_cases = expand.grid( 16 | learner = c("regr.lm", "regr.cv_glmnet", "graph_learner"), 17 | dml_procedure = c("dml1", "dml2"), 18 | score = c("IV-type", "partialling out"), 19 | stringsAsFactors = FALSE) 20 | } 21 | test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 22 | 23 | patrick::with_parameters_test_that("Unit tests for PLR:", 24 | .cases = test_cases, { 25 | learner_pars = get_default_mlmethod_plr(learner) 26 | n_rep_boot = 498 27 | 28 | set.seed(3141) 29 | n_folds = 5 30 | plr_hat = dml_plr(data_plr$df, 31 | y = "y", d = "d", 32 | n_folds = n_folds, 33 | ml_l = learner_pars$ml_l$clone(), 34 | ml_m = learner_pars$ml_m$clone(), 35 | ml_g = learner_pars$ml_g$clone(), 36 | dml_procedure = dml_procedure, score = score) 37 | theta = plr_hat$coef 38 | se = plr_hat$se 39 | t = plr_hat$t 40 | pval = plr_hat$pval 41 | # ci = confint(plr_hat, level = 0.95, joint = FALSE) 42 | 43 | boot_theta = bootstrap_plr(plr_hat$thetas, plr_hat$ses, 44 | data_plr$df, 45 | y = "y", d = "d", 46 | n_folds = n_folds, smpls = plr_hat$smpls, 47 | all_preds = plr_hat$all_preds, 48 | bootstrap = "normal", n_rep_boot = n_rep_boot, 49 | score = score)$boot_coef 50 | 51 | set.seed(3141) 52 | if (score == "partialling out") { 53 | double_mlplr_obj = DoubleMLPLR$new( 54 | data = data_plr$dml_data, 55 | ml_l = learner_pars$ml_l$clone(), 56 | ml_m = learner_pars$ml_m$clone(), 57 | dml_procedure = dml_procedure, 58 | n_folds = n_folds, 59 | score = score) 60 | } else { 61 | double_mlplr_obj = DoubleMLPLR$new( 62 | data = data_plr$dml_data, 63 | ml_l = learner_pars$ml_l$clone(), 64 | ml_m = learner_pars$ml_m$clone(), 65 | ml_g = learner_pars$ml_g$clone(), 66 | dml_procedure = 
dml_procedure, 67 | n_folds = n_folds, 68 | score = score) 69 | } 70 | 71 | double_mlplr_obj$fit() 72 | theta_obj = double_mlplr_obj$coef 73 | se_obj = double_mlplr_obj$se 74 | t_obj = double_mlplr_obj$t_stat 75 | pval_obj = double_mlplr_obj$pval 76 | # ci_obj = double_mlplr_obj$confint(level = 0.95, joint = FALSE) 77 | 78 | # bootstrap 79 | double_mlplr_obj$bootstrap(method = "normal", n_rep = n_rep_boot) 80 | boot_theta_obj = double_mlplr_obj$boot_coef 81 | 82 | expect_equal(theta, theta_obj, tolerance = 1e-8) 83 | expect_equal(se, se_obj, tolerance = 1e-8) 84 | expect_equal(t, t_obj, tolerance = 1e-8) 85 | expect_equal(pval, pval_obj, tolerance = 1e-8) 86 | # expect_equal(ci, ci_obj, tolerance = 1e-8) 87 | 88 | expect_equal(as.vector(boot_theta), as.vector(boot_theta_obj), tolerance = 1e-8) 89 | } 90 | ) 91 | -------------------------------------------------------------------------------- /tests/testthat/test-double_ml_plr_classifier.R: -------------------------------------------------------------------------------- 1 | context("Unit tests for PLR with a classifier for ml_m") 2 | 3 | library("mlr3learners") 4 | 5 | lgr::get_logger("mlr3")$set_threshold("warn") 6 | 7 | on_cran = !identical(Sys.getenv("NOT_CRAN"), "true") 8 | if (on_cran) { 9 | test_cases = expand.grid( 10 | l_learner = c("regr.rpart", "classif.rpart"), 11 | m_learner = "classif.rpart", 12 | g_learner = "regr.rpart", 13 | dml_procedure = "dml2", 14 | score = "partialling out", 15 | stringsAsFactors = FALSE) 16 | } else { 17 | test_cases = expand.grid( 18 | l_learner = c("regr.rpart", "classif.rpart"), 19 | m_learner = "classif.cv_glmnet", 20 | g_learner = "regr.cv_glmnet", 21 | dml_procedure = c("dml1", "dml2"), 22 | score = c("IV-type", "partialling out"), 23 | stringsAsFactors = FALSE) 24 | } 25 | test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 26 | 27 | patrick::with_parameters_test_that("Unit tests for PLR with classifier for ml_m:", 28 | .cases = test_cases, { 29 | 
n_rep_boot = 498 30 | n_folds = 3 31 | 32 | ml_l = mlr3::lrn(l_learner) 33 | ml_m = mlr3::lrn(m_learner) 34 | ml_g = mlr3::lrn(g_learner) 35 | 36 | if (ml_l$task_type == "regr") { 37 | 38 | set.seed(3141) 39 | if (score == "IV-type") { 40 | ml_g = ml_g$clone() 41 | } else { 42 | ml_g = NULL 43 | } 44 | plr_hat = dml_plr(data_irm$df, 45 | y = "y", d = "d", 46 | n_folds = n_folds, 47 | ml_l = ml_l$clone(), 48 | ml_m = ml_m$clone(), 49 | ml_g = ml_g, 50 | dml_procedure = dml_procedure, score = score) 51 | theta = plr_hat$coef 52 | se = plr_hat$se 53 | 54 | boot_theta = bootstrap_plr(plr_hat$thetas, plr_hat$ses, 55 | data_irm$df, 56 | y = "y", d = "d", 57 | n_folds = n_folds, smpls = plr_hat$smpls, 58 | all_preds = plr_hat$all_preds, 59 | bootstrap = "normal", n_rep_boot = n_rep_boot, 60 | score = score)$boot_coef 61 | 62 | t = plr_hat$t 63 | pval = plr_hat$pval 64 | 65 | set.seed(3141) 66 | if (score == "IV-type") { 67 | ml_g = ml_g$clone() 68 | } else { 69 | ml_g = NULL 70 | } 71 | double_mlplr_obj = DoubleMLPLR$new( 72 | data = data_irm$dml_data, 73 | ml_l = ml_l$clone(), 74 | ml_m = ml_m$clone(), 75 | ml_g = ml_g, 76 | dml_procedure = dml_procedure, 77 | n_folds = n_folds, 78 | score = score) 79 | double_mlplr_obj$fit() 80 | theta_obj = double_mlplr_obj$coef 81 | se_obj = double_mlplr_obj$se 82 | t_obj = double_mlplr_obj$t_stat 83 | pval_obj = double_mlplr_obj$pval 84 | # ci_obj = double_mlplr_obj$confint(level = 0.95, joint = FALSE) 85 | 86 | # bootstrap 87 | double_mlplr_obj$bootstrap(method = "normal", n_rep = n_rep_boot) 88 | boot_theta_obj = double_mlplr_obj$boot_coef 89 | 90 | expect_equal(theta, theta_obj, tolerance = 1e-8) 91 | expect_equal(se, se_obj, tolerance = 1e-8) 92 | expect_equal(t, t_obj, tolerance = 1e-8) 93 | expect_equal(pval, pval_obj, tolerance = 1e-8) 94 | # expect_equal(ci, ci_obj, tolerance = 1e-8) 95 | 96 | } else if (ml_l$task_type == "classif") { 97 | msg = "Invalid learner provided for ml_l: 'learner\\$task_type' must be 'regr'" 98 | if 
(score == "IV-type") { 99 | ml_g = ml_g$clone() 100 | } else { 101 | ml_g = NULL 102 | } 103 | expect_error(DoubleMLPLR$new( 104 | data = data_irm$dml_data, 105 | ml_l = ml_l$clone(), 106 | ml_m = ml_m$clone(), 107 | ml_g = ml_g, 108 | dml_procedure = dml_procedure, 109 | n_folds = n_folds, 110 | score = score), 111 | regexp = msg) 112 | } 113 | } 114 | ) 115 | 116 | test_that("Unit tests for exception handling of PLR with classifier for ml_m:", { 117 | # Only binary outcome with values 0 and 1 is allowed when ml_m is a classifier 118 | 119 | # Test with 0 and 2 120 | df = data_irm$df 121 | df["d"] = df["d"] * 2 122 | dml_data = double_ml_data_from_data_frame(df, y_col = "y", d_cols = "d") 123 | double_mlplr_obj = DoubleMLPLR$new( 124 | data = dml_data, 125 | ml_l = mlr3::lrn("regr.rpart"), 126 | ml_m = mlr3::lrn("classif.rpart")) 127 | msg = paste( 128 | "Assertion on 'levels\\(data\\[\\[target\\]\\])' failed: .* set \\{'0','1'\\}") 129 | expect_error(double_mlplr_obj$fit(), 130 | regexp = msg) 131 | 132 | # Test with 0.5 and 1 133 | df = data_irm$df 134 | df["d"] = (df["d"] + 2) / 2 135 | dml_data = double_ml_data_from_data_frame(df, y_col = "y", d_cols = "d") 136 | double_mlplr_obj = DoubleMLPLR$new( 137 | data = dml_data, 138 | ml_l = mlr3::lrn("regr.rpart"), 139 | ml_m = mlr3::lrn("classif.rpart")) 140 | msg = paste( 141 | "Assertion on 'levels\\(data\\[\\[target\\]\\])' failed: .* set \\{'0','1'\\}") 142 | expect_error(double_mlplr_obj$fit(), 143 | regexp = msg) 144 | }) 145 | -------------------------------------------------------------------------------- /tests/testthat/test-double_ml_plr_export_preds.R: -------------------------------------------------------------------------------- 1 | context("Unit tests for the export of predictions via fit(store_predictions = TRUE); uses PLR") 2 | 3 | library("mlr3learners") 4 | 5 | lgr::get_logger("mlr3")$set_threshold("warn") 6 | 7 | on_cran = !identical(Sys.getenv("NOT_CRAN"), "true") 8 | if (on_cran) { 9 | 
test_cases = expand.grid( 10 | l_learner = "regr.rpart", 11 | m_learner = "regr.rpart", 12 | g_learner = "regr.rpart", 13 | dml_procedure = "dml2", 14 | score = "partialling out", 15 | stringsAsFactors = FALSE) 16 | } else { 17 | test_cases = expand.grid( 18 | l_learner = c("regr.rpart", "regr.lm"), 19 | m_learner = c("regr.rpart", "regr.lm"), 20 | g_learner = c("regr.rpart", "regr.lm"), 21 | dml_procedure = "dml2", 22 | score = c("partialling out", "IV-type"), 23 | stringsAsFactors = FALSE) 24 | } 25 | test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 26 | 27 | patrick::with_parameters_test_that("Unit tests for the export of predictions:", 28 | .cases = test_cases, { 29 | n_folds = 3 30 | 31 | set.seed(3141) 32 | df = data_plr$df 33 | dml_data = data_plr$dml_data 34 | 35 | if (score == "IV-type") { 36 | ml_g = lrn(g_learner) 37 | } else { 38 | ml_g = NULL 39 | } 40 | double_mlplr_obj = DoubleMLPLR$new( 41 | data = dml_data, 42 | ml_l = lrn(l_learner), 43 | ml_m = lrn(m_learner), 44 | ml_g = ml_g, 45 | dml_procedure = dml_procedure, 46 | n_folds = n_folds, 47 | score = score) 48 | set.seed(3141) 49 | double_mlplr_obj$fit(store_predictions = TRUE, store_models = TRUE) 50 | 51 | set.seed(3141) 52 | Xnames = names(df)[names(df) %in% c("y", "d", "z") == FALSE] 53 | indx = (names(df) %in% c(Xnames, "y")) 54 | data = df[, indx] 55 | task = mlr3::TaskRegr$new(id = "ml_l", backend = data, target = "y") 56 | resampling_smpls = rsmp("custom")$instantiate( 57 | task, 58 | double_mlplr_obj$smpls[[1]]$train_ids, 59 | double_mlplr_obj$smpls[[1]]$test_ids) 60 | resampling_pred = resample(task, lrn(l_learner), resampling_smpls) 61 | preds_l = as.data.table(resampling_pred$prediction()) 62 | data.table::setorder(preds_l, "row_ids") 63 | 64 | Xnames = names(df)[names(df) %in% c("y", "d", "z") == FALSE] 65 | indx = (names(df) %in% c(Xnames, "d")) 66 | data = df[, indx] 67 | task = mlr3::TaskRegr$new(id = "ml_m", backend = data, target = "d") 68 | 
resampling_smpls = rsmp("custom")$instantiate( 69 | task, 70 | double_mlplr_obj$smpls[[1]]$train_ids, 71 | double_mlplr_obj$smpls[[1]]$test_ids) 72 | resampling_pred = resample(task, lrn(m_learner), resampling_smpls) 73 | preds_m = as.data.table(resampling_pred$prediction()) 74 | data.table::setorder(preds_m, "row_ids") 75 | 76 | if (score == "IV-type") { 77 | d = df[["d"]] 78 | y = df[["y"]] 79 | psi_a = -(d - preds_m$response) * (d - preds_m$response) 80 | psi_b = (d - preds_m$response) * (y - preds_l$response) 81 | theta_initial = -mean(psi_b, na.rm = TRUE) / mean(psi_a, na.rm = TRUE) 82 | 83 | data_aux = cbind(df, "y_minus_theta_d" = y - theta_initial * d) 84 | Xnames = names(data_aux)[names(data_aux) %in% 85 | c("y", "d", "z", "y_minus_theta_d") == FALSE] 86 | indx = (names(data_aux) %in% c(Xnames, "y_minus_theta_d")) 87 | data = data_aux[, indx] 88 | task = mlr3::TaskRegr$new( 89 | id = "ml_g", backend = data, 90 | target = "y_minus_theta_d") 91 | resampling_smpls = rsmp("custom")$instantiate( 92 | task, 93 | double_mlplr_obj$smpls[[1]]$train_ids, 94 | double_mlplr_obj$smpls[[1]]$test_ids) 95 | resampling_pred = resample(task, lrn(g_learner), resampling_smpls) 96 | preds_g = as.data.table(resampling_pred$prediction()) 97 | data.table::setorder(preds_g, "row_ids") 98 | 99 | expect_equal(as.vector(double_mlplr_obj$predictions$ml_g), 100 | as.vector(preds_g$response), 101 | tolerance = 1e-8) 102 | expect_class( 103 | double_mlplr_obj$models$ml_g$d[[1]][[1]], 104 | "LearnerRegr") 105 | } 106 | 107 | expect_equal(as.vector(double_mlplr_obj$predictions$ml_l), 108 | as.vector(preds_l$response), 109 | tolerance = 1e-8) 110 | expect_class( 111 | double_mlplr_obj$models$ml_l$d[[1]][[1]], 112 | "LearnerRegr") 113 | 114 | expect_equal(as.vector(double_mlplr_obj$predictions$ml_m), 115 | as.vector(preds_m$response), 116 | tolerance = 1e-8) 117 | expect_class( 118 | double_mlplr_obj$models$ml_m$d[[1]][[1]], 119 | "LearnerRegr") 120 | } 121 | ) 122 | 
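The IV-type branch of the export-predictions test above solves the linear moment condition mean(psi_a) * theta + mean(psi_b) = 0 for `theta_initial` before fitting ml_g on the partialled-out outcome. A minimal self-contained sketch of that solve, using made-up toy data and zero placeholder predictions instead of the exported mlr3 learner responses:

```r
# Sketch of the linear score solve behind `theta_initial` above.
# Assumption: `y` and `d` are toy vectors; `l_hat` and `m_hat` are zero
# placeholders standing in for the exported ml_l / ml_m predictions.
set.seed(42)
n = 100
d = rnorm(n)
y = 0.5 * d + rnorm(n)
l_hat = rep(0, n)  # would be preds_l$response in the test
m_hat = rep(0, n)  # would be preds_m$response in the test

# IV-type moments, exactly as formed in the test body
psi_a = -(d - m_hat) * (d - m_hat)
psi_b = (d - m_hat) * (y - l_hat)

# mean(psi_a) * theta + mean(psi_b) = 0  =>  theta = -mean(psi_b) / mean(psi_a)
theta_initial = -mean(psi_b, na.rm = TRUE) / mean(psi_a, na.rm = TRUE)
```

With zero placeholders this reduces to the no-intercept OLS slope of y on d; the test then regresses y - theta_initial * d on the covariates to obtain the ml_g predictions.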
-------------------------------------------------------------------------------- /tests/testthat/test-double_ml_plr_loaded_mlr3learner.R: -------------------------------------------------------------------------------- 1 | context("Unit tests for PLR with a loaded mlr3 learner") 2 | 3 | library("mlr3learners") 4 | 5 | lgr::get_logger("mlr3")$set_threshold("warn") 6 | 7 | on_cran = !identical(Sys.getenv("NOT_CRAN"), "true") 8 | if (on_cran) { 9 | test_cases = expand.grid( 10 | dml_procedure = "dml1", 11 | score = "IV-type", 12 | stringsAsFactors = FALSE) 13 | } else { 14 | test_cases = expand.grid( 15 | dml_procedure = c("dml1", "dml2"), 16 | score = c("IV-type", "partialling out"), 17 | stringsAsFactors = FALSE) 18 | } 19 | test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 20 | 21 | patrick::with_parameters_test_that("Unit tests for PLR:", 22 | .cases = test_cases, { 23 | n_folds = 2 24 | n_rep_boot = 498 25 | set.seed(3141) 26 | 27 | # load learner by name 28 | learner_name = "regr.rpart" 29 | params = list("cp" = 0.01, "minsplit" = 20) 30 | 31 | set.seed(123) 32 | if (score == "IV-type") { 33 | ml_g = learner_name 34 | } else { 35 | ml_g = NULL 36 | } 37 | double_mlplr = DoubleMLPLR$new( 38 | data = data_plr$dml_data, 39 | ml_l = learner_name, 40 | ml_m = learner_name, 41 | ml_g = ml_g, 42 | dml_procedure = dml_procedure, 43 | n_folds = n_folds, 44 | score = score) 45 | 46 | # set params for nuisance part m 47 | double_mlplr$set_ml_nuisance_params( 48 | learner = "ml_m", 49 | treat_var = "d", 50 | params = params) 51 | 52 | # set params for nuisance part l 53 | double_mlplr$set_ml_nuisance_params( 54 | learner = "ml_l", 55 | treat_var = "d", 56 | params = params) 57 | 58 | if (score == "IV-type") { 59 | # set params for nuisance part g 60 | double_mlplr$set_ml_nuisance_params( 61 | learner = "ml_g", 62 | treat_var = "d", 63 | params = params) 64 | } 65 | 66 | double_mlplr$fit() 67 | theta = double_mlplr$coef 68 | se = double_mlplr$se 69 | t = double_mlplr$t_stat 70 | 
pval = double_mlplr$pval 71 | ci = double_mlplr$confint(level = 0.95, joint = FALSE) 72 | double_mlplr$bootstrap(method = "normal", n_rep = n_rep_boot) 73 | boot_theta = double_mlplr$boot_coef 74 | 75 | set.seed(123) 76 | loaded_learner = mlr3::lrn("regr.rpart", "cp" = 0.01, "minsplit" = 20) 77 | if (score == "IV-type") { 78 | ml_g = loaded_learner 79 | } else { 80 | ml_g = NULL 81 | } 82 | double_mlplr_loaded = DoubleMLPLR$new( 83 | data = data_plr$dml_data, 84 | ml_l = loaded_learner, 85 | ml_m = loaded_learner, 86 | ml_g = ml_g, 87 | dml_procedure = dml_procedure, 88 | n_folds = n_folds, 89 | score = score) 90 | 91 | double_mlplr_loaded$fit() 92 | theta_loaded = double_mlplr_loaded$coef 93 | se_loaded = double_mlplr_loaded$se 94 | t_loaded = double_mlplr_loaded$t_stat 95 | pval_loaded = double_mlplr_loaded$pval 96 | ci_loaded = double_mlplr_loaded$confint(level = 0.95, joint = FALSE) 97 | double_mlplr_loaded$bootstrap(method = "normal", n_rep = n_rep_boot) 98 | boot_theta_loaded = double_mlplr_loaded$boot_coef 99 | 100 | set.seed(123) 101 | semiloaded_learner = mlr3::lrn("regr.rpart") 102 | if (score == "IV-type") { 103 | ml_g = semiloaded_learner 104 | } else { 105 | ml_g = NULL 106 | } 107 | double_mlplr_semiloaded = DoubleMLPLR$new( 108 | data = data_plr$dml_data, 109 | ml_l = semiloaded_learner, 110 | ml_m = semiloaded_learner, 111 | ml_g = ml_g, 112 | dml_procedure = dml_procedure, 113 | n_folds = n_folds, 114 | score = score) 115 | # set params for nuisance part m 116 | double_mlplr_semiloaded$set_ml_nuisance_params( 117 | learner = "ml_m", 118 | treat_var = "d", 119 | params = params) 120 | 121 | # set params for nuisance part l 122 | double_mlplr_semiloaded$set_ml_nuisance_params( 123 | learner = "ml_l", 124 | treat_var = "d", 125 | params = params) 126 | 127 | if (score == "IV-type") { 128 | # set params for nuisance part g 129 | double_mlplr_semiloaded$set_ml_nuisance_params( 130 | learner = "ml_g", 131 | treat_var = "d", 132 | params = params) 133 | } 134 | 135 | 
double_mlplr_semiloaded$fit() 136 | theta_semiloaded = double_mlplr_semiloaded$coef 137 | se_semiloaded = double_mlplr_semiloaded$se 138 | t_semiloaded = double_mlplr_semiloaded$t_stat 139 | pval_semiloaded = double_mlplr_semiloaded$pval 140 | ci_semiloaded = double_mlplr_semiloaded$confint(level = 0.95, joint = FALSE) 141 | double_mlplr_semiloaded$bootstrap(method = "normal", n_rep = n_rep_boot) 142 | boot_theta_semiloaded = double_mlplr_semiloaded$boot_coef 143 | 144 | expect_equal(theta, theta_loaded, tolerance = 1e-8) 145 | expect_equal(se, se_loaded, tolerance = 1e-8) 146 | expect_equal(t, t_loaded, tolerance = 1e-8) 147 | expect_equal(pval, pval_loaded, tolerance = 1e-8) 148 | expect_equal(ci, ci_loaded, tolerance = 1e-8) 149 | expect_equal(as.vector(boot_theta), as.vector(boot_theta_loaded), tolerance = 1e-8) 150 | 151 | expect_equal(theta_semiloaded, theta_loaded, tolerance = 1e-8) 152 | expect_equal(se_semiloaded, se_loaded, tolerance = 1e-8) 153 | expect_equal(t_semiloaded, t_loaded, tolerance = 1e-8) 154 | expect_equal(pval_semiloaded, pval_loaded, tolerance = 1e-8) 155 | expect_equal(ci_semiloaded, ci_loaded, tolerance = 1e-8) 156 | expect_equal(as.vector(boot_theta_semiloaded), as.vector(boot_theta_loaded), tolerance = 1e-8) 157 | } 158 | ) 159 | -------------------------------------------------------------------------------- /tests/testthat/test-double_ml_plr_multitreat.R: -------------------------------------------------------------------------------- 1 | context("Unit tests for PLR (multiple treatment case)") 2 | 3 | library("mlr3learners") 4 | 5 | lgr::get_logger("mlr3")$set_threshold("warn") 6 | 7 | on_cran = !identical(Sys.getenv("NOT_CRAN"), "true") 8 | if (on_cran) { 9 | test_cases = expand.grid( 10 | learner = "regr.lm", 11 | dml_procedure = "dml2", 12 | score = "partialling out", 13 | stringsAsFactors = FALSE) 14 | } else { 15 | test_cases = expand.grid( 16 | learner = c("regr.lm", "regr.cv_glmnet"), 17 | dml_procedure = c("dml1", "dml2"), 18 | score = 
c("IV-type", "partialling out"), 19 | stringsAsFactors = FALSE) 20 | } 21 | 22 | test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 23 | 24 | patrick::with_parameters_test_that("Unit tests for PLR:", 25 | .cases = test_cases, { 26 | learner_pars = get_default_mlmethod_plr(learner) 27 | n_rep_boot = 498 28 | 29 | n_folds = 5 30 | 31 | set.seed(3141) 32 | if (score == "IV-type") { 33 | ml_g = learner_pars$ml_g$clone() 34 | } else { 35 | ml_g = NULL 36 | } 37 | plr_hat = dml_plr_multitreat(data_plr_multi, 38 | y = "y", d = c("d1", "d2", "d3"), 39 | n_folds = n_folds, 40 | ml_l = learner_pars$ml_l$clone(), 41 | ml_m = learner_pars$ml_m$clone(), 42 | ml_g = ml_g, 43 | dml_procedure = dml_procedure, score = score) 44 | theta = plr_hat$coef 45 | se = plr_hat$se 46 | t = plr_hat$t 47 | pval = plr_hat$pval 48 | # ci_ptwise = confint(plr_hat, joint = FALSE, level = 0.95) 49 | 50 | set.seed(3141) 51 | boot_theta = boot_plr_multitreat(plr_hat$thetas, plr_hat$ses, 52 | data_plr_multi, 53 | y = "y", d = c("d1", "d2", "d3"), 54 | n_folds = n_folds, smpls = plr_hat$smpls, 55 | all_preds = plr_hat$all_preds, 56 | bootstrap = "normal", n_rep_boot = n_rep_boot, 57 | score = score)$boot_coef 58 | 59 | set.seed(3141) 60 | Xnames = names(data_plr_multi)[names(data_plr_multi) %in% c("y", "d1", "d2", "d3", "z") == FALSE] 61 | data_ml = double_ml_data_from_data_frame(data_plr_multi, 62 | y_col = "y", 63 | d_cols = c("d1", "d2", "d3"), x_cols = Xnames) 64 | 65 | if (score == "IV-type") { 66 | ml_g = learner_pars$ml_g$clone() 67 | } else { 68 | ml_g = NULL 69 | } 70 | double_mlplr_obj = DoubleMLPLR$new(data_ml, 71 | ml_l = learner_pars$ml_l$clone(), 72 | ml_m = learner_pars$ml_m$clone(), 73 | ml_g = ml_g, 74 | dml_procedure = dml_procedure, 75 | n_folds = n_folds, 76 | score = score) 77 | 78 | double_mlplr_obj$fit() 79 | theta_obj = double_mlplr_obj$coef 80 | se_obj = double_mlplr_obj$se 81 | t_obj = double_mlplr_obj$t_stat 82 | pval_obj = double_mlplr_obj$pval 83 | 84 | 
# bootstrap 85 | set.seed(3141) 86 | double_mlplr_obj$bootstrap(method = "normal", n_rep = n_rep_boot) 87 | boot_theta_obj = double_mlplr_obj$boot_coef 88 | 89 | # joint confint 90 | ci_ptwise_obj = double_mlplr_obj$confint(joint = FALSE, level = 0.95) 91 | ci_joint_obj = double_mlplr_obj$confint(joint = TRUE, level = 0.95) 92 | 93 | # at the moment the object result comes without a name 94 | expect_equal(theta, theta_obj, tolerance = 1e-8) 95 | expect_equal(se, se_obj, tolerance = 1e-8) 96 | expect_equal(t, t_obj, tolerance = 1e-8) 97 | expect_equal(pval, pval_obj, tolerance = 1e-8) 98 | 99 | expect_equal(as.vector(boot_theta), as.vector(boot_theta_obj), tolerance = 1e-8) 100 | # expect_equal(ci_ptwise, ci_ptwise_obj) 101 | # expect_equal(ci_joint, ci_joint_obj) 102 | } 103 | ) 104 | -------------------------------------------------------------------------------- /tests/testthat/test-double_ml_plr_nocrossfit.R: -------------------------------------------------------------------------------- 1 | context("Unit tests for PLR, no cross-fitting") 2 | 3 | library("mlr3learners") 4 | 5 | lgr::get_logger("mlr3")$set_threshold("warn") 6 | 7 | on_cran = !identical(Sys.getenv("NOT_CRAN"), "true") 8 | if (on_cran) { 9 | test_cases = expand.grid( 10 | learner = "regr.lm", 11 | dml_procedure = "dml2", 12 | score = "partialling out", 13 | apply_cross_fitting = FALSE, 14 | n_folds = c(1, 2), 15 | stringsAsFactors = FALSE) 16 | } else { 17 | test_cases = expand.grid( 18 | learner = "regr.lm", 19 | dml_procedure = c("dml1", "dml2"), 20 | score = c("IV-type", "partialling out"), 21 | apply_cross_fitting = FALSE, 22 | n_folds = c(1, 2), 23 | stringsAsFactors = FALSE) 24 | } 25 | test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 26 | 27 | patrick::with_parameters_test_that("Unit tests for PLR:", 28 | .cases = test_cases, { 29 | learner_pars = get_default_mlmethod_plr(learner) 30 | n_rep_boot = 498 31 | 32 | set.seed(3141) 33 | df = data_plr$df 34 | if 
(n_folds == 2) { 35 | my_task = Task$new("help task", "regr", df) 36 | my_sampling = rsmp("holdout", ratio = 0.5)$instantiate(my_task) 37 | train_ids = list(my_sampling$train_set(1)) 38 | test_ids = list(my_sampling$test_set(1)) 39 | 40 | smpls = list(list(train_ids = train_ids, test_ids = test_ids)) 41 | } else { 42 | smpls = list(list( 43 | train_ids = list(seq(nrow(df))), 44 | test_ids = list(seq(nrow(df))))) 45 | } 46 | if (score == "IV-type") { 47 | ml_g = learner_pars$ml_g$clone() 48 | } else { 49 | ml_g = NULL 50 | } 51 | plr_hat = dml_plr(df, 52 | y = "y", d = "d", 53 | n_folds = 1, 54 | ml_l = learner_pars$ml_l$clone(), 55 | ml_m = learner_pars$ml_m$clone(), 56 | ml_g = ml_g, 57 | dml_procedure = dml_procedure, score = score, 58 | smpls = smpls) 59 | theta = plr_hat$coef 60 | se = plr_hat$se 61 | t = plr_hat$t 62 | pval = plr_hat$pval 63 | 64 | set.seed(3141) 65 | if (score == "IV-type") { 66 | ml_g = learner_pars$ml_g$clone() 67 | } else { 68 | ml_g = NULL 69 | } 70 | double_mlplr_obj = DoubleMLPLR$new( 71 | data = data_plr$dml_data, 72 | ml_l = learner_pars$ml_l$clone(), 73 | ml_m = learner_pars$ml_m$clone(), 74 | ml_g = ml_g, 75 | dml_procedure = dml_procedure, 76 | n_folds = n_folds, 77 | score = score, 78 | apply_cross_fitting = apply_cross_fitting) 79 | 80 | double_mlplr_obj$fit(store_predictions = TRUE) 81 | theta_obj = double_mlplr_obj$coef 82 | se_obj = double_mlplr_obj$se 83 | t_obj = double_mlplr_obj$t_stat 84 | pval_obj = double_mlplr_obj$pval 85 | ci_obj = double_mlplr_obj$confint(level = 0.95, joint = FALSE) 86 | 87 | 88 | if (n_folds == 2) { 89 | if (score == "IV-type") { 90 | ml_g = learner_pars$ml_g$clone() 91 | } else { 92 | ml_g = NULL 93 | } 94 | dml_plr_obj_external = DoubleMLPLR$new( 95 | data = data_plr$dml_data, 96 | ml_l = learner_pars$ml_l$clone(), 97 | ml_m = learner_pars$ml_m$clone(), 98 | ml_g = ml_g, 99 | dml_procedure = dml_procedure, 100 | n_folds = n_folds, 101 | score = score, 102 | draw_sample_splitting = FALSE, 
apply_cross_fitting = FALSE) 103 | 104 | set.seed(3141) 105 | # set up a task and holdout resampling scheme in mlr3 106 | my_task = Task$new("help task", "regr", df) 107 | my_sampling = rsmp("holdout", ratio = 0.5)$instantiate(my_task) 108 | train_ids = list(my_sampling$train_set(1)) 109 | test_ids = list(my_sampling$test_set(1)) 110 | 111 | smpls = list(list(train_ids = train_ids, test_ids = test_ids)) 112 | 113 | dml_plr_obj_external$set_sample_splitting(smpls) 114 | dml_plr_obj_external$fit() 115 | 116 | theta_external = dml_plr_obj_external$coef 117 | se_external = dml_plr_obj_external$se 118 | t_external = dml_plr_obj_external$t_stat 119 | pval_external = dml_plr_obj_external$pval 120 | ci_external = dml_plr_obj_external$confint(level = 0.95, joint = FALSE) 121 | 122 | expect_identical(double_mlplr_obj$smpls, dml_plr_obj_external$smpls) 123 | expect_equal(theta_external, theta_obj, tolerance = 1e-8) 124 | expect_equal(se_external, se_obj, tolerance = 1e-8) 125 | expect_equal(t_external, t_obj, tolerance = 1e-8) 126 | expect_equal(pval_external, pval_obj, tolerance = 1e-8) 127 | expect_equal(ci_external, ci_obj, tolerance = 1e-8) 128 | 129 | expect_equal(theta, theta_obj, tolerance = 1e-8) 130 | expect_equal(se, se_obj, tolerance = 1e-8) 131 | expect_equal(t, t_obj, tolerance = 1e-8) 132 | expect_equal(pval, pval_obj, tolerance = 1e-8) 133 | 134 | } else { 135 | expect_equal(theta, theta_obj, tolerance = 1e-8) 136 | expect_equal(se, se_obj, tolerance = 1e-8) 137 | expect_equal(t, t_obj, tolerance = 1e-8) 138 | expect_equal(pval, pval_obj, tolerance = 1e-8) 139 | } 140 | 141 | # expect_equal(as.vector(plr_hat$boot_theta), as.vector(boot_theta_obj), tolerance = 1e-8) 142 | } 143 | ) 144 | -------------------------------------------------------------------------------- /tests/testthat/test-double_ml_plr_nonorth.R: -------------------------------------------------------------------------------- 1 | context("Unit tests for PLR with non-orthogonal scores") 2 | 3 | 
library("mlr3learners") 4 | 5 | lgr::get_logger("mlr3")$set_threshold("warn") 6 | 7 | non_orth_score_w_g = function(y, d, l_hat, m_hat, g_hat, smpls) { 8 | u_hat = y - g_hat 9 | psi_a = -1 * d * d 10 | psi_b = d * u_hat 11 | psis = list(psi_a = psi_a, psi_b = psi_b) 12 | return(psis) 13 | } 14 | 15 | non_orth_score_w_l = function(y, d, l_hat, m_hat, g_hat, smpls) { 16 | 17 | p_a = -(d - m_hat) * (d - m_hat) 18 | p_b = (d - m_hat) * (y - l_hat) 19 | theta_initial = -mean(p_b, na.rm = TRUE) / mean(p_a, na.rm = TRUE) 20 | g_hat = l_hat - theta_initial * m_hat 21 | 22 | u_hat = y - g_hat 23 | psi_a = -1 * d * d 24 | psi_b = d * u_hat 25 | psis = list(psi_a = psi_a, psi_b = psi_b) 26 | return(psis) 27 | } 28 | 29 | on_cran = !identical(Sys.getenv("NOT_CRAN"), "true") 30 | if (on_cran) { 31 | test_cases = expand.grid( 32 | learner = "regr.lm", 33 | dml_procedure = "dml1", 34 | which_score = c("non_orth_score_w_g"), 35 | n_folds = c(3), 36 | n_rep = c(2), 37 | stringsAsFactors = FALSE) 38 | } else { 39 | test_cases = expand.grid( 40 | learner = c("regr.lm", "regr.cv_glmnet"), 41 | dml_procedure = c("dml1", "dml2"), 42 | which_score = c( 43 | "non_orth_score_w_g", 44 | "non_orth_score_w_l"), 45 | n_folds = c(2, 3), 46 | n_rep = c(1, 2), 47 | stringsAsFactors = FALSE) 48 | } 49 | test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 50 | 51 | patrick::with_parameters_test_that("Unit tests for PLR:", 52 | .cases = test_cases, { 53 | learner_pars = get_default_mlmethod_plr(learner) 54 | 55 | if (which_score == "non_orth_score_w_g") { 56 | score = non_orth_score_w_g 57 | ml_g = learner_pars$ml_g$clone() 58 | } else if (which_score == "non_orth_score_w_l") { 59 | score = non_orth_score_w_l 60 | ml_g = NULL 61 | } 62 | 63 | n_rep_boot = 498 64 | set.seed(3141) 65 | double_mlplr_obj = DoubleMLPLR$new( 66 | data = data_plr$dml_data, 67 | ml_l = learner_pars$ml_l$clone(), 68 | ml_m = learner_pars$ml_m$clone(), 69 | ml_g = ml_g, 70 | dml_procedure = dml_procedure, 
71 | n_folds = n_folds, 72 | score = score) 73 | 74 | double_mlplr_obj$fit() 75 | theta_obj = double_mlplr_obj$coef 76 | se_obj = double_mlplr_obj$se 77 | t_obj = double_mlplr_obj$t_stat 78 | pval_obj = double_mlplr_obj$pval 79 | ci_obj = double_mlplr_obj$confint(level = 0.95, joint = FALSE) 80 | 81 | 82 | expect_is(theta_obj, "numeric") 83 | expect_is(se_obj, "numeric") 84 | expect_is(t_obj, "numeric") 85 | expect_is(pval_obj, "numeric") 86 | expect_is(ci_obj, "matrix") 87 | 88 | 89 | if (n_folds == 2 & n_rep == 1) { 90 | double_mlplr_nocf = DoubleMLPLR$new( 91 | data = data_plr$dml_data, 92 | ml_l = learner_pars$ml_l$clone(), 93 | ml_m = learner_pars$ml_m$clone(), 94 | ml_g = ml_g, 95 | dml_procedure = dml_procedure, 96 | n_folds = n_folds, 97 | score = score, 98 | apply_cross_fitting = FALSE) 99 | 100 | double_mlplr_nocf$fit() 101 | theta_nocf = double_mlplr_nocf$coef 102 | se_nocf = double_mlplr_nocf$se 103 | t_nocf = double_mlplr_nocf$t_stat 104 | pval_nocf = double_mlplr_nocf$pval 105 | ci_nocf = double_mlplr_nocf$confint(level = 0.95, joint = FALSE) 106 | 107 | expect_is(theta_nocf, "numeric") 108 | expect_is(se_nocf, "numeric") 109 | expect_is(t_nocf, "numeric") 110 | expect_is(pval_nocf, "numeric") 111 | expect_is(ci_nocf, "matrix") 112 | 113 | } 114 | 115 | # expect_equal(as.vector(plr_hat$boot_theta), as.vector(boot_theta_obj), tolerance = 1e-8) 116 | } 117 | ) 118 | -------------------------------------------------------------------------------- /tests/testthat/test-double_ml_plr_p_adjust.R: -------------------------------------------------------------------------------- 1 | context("Unit tests for PLR (p_adjust)") 2 | 3 | library("mlr3learners") 4 | 5 | lgr::get_logger("mlr3")$set_threshold("warn") 6 | 7 | on_cran = !identical(Sys.getenv("NOT_CRAN"), "true") 8 | if (on_cran) { 9 | test_cases = expand.grid( 10 | learner = "regr.rpart", 11 | dml_procedure = "dml1", 12 | score = "partialling out", 13 | method = c("romano-wolf"), 14 | apply_cross_fitting = 
c(TRUE), 15 | stringsAsFactors = FALSE) 16 | } else { 17 | test_cases = expand.grid( 18 | learner = "regr.cv_glmnet", 19 | dml_procedure = c("dml1", "dml2"), 20 | score = c("IV-type", "partialling out"), 21 | method = c("romano-wolf", "bonferroni"), 22 | apply_cross_fitting = c(TRUE, FALSE), 23 | stringsAsFactors = FALSE) 24 | } 25 | test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 26 | 27 | patrick::with_parameters_test_that("Unit tests for PLR:", 28 | .cases = test_cases, { 29 | learner_pars = get_default_mlmethod_plr(learner) 30 | 31 | n_rep_boot = 498 32 | 33 | if (!apply_cross_fitting) { 34 | n_folds = 2 35 | } else { 36 | n_folds = 5 37 | } 38 | 39 | set.seed(1) 40 | n = 100 # sample size 41 | p = 25 # number of variables 42 | s = 3 # number of non-zero variables 43 | X = matrix(rnorm(n * p), ncol = p) 44 | colnames(X) = paste("X", 1:p, sep = "") 45 | beta = c(rep(3, s), rep(0, p - s)) 46 | y = 1 + X %*% beta + rnorm(n) 47 | data = data.frame(cbind(y, X)) 48 | colnames(data)[1] = "y" 49 | 50 | # index for hypothesis testing 51 | k = 10 52 | data_ml = double_ml_data_from_data_frame(data, 53 | x_cols = colnames(X)[(k + 1):p], 54 | y_col = "y", 55 | d_cols = colnames(X)[1:k]) 56 | if (score == "IV-type") { 57 | ml_g = learner_pars$ml_g$clone() 58 | } else { 59 | ml_g = NULL 60 | } 61 | double_mlplr_obj = DoubleMLPLR$new(data_ml, 62 | ml_l = learner_pars$ml_l$clone(), 63 | ml_m = learner_pars$ml_m$clone(), 64 | ml_g = ml_g, 65 | dml_procedure = dml_procedure, 66 | n_folds = n_folds, 67 | score = score, 68 | apply_cross_fitting = apply_cross_fitting) 69 | double_mlplr_obj$fit() 70 | double_mlplr_obj$bootstrap() 71 | double_mlplr_obj$p_adjust(method = method) 72 | expect_true(is.matrix(double_mlplr_obj$p_adjust(method = method))) 73 | } 74 | ) 75 | -------------------------------------------------------------------------------- /tests/testthat/test-double_ml_plr_rep_cross_fit.R: 
-------------------------------------------------------------------------------- 1 | context("Unit tests for PLR with repeated cross-fitting") 2 | 3 | library("mlr3learners") 4 | 5 | lgr::get_logger("mlr3")$set_threshold("warn") 6 | 7 | on_cran = !identical(Sys.getenv("NOT_CRAN"), "true") 8 | if (on_cran) { 9 | test_cases = expand.grid( 10 | learner = "regr.lm", 11 | dml_procedure = "dml1", 12 | score = "partialling out", 13 | n_rep = c(5), 14 | stringsAsFactors = FALSE) 15 | test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 16 | } else { 17 | test_cases = expand.grid( 18 | learner = c("regr.lm", "regr.cv_glmnet"), 19 | dml_procedure = c("dml1", "dml2"), 20 | score = c("IV-type", "partialling out"), 21 | n_rep = c(2, 5), 22 | stringsAsFactors = FALSE) 23 | test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 24 | } 25 | 26 | patrick::with_parameters_test_that("Unit tests for PLR:", 27 | .cases = test_cases, { 28 | learner_pars = get_default_mlmethod_plr(learner) 29 | n_rep_boot = 498 30 | 31 | set.seed(3141) 32 | n_folds = 5 33 | if (score == "IV-type") { 34 | ml_g = learner_pars$ml_g$clone() 35 | } else { 36 | ml_g = NULL 37 | } 38 | plr_hat = dml_plr(data_plr$df, 39 | y = "y", d = "d", 40 | n_folds = n_folds, n_rep = n_rep, 41 | ml_l = learner_pars$ml_l$clone(), 42 | ml_m = learner_pars$ml_m$clone(), 43 | ml_g = ml_g, 44 | dml_procedure = dml_procedure, score = score) 45 | theta = plr_hat$coef 46 | se = plr_hat$se 47 | t = plr_hat$t 48 | pval = plr_hat$pval 49 | # ci = confint(plr_hat, level = 0.95, joint = FALSE) 50 | 51 | boot_theta = bootstrap_plr(plr_hat$thetas, plr_hat$ses, 52 | data_plr$df, 53 | y = "y", d = "d", 54 | n_folds = n_folds, n_rep = n_rep, 55 | smpls = plr_hat$smpls, 56 | all_preds = plr_hat$all_preds, 57 | bootstrap = "normal", n_rep_boot = n_rep_boot, 58 | score = score)$boot_coef 59 | 60 | set.seed(3141) 61 | if (score == "IV-type") { 62 | ml_g = learner_pars$ml_g$clone() 63 | } else { 64 | ml_g = NULL 
65 | } 66 | double_mlplr_obj = DoubleMLPLR$new( 67 | data = data_plr$dml_data, 68 | ml_l = learner_pars$ml_l$clone(), 69 | ml_m = learner_pars$ml_m$clone(), 70 | ml_g = ml_g, 71 | dml_procedure = dml_procedure, 72 | n_folds = n_folds, 73 | score = score, 74 | n_rep = n_rep) 75 | 76 | double_mlplr_obj$fit() 77 | theta_obj = double_mlplr_obj$coef 78 | se_obj = double_mlplr_obj$se 79 | t_obj = double_mlplr_obj$t_stat 80 | pval_obj = double_mlplr_obj$pval 81 | # ci_obj = double_mlplr_obj$confint(level = 0.95, joint = FALSE) 82 | 83 | # bootstrap 84 | double_mlplr_obj$bootstrap(method = "normal", n_rep_boot = n_rep_boot) 85 | boot_theta_obj = double_mlplr_obj$boot_coef 86 | 87 | expect_equal(theta, theta_obj, tolerance = 1e-8) 88 | expect_equal(se, se_obj, tolerance = 1e-8) 89 | expect_equal(t, t_obj, tolerance = 1e-8) 90 | expect_equal(pval, pval_obj, tolerance = 1e-8) 91 | # expect_equal(ci, ci_obj, tolerance = 1e-8) 92 | 93 | expect_equal(as.vector(boot_theta), as.vector(boot_theta_obj), tolerance = 1e-8) 94 | } 95 | ) 96 | -------------------------------------------------------------------------------- /tests/testthat/test-double_ml_plr_set_samples.R: -------------------------------------------------------------------------------- 1 | context("Unit tests for PLR with external sample provision") 2 | 3 | library("mlr3learners") 4 | 5 | lgr::get_logger("mlr3")$set_threshold("warn") 6 | 7 | on_cran = !identical(Sys.getenv("NOT_CRAN"), "true") 8 | if (on_cran) { 9 | test_cases = expand.grid( 10 | learner = "regr.rpart", 11 | dml_procedure = "dml2", 12 | score = "partialling out", 13 | n_folds = c(2), 14 | n_rep = c(1), 15 | stringsAsFactors = FALSE) 16 | } else { 17 | test_cases = expand.grid( 18 | learner = "regr.cv_glmnet", 19 | dml_procedure = c("dml1", "dml2"), 20 | score = c("IV-type", "partialling out"), 21 | n_folds = c(2, 3), 22 | n_rep = c(1, 3), 23 | stringsAsFactors = FALSE) 24 | } 25 | test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 26 
| 27 | patrick::with_parameters_test_that("PLR with external sample provision:", 28 | .cases = test_cases, { 29 | learner_pars = get_default_mlmethod_plr(learner) 30 | n_rep_boot = 346 31 | 32 | set.seed(3141) 33 | Xnames = names(data_plr$df)[names(data_plr$df) %in% c("y", "d", "z") == FALSE] 34 | data_ml = double_ml_data_from_data_frame(data_plr$df, 35 | y_col = "y", 36 | d_cols = "d", x_cols = Xnames) 37 | 38 | if (score == "IV-type") { 39 | ml_g = learner_pars$ml_g$clone() 40 | } else { 41 | ml_g = NULL 42 | } 43 | double_mlplr_obj = DoubleMLPLR$new(data_ml, 44 | ml_l = learner_pars$ml_l$clone(), 45 | ml_m = learner_pars$ml_m$clone(), 46 | ml_g = ml_g, 47 | dml_procedure = dml_procedure, 48 | n_folds = n_folds, 49 | score = score, 50 | n_rep = n_rep) 51 | 52 | set.seed(123) 53 | double_mlplr_obj$fit() 54 | theta_obj = double_mlplr_obj$coef 55 | se_obj = double_mlplr_obj$se 56 | double_mlplr_obj$bootstrap(method = "normal", n_rep = n_rep_boot) 57 | boot_theta_obj = double_mlplr_obj$boot_coef 58 | 59 | # External sample provision 60 | SAMPLES = double_mlplr_obj$smpls 61 | if (score == "IV-type") { 62 | ml_g = learner_pars$ml_g$clone() 63 | } else { 64 | ml_g = NULL 65 | } 66 | double_mlplr_obj_external = DoubleMLPLR$new(data_ml, 67 | ml_l = learner_pars$ml_l$clone(), 68 | ml_m = learner_pars$ml_m$clone(), 69 | ml_g = ml_g, 70 | dml_procedure = dml_procedure, 71 | score = score, 72 | draw_sample_splitting = FALSE) 73 | 74 | double_mlplr_obj_external$set_sample_splitting(SAMPLES) 75 | 76 | set.seed(123) 77 | double_mlplr_obj_external$fit() 78 | theta_obj_external = double_mlplr_obj_external$coef 79 | se_obj_external = double_mlplr_obj_external$se 80 | double_mlplr_obj_external$bootstrap(method = "normal", n_rep = n_rep_boot) 81 | boot_theta_obj_external = double_mlplr_obj_external$boot_coef 82 | 83 | expect_equal(theta_obj, theta_obj_external, tolerance = 1e-8) 84 | expect_equal(se_obj, se_obj_external, tolerance = 1e-8) 85 | expect_equal(as.vector(boot_theta_obj), 
as.vector(boot_theta_obj_external), tolerance = 1e-8) 86 | } 87 | ) 88 | -------------------------------------------------------------------------------- /tests/testthat/test-double_ml_plr_tuning.R: -------------------------------------------------------------------------------- 1 | context("Unit tests for tuning of PLR") 2 | 3 | requireNamespace("lgr") 4 | 5 | logger = lgr::get_logger("bbotk") 6 | logger$set_threshold("warn") 7 | lgr::get_logger("mlr3")$set_threshold("warn") 8 | 9 | # settings for parameter provision 10 | # learner = c('regr.rpart') 11 | # 12 | # learner_list = list("mlmethod_m" = learner, "mlmethod_g" = learner) 13 | 14 | on_cran = !identical(Sys.getenv("NOT_CRAN"), "true") 15 | if (on_cran) { 16 | test_cases = expand.grid( 17 | learner = "regr.rpart", 18 | m_learner = "regr.rpart", 19 | dml_procedure = "dml2", 20 | score = "partialling out", 21 | n_rep = c(1), 22 | tune_on_folds = c(FALSE, TRUE), 23 | stringsAsFactors = FALSE) 24 | } else { 25 | test_cases = expand.grid( 26 | learner = "regr.rpart", 27 | m_learner = c("regr.rpart", "classif.rpart"), 28 | dml_procedure = c("dml1", "dml2"), 29 | score = c("IV-type", "partialling out"), 30 | n_rep = c(1, 3), 31 | tune_on_folds = c(FALSE, TRUE), 32 | stringsAsFactors = FALSE) 33 | } 34 | 35 | 36 | test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 37 | 38 | # skip('Skip tests for PLR tuning') 39 | patrick::with_parameters_test_that("Unit tests for tuning of PLR:", 40 | .cases = test_cases, { 41 | n_rep_boot = 498 42 | n_folds = 4 43 | 44 | set.seed(3141) 45 | Xnames = names(data_plr_multi)[names(data_plr_multi) %in% c("y", "d1", "d2", "z") == FALSE] 46 | if (m_learner == "regr.rpart") { 47 | data_ml = double_ml_data_from_data_frame(data_plr_multi, 48 | y_col = "y", 49 | d_cols = c("d1", "d2"), x_cols = Xnames) 50 | 51 | } else if (m_learner == "classif.rpart") { 52 | data_plr_binary = data_plr_multi 53 | data_plr_binary$d1 = as.numeric(data_plr_binary$d1 > 0) 54 | 
data_plr_binary$d2 = as.numeric(data_plr_binary$d2 > 0) 55 | data_ml = double_ml_data_from_data_frame(data_plr_binary, 56 | y_col = "y", 57 | d_cols = c("d1", "d2"), x_cols = Xnames) 58 | } 59 | if (score == "IV-type") { 60 | ml_g = learner 61 | } else { 62 | ml_g = NULL 63 | } 64 | double_mlplr_obj_tuned = DoubleMLPLR$new(data_ml, 65 | n_folds = n_folds, 66 | ml_l = learner, 67 | ml_m = m_learner, 68 | ml_g = ml_g, 69 | dml_procedure = dml_procedure, 70 | score = score, 71 | n_rep = n_rep) 72 | 73 | tune_sets = list( 74 | n_folds_tune = 2, 75 | 76 | rsmp_tune = "cv", 77 | terminator = mlr3tuning::trm("evals", n_evals = 2), 78 | algorithm = "grid_search", 79 | resolution = 5) 80 | 81 | param_grid = list( 82 | "ml_l" = paradox::ps( 83 | cp = paradox::p_dbl(lower = 0.02, upper = 0.03), 84 | minsplit = paradox::p_int(lower = 1, upper = 2)), 85 | "ml_m" = paradox::ps( 86 | cp = paradox::p_dbl(lower = 0.03, upper = 0.04), 87 | minsplit = paradox::p_int(lower = 2, upper = 3))) 88 | 89 | if (score == "IV-type") { 90 | param_grid[["ml_g"]] = paradox::ps( 91 | cp = paradox::p_dbl(lower = 0.015, upper = 0.025), 92 | minsplit = paradox::p_int(lower = 3, upper = 4)) 93 | } 94 | 95 | double_mlplr_obj_tuned$tune(param_set = param_grid, tune_on_folds = tune_on_folds, tune_settings = tune_sets) 96 | 97 | double_mlplr_obj_tuned$fit() 98 | 99 | theta_obj_tuned = double_mlplr_obj_tuned$coef 100 | se_obj_tuned = double_mlplr_obj_tuned$se 101 | 102 | # bootstrap 103 | # double_mlplr_obj_tuned$bootstrap(method = 'normal', n_rep = n_rep_boot) 104 | # boot_theta_obj_tuned = double_mlplr_obj_tuned$boot_coef 105 | 106 | # restrictions to test 107 | # Functional (tbd) vs OOP implementation (handling randomness in param selection!?) 
108 | 109 | # Test case without including "other" treatment variables 110 | 111 | expect_is(theta_obj_tuned, "numeric") 112 | expect_is(se_obj_tuned, "numeric") 113 | # 114 | # data_ml$use_other_treat_as_covariate = FALSE 115 | # double_mlplr_obj_tuned$tune() 116 | # double_mlplr_obj_tuned$fit() 117 | # theta_obj_tuned = double_mlplr_obj_tuned$coef 118 | # se_obj_tuned = double_mlplr_obj_tuned$se 119 | # 120 | # expect_is(theta_obj_tuned, "numeric") 121 | # expect_is(se_obj_tuned, "numeric") 122 | } 123 | ) 124 | -------------------------------------------------------------------------------- /tests/testthat/test-double_ml_plr_user_score.R: -------------------------------------------------------------------------------- 1 | context("Unit tests for PLR, callable score") 2 | 3 | library("mlr3learners") 4 | 5 | lgr::get_logger("mlr3")$set_threshold("warn") 6 | 7 | score_fct = function(y, d, l_hat, m_hat, g_hat, smpls) { 8 | v_hat = d - m_hat 9 | u_hat = y - l_hat 10 | v_hatd = v_hat * d 11 | psi_a = -v_hat * v_hat 12 | psi_b = v_hat * u_hat 13 | psis = list(psi_a = psi_a, psi_b = psi_b) 14 | return(psis) 15 | } 16 | 17 | on_cran = !identical(Sys.getenv("NOT_CRAN"), "true") 18 | if (on_cran) { 19 | test_cases = expand.grid( 20 | learner = "regr.lm", 21 | dml_procedure = "dml1", 22 | n_folds = c(3), 23 | n_rep = c(2), 24 | stringsAsFactors = FALSE) 25 | } else { 26 | test_cases = expand.grid( 27 | learner = c("regr.lm", "regr.glmnet"), 28 | dml_procedure = c("dml1", "dml2"), 29 | n_folds = c(2, 3), 30 | n_rep = c(1, 2), 31 | stringsAsFactors = FALSE) 32 | } 33 | test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 34 | 35 | patrick::with_parameters_test_that("Unit tests for PLR, callable score:", 36 | .cases = test_cases, { 37 | n_rep_boot = 498 38 | set.seed(3141) 39 | 40 | double_mlplr_obj = DoubleMLPLR$new( 41 | data = data_plr$dml_data, 42 | ml_l = lrn(learner), 43 | ml_m = lrn(learner), 44 | dml_procedure = dml_procedure, 45 | n_folds = n_folds, 
46 | score = "partialling out") 47 | 48 | double_mlplr_obj$fit() 49 | theta_obj = double_mlplr_obj$coef 50 | se_obj = double_mlplr_obj$se 51 | t_obj = double_mlplr_obj$t_stat 52 | pval_obj = double_mlplr_obj$pval 53 | ci_obj = double_mlplr_obj$confint(level = 0.95, joint = FALSE) 54 | 55 | set.seed(3141) 56 | double_mlplr_obj_score = DoubleMLPLR$new( 57 | data = data_plr$dml_data, 58 | ml_l = lrn(learner), 59 | ml_m = lrn(learner), 60 | dml_procedure = dml_procedure, 61 | n_folds = n_folds, 62 | score = score_fct) 63 | double_mlplr_obj_score$fit() 64 | theta_obj_score = double_mlplr_obj_score$coef 65 | se_obj_score = double_mlplr_obj_score$se 66 | t_obj_score = double_mlplr_obj_score$t_stat 67 | pval_obj_score = double_mlplr_obj_score$pval 68 | ci_obj_score = double_mlplr_obj_score$confint(level = 0.95, joint = FALSE) 69 | 70 | expect_equal(theta_obj_score, theta_obj, tolerance = 1e-8) 71 | expect_equal(se_obj_score, se_obj, tolerance = 1e-8) 72 | } 73 | ) 74 | -------------------------------------------------------------------------------- /tests/testthat/test-double_ml_print.R: -------------------------------------------------------------------------------- 1 | context("Unit tests for print methods") 2 | 3 | lgr::get_logger("mlr3")$set_threshold("warn") 4 | 5 | set.seed(3141) 6 | dml_data = make_plr_CCDDHNR2018(n_obs = 100) 7 | dml_cluster_data = make_pliv_multiway_cluster_CKMS2021(N = 10, M = 10, dim_X = 5) 8 | 9 | ml_l = ml_g = ml_m = ml_r = "regr.rpart" 10 | dml_plr = DoubleMLPLR$new(dml_data, ml_l, ml_m, n_folds = 2) 11 | dml_pliv = DoubleMLPLIV$new(dml_cluster_data, ml_g, ml_m, ml_r, n_folds = 2) 12 | dml_plr$fit() 13 | dml_pliv$fit() 14 | 15 | test_that("DoubleMLData print method", { 16 | verify_output(test_path("print_outputs/dml_data.txt"), { 17 | print(dml_data) 18 | }) 19 | }) 20 | 21 | test_that("DoubleMLClusterData print method", { 22 | verify_output(test_path("print_outputs/dml_cluster_data.txt"), { 23 | print(dml_cluster_data) 24 | }) 25 | }) 26 | 
27 | test_that("DoubleMLPLR print method", { 28 | verify_output(test_path("print_outputs/dml_plr.txt"), { 29 | print(dml_plr) 30 | }) 31 | }) 32 | 33 | test_that("DoubleMLPLIV print method", { 34 | verify_output(test_path("print_outputs/dml_pliv.txt"), { 35 | print(dml_pliv) 36 | }) 37 | }) 38 | -------------------------------------------------------------------------------- /tests/testthat/test-double_ml_ssm_mar.R: -------------------------------------------------------------------------------- 1 | context("Unit tests for SSM, missing at random") 2 | 3 | library("mlr3learners") 4 | 5 | lgr::get_logger("mlr3")$set_threshold("warn") 6 | 7 | on_cran = !identical(Sys.getenv("NOT_CRAN"), "true") 8 | if (on_cran) { 9 | test_cases = expand.grid( 10 | learner = "cv_glmnet", 11 | dml_procedure = "dml1", 12 | score = "missing-at-random", 13 | trimming_threshold = 0, 14 | stringsAsFactors = FALSE) 15 | test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 16 | } else { 17 | test_cases = expand.grid( 18 | learner = c("cv_glmnet", "graph_learner"), 19 | dml_procedure = c("dml1", "dml2"), 20 | score = "missing-at-random", 21 | trimming_threshold = 0, 22 | stringsAsFactors = FALSE) 23 | test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 24 | } 25 | 26 | patrick::with_parameters_test_that("Unit tests for SSM, missing-at-random:", 27 | .cases = test_cases, { 28 | learner_pars = get_default_mlmethod_ssm(learner) 29 | n_rep_boot = 498 30 | 31 | set.seed(3141) 32 | ssm_hat = dml_ssm(data_ssm_mar$df, 33 | y = "y", d = "d", s = "s", 34 | n_folds = 5, 35 | ml_pi = learner_pars$ml_pi$clone(), ml_m = learner_pars$ml_m$clone(), ml_g = learner_pars$ml_g$clone(), 36 | dml_procedure = dml_procedure, score = score) 37 | theta = ssm_hat$coef 38 | se = ssm_hat$se 39 | 40 | boot_theta = bootstrap_ssm(ssm_hat$thetas, ssm_hat$ses, 41 | data_ssm_mar$df, 42 | y = "y", d = "d", s = "s", 43 | n_folds = 5, smpls = ssm_hat$smpls, 44 | all_preds = ssm_hat$all_preds, 45 
| score = score, 46 | bootstrap = "normal", n_rep_boot = n_rep_boot)$boot_coef 47 | 48 | 49 | set.seed(3141) 50 | double_mlssm_obj = DoubleMLSSM$new( 51 | data = data_ssm_mar$dml_data, 52 | n_folds = 5, 53 | ml_pi = learner_pars$ml_pi$clone(), 54 | ml_m = learner_pars$ml_m$clone(), 55 | ml_g = learner_pars$ml_g$clone(), 56 | dml_procedure = dml_procedure, 57 | score = score, 58 | trimming_threshold = trimming_threshold) 59 | 60 | double_mlssm_obj$fit() 61 | theta_obj = double_mlssm_obj$coef 62 | se_obj = double_mlssm_obj$se 63 | 64 | # bootstrap 65 | double_mlssm_obj$bootstrap(method = "normal", n_rep = n_rep_boot) 66 | boot_theta_obj = double_mlssm_obj$boot_coef 67 | 68 | # at the moment the object result comes without a name 69 | expect_equal(theta, theta_obj, tolerance = 1e-8) 70 | expect_equal(se, se_obj, tolerance = 1e-8) 71 | expect_equal(as.vector(boot_theta), as.vector(boot_theta_obj), tolerance = 1e-8) 72 | } 73 | ) 74 | -------------------------------------------------------------------------------- /tests/testthat/test-double_ml_ssm_nonignorable.R: -------------------------------------------------------------------------------- 1 | context("Unit tests for SSM, nonignorable nonresponse") 2 | 3 | library("mlr3learners") 4 | 5 | lgr::get_logger("mlr3")$set_threshold("warn") 6 | 7 | on_cran = !identical(Sys.getenv("NOT_CRAN"), "true") 8 | if (on_cran) { 9 | test_cases = expand.grid( 10 | learner = "cv_glmnet", 11 | dml_procedure = "dml1", 12 | score = "nonignorable", 13 | trimming_threshold = 0, 14 | stringsAsFactors = FALSE) 15 | test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 16 | } else { 17 | test_cases = expand.grid( 18 | learner = c("cv_glmnet", "graph_learner"), 19 | dml_procedure = c("dml1", "dml2"), 20 | score = "nonignorable", 21 | trimming_threshold = 0, 22 | stringsAsFactors = FALSE) 23 | test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 24 | } 25 | 26 | patrick::with_parameters_test_that("Unit tests 
for SSM, nonignorable nonresponse:", 27 | .cases = test_cases, { 28 | learner_pars = get_default_mlmethod_ssm(learner) 29 | n_rep_boot = 498 30 | 31 | set.seed(3141) 32 | ssm_hat = dml_ssm(data_ssm_nonignorable$df, 33 | y = "y", d = "d", z = "z", s = "s", 34 | n_folds = 5, 35 | ml_pi = learner_pars$ml_pi$clone(), ml_m = learner_pars$ml_m$clone(), ml_g = learner_pars$ml_g$clone(), 36 | dml_procedure = dml_procedure, score = score) 37 | theta = ssm_hat$coef 38 | se = ssm_hat$se 39 | 40 | boot_theta = bootstrap_ssm(ssm_hat$thetas, ssm_hat$ses, 41 | data_ssm_nonignorable$df, 42 | y = "y", d = "d", s = "s", 43 | n_folds = 5, smpls = ssm_hat$smpls, 44 | all_preds = ssm_hat$all_preds, 45 | score = score, 46 | bootstrap = "normal", n_rep_boot = n_rep_boot)$boot_coef 47 | 48 | 49 | set.seed(3141) 50 | double_mlssm_obj = DoubleMLSSM$new( 51 | data = data_ssm_nonignorable$dml_data, 52 | n_folds = 5, 53 | ml_pi = learner_pars$ml_pi$clone(), 54 | ml_m = learner_pars$ml_m$clone(), 55 | ml_g = learner_pars$ml_g$clone(), 56 | dml_procedure = dml_procedure, 57 | score = score, 58 | trimming_threshold = trimming_threshold) 59 | 60 | double_mlssm_obj$fit() 61 | theta_obj = double_mlssm_obj$coef 62 | se_obj = double_mlssm_obj$se 63 | 64 | # bootstrap 65 | double_mlssm_obj$bootstrap(method = "normal", n_rep = n_rep_boot) 66 | boot_theta_obj = double_mlssm_obj$boot_coef 67 | 68 | # at the moment the object result comes without a name 69 | expect_equal(theta, theta_obj, tolerance = 1e-8) 70 | expect_equal(se, se_obj, tolerance = 1e-8) 71 | expect_equal(as.vector(boot_theta), as.vector(boot_theta_obj), tolerance = 1e-8) 72 | } 73 | ) 74 | -------------------------------------------------------------------------------- /tests/testthat/test-double_ml_ssm_tuning.R: -------------------------------------------------------------------------------- 1 | context("Unit tests for tuning of SSM") 2 | 3 | requireNamespace("lgr") 4 | 5 | logger = lgr::get_logger("bbotk") 6 | 
logger$set_threshold("warn") 7 | lgr::get_logger("mlr3")$set_threshold("warn") 8 | 9 | tune_settings = list( 10 | rsmp_tune = rsmp("cv", folds = 3), 11 | measure = list( 12 | "ml_m" = "classif.ce", 13 | "ml_g" = "regr.mse", 14 | "ml_pi" = "classif.ce"), 15 | terminator = mlr3tuning::trm("evals", n_evals = 5), 16 | algorithm = tnr("random_search")) 17 | 18 | learner = "rpart" 19 | 20 | 21 | on_cran = !identical(Sys.getenv("NOT_CRAN"), "true") 22 | if (on_cran) { 23 | test_cases = expand.grid( 24 | learner_list = learner, 25 | dml_procedure = "dml2", 26 | score = c("missing-at-random", "nonignorable"), 27 | n_rep = c(1), 28 | tune_on_folds = FALSE, 29 | stringsAsFactors = FALSE) 30 | } else { 31 | test_cases = expand.grid( 32 | learner_list = learner, 33 | dml_procedure = c("dml1", "dml2"), 34 | score = c("missing-at-random", "nonignorable"), 35 | n_rep = c(1, 3), 36 | tune_on_folds = c(FALSE, TRUE), 37 | stringsAsFactors = FALSE) 38 | } 39 | 40 | test_cases[".test_name"] = apply(test_cases, 1, paste, collapse = "_") 41 | 42 | patrick::with_parameters_test_that("Unit tests for tuning of SSM:", 43 | .cases = test_cases, { 44 | n_rep_boot = 498 45 | n_folds = 5 46 | 47 | if (score == "missing-at-random") { 48 | dml_data = data_ssm_mar$dml_data 49 | } else { 50 | dml_data = data_ssm_nonignorable$dml_data 51 | } 52 | 53 | set.seed(3141) 54 | double_mlssm_obj_tuned = DoubleMLSSM$new( 55 | data = dml_data, 56 | n_folds = n_folds, 57 | ml_g = lrn("regr.rpart"), 58 | ml_m = lrn("classif.rpart"), 59 | ml_pi = lrn("classif.rpart"), 60 | dml_procedure = dml_procedure, 61 | score = score, 62 | n_rep = n_rep) 63 | 64 | param_grid = list( 65 | "ml_m" = paradox::ps( 66 | cp = paradox::p_dbl(lower = 0.01, upper = 0.02), 67 | minsplit = paradox::p_int(lower = 1, upper = 2)), 68 | "ml_g" = paradox::ps( 69 | cp = paradox::p_dbl(lower = 0.01, upper = 0.02), 70 | minsplit = paradox::p_int(lower = 1, upper = 2)), 71 | "ml_pi" = paradox::ps( 72 | cp = paradox::p_dbl(lower = 0.01, upper = 
0.02), 73 | minsplit = paradox::p_int(lower = 1, upper = 2))) 74 | 75 | double_mlssm_obj_tuned$tune(param_set = param_grid, tune_on_folds = tune_on_folds, tune_settings = tune_settings) 76 | 77 | # skip if tune_on_folds = TRUE & score == "nonignorable" 78 | if (tune_on_folds && score == "nonignorable") { 79 | skip("Skipping test for tuning on folds with nonignorable score") 80 | } else { 81 | double_mlssm_obj_tuned$fit() 82 | 83 | theta_obj_tuned = double_mlssm_obj_tuned$coef 84 | se_obj_tuned = double_mlssm_obj_tuned$se 85 | 86 | expect_is(theta_obj_tuned, "numeric") 87 | expect_is(se_obj_tuned, "numeric") 88 | } 89 | } 90 | ) 91 | -------------------------------------------------------------------------------- /tests/testthat_regression_tests.R: -------------------------------------------------------------------------------- 1 | library("testthat") 2 | library("patrick") 3 | library("DoubleML") 4 | 5 | testthat::test_check("DoubleML") 6 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | -------------------------------------------------------------------------------- /vignettes/Introduction_to_DoubleML.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "DoubleML - An Object-Oriented Implementation of Double Machine Learning in R" 3 | date: "`r Sys.Date()`" 4 | output: rmarkdown::html_vignette 5 | vignette: > 6 | %\VignetteIndexEntry{DoubleML - An Object-Oriented Implementation of Double Machine Learning in R} 7 | %\VignetteEngine{knitr::rmarkdown} 8 | %\VignetteEncoding{UTF-8} 9 | --- 10 | 11 | # Introduction 12 | 13 | The R package `DoubleML` implements the double/debiased machine learning framework of Chernozhukov et al. (2018). It provides functionalities to estimate parameters in causal models based on machine learning methods. 
The double machine learning framework consists of three key ingredients: 14 | 15 | * Neyman orthogonality, 16 | * High-quality machine learning estimation and 17 | * Sample splitting. 18 | 19 | Estimation of nuisance components can be performed by various state-of-the-art machine learning methods that are available in the `mlr3` ecosystem (Lang et al., 2019). `DoubleML` makes it possible to perform inference in a variety of causal models, including partially linear and interactive regression models and their extensions to instrumental variable estimation. The object-oriented implementation of `DoubleML` enables high flexibility in the model specification and makes it easily extendable. This paper serves as an introduction to the double machine learning framework and the R package `DoubleML`. In reproducible code examples with simulated and real data sets, we demonstrate how `DoubleML` users can perform valid inference based on machine learning methods. 20 | 21 | # Long Package Vignette 22 | 23 | A long version of this package vignette is available in the accompanying publication in the Journal of Statistical Software at 24 | 25 | 26 | 27 | # References: 28 | 29 | Bach, P., Chernozhukov, V., Kurz, M. S., Spindler, M. and Klaassen, S. (2024), DoubleML - An Object-Oriented Implementation of Double Machine Learning in R, Journal of Statistical Software, 108(3): 1-56, , arXiv:[2103.09603](https://arxiv.org/abs/2103.09603). 30 | 31 | Chernozhukov, V., Chetverikov, D., Demirer, M., Duflo, E., Hansen, C., Newey, W. and Robins, J. (2018), Double/debiased machine learning for treatment and structural parameters. The Econometrics Journal, 21: C1-C68, URL: . 32 | 33 | Lang, M., Binder, M., Richter, J., Schratz, P., Pfisterer, F., Coors, S., Au, Q., Casalicchio, G., Kotthoff, L. and Bischl, B. (2019), mlr3: A modern object-oriented machine learning framework in R. Journal of Open Source Software, , URL: https://mlr3.mlr-org.com/. 
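The workflow introduced in this vignette can be sketched in a few lines of R. Note that this is an illustrative sketch only: the simulated data-generating process, the `rpart` learners and the number of folds are arbitrary demonstration choices, not recommendations.

```r
library(DoubleML)
library(mlr3)

lgr::get_logger("mlr3")$set_threshold("warn")

set.seed(3141)
# simulated data from the partially linear regression DGP of Chernozhukov et al. (2018)
dml_data = make_plr_CCDDHNR2018(n_obs = 500)

# nuisance learners from the mlr3 ecosystem
ml_l = lrn("regr.rpart") # learner for E[Y|X]
ml_m = lrn("regr.rpart") # learner for E[D|X]

# partially linear regression model with 5-fold cross-fitting
dml_plr = DoubleMLPLR$new(dml_data, ml_l = ml_l, ml_m = ml_m, n_folds = 5)
dml_plr$fit()

dml_plr$coef                                 # point estimate of the causal parameter
dml_plr$se                                   # standard error
dml_plr$confint(level = 0.95, joint = FALSE) # confidence interval
```

Any learner available through `mlr3` (for example `regr.cv_glmnet` from `mlr3learners`) can be plugged in for the nuisance components in the same way.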
34 | 35 | -------------------------------------------------------------------------------- /vignettes/install.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Installing DoubleML" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{Installing DoubleML} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | ```{r setup, include=FALSE} 11 | knitr::opts_chunk$set(echo = TRUE) 12 | knitr::opts_chunk$set(eval = FALSE) 13 | ``` 14 | 15 | 16 | ## Installation 17 | 18 | Please note that the current version of the `DoubleML` package for R is still under development and that the code might be subject to changes. 19 | 20 | ### Installation from CRAN 21 | 22 | For installation from CRAN, type 23 | ```{r, eval = FALSE} 24 | install.packages("DoubleML") 25 | ``` 26 | 27 | Load the package after completed installation. 28 | 29 | ```{r, message=FALSE, warning=FALSE} 30 | library(DoubleML) 31 | ``` 32 | 33 | ### Installation from GitHub 34 | 35 | The `DoubleML` package for R can be installed from GitHub with the following command (previous installation of the [`remotes` package](https://remotes.r-lib.org/index.html) is required). 36 | 37 | ```{r, eval = FALSE} 38 | remotes::install_github("DoubleML/doubleml-for-r") 39 | ``` 40 | 41 | Load the package after completed installation. 42 | 43 | ```{r, message=FALSE, warning=FALSE} 44 | library(DoubleML) 45 | ``` 46 | 47 | The Python package `DoubleML` is available via its GitHub repository. For more information, please visit our [user guide](https://docs.doubleml.org/). 48 | --------------------------------------------------------------------------------