├── .gitignore ├── img ├── logo.png ├── favicon.ico └── logo_dark.png ├── doc ├── .gitignore ├── _static │ ├── logo.png │ ├── dark_rapi.png │ ├── logo_dark.png │ ├── light_rapi.png │ ├── dark_pythonapi.png │ ├── dark_userguide.png │ ├── dark_workflow.png │ ├── light_workflow.png │ ├── light_pythonapi.png │ ├── light_userguide.png │ ├── basic_iv_example_nb.png │ ├── dag_usecase_revised.png │ ├── dark_examplegallery.png │ ├── dark_gettingstarted.png │ ├── firststage_example_nb.png │ ├── light_examplegallery.png │ ├── light_gettingstarted.png │ ├── robust_iv_example_nb.png │ ├── sensitivity_example_nb.png │ ├── switcher.json │ └── css │ │ └── custom.css ├── examples │ ├── did │ │ └── mpdta.rda │ ├── figures │ │ └── dag_usecase_revised.png │ ├── data │ │ └── orig_demand_data_example.csv │ └── index.rst ├── _templates │ ├── sidebar-doubleml-workflow.html │ ├── logo.html │ └── class.rst ├── guide │ ├── sensitivity │ │ ├── plm │ │ │ ├── plm_sensitivity.inc │ │ │ └── plr_sensitivity.rst │ │ ├── irm │ │ │ ├── irm_sensitivity.inc │ │ │ ├── apo_sensitivity.rst │ │ │ └── irm_sensitivity.rst │ │ ├── did │ │ │ ├── did_pa_binary_sensitivity.rst │ │ │ ├── did_sensitivity.inc │ │ │ ├── did_cs_binary_sensitivity.rst │ │ │ ├── did_pa_sensitivity.rst │ │ │ └── did_cs_sensitivity.rst │ │ ├── benchmarking.rst │ │ ├── theory.rst │ │ └── implementation.rst │ ├── models │ │ ├── irm │ │ │ ├── apos.rst │ │ │ ├── apo.rst │ │ │ ├── irm.rst │ │ │ └── iivm.rst │ │ ├── plm │ │ │ ├── plr.rst │ │ │ ├── lplr.rst │ │ │ ├── pliv.rst │ │ │ └── plm_models.inc │ │ ├── did │ │ │ ├── did_models.inc │ │ │ ├── did_aggregation.rst │ │ │ ├── did_implementation.rst │ │ │ ├── did_pa.rst │ │ │ ├── did_binary.rst │ │ │ ├── did_cs.rst │ │ │ └── did_setup.rst │ │ └── ssm │ │ │ ├── ssm.rst │ │ │ └── ssm_models.inc │ ├── scores │ │ ├── ssm │ │ │ ├── ssm_scores.inc │ │ │ ├── mar_score.rst │ │ │ └── nr_score.rst │ │ ├── plm │ │ │ ├── plm_scores.inc │ │ │ ├── pliv_score.rst │ │ │ ├── plr_score.rst │ │ │ └── lplr_score.rst │ │ ├── irm │ │ │ ├── pq_score.rst │ │ │ ├── cvar_score.rst │ │ │ ├── iivm_score.rst │ │ │ ├── irm_scores.inc │ │ │ ├── apo_score.rst │ │ │ ├── lpq_score.rst │ │ │ └── irm_score.rst │ │ └── did │ │ │ ├── did_scores.inc │ │ │ ├── did_pa_binary_score.rst │ │ │ ├── did_cs_binary_score.rst │ │ │ ├── did_pa_score.rst │ │ │ └── did_cs_score.rst │ ├── guide.rst │ ├── learners │ │ ├── r │ │ │ ├── learners_overview.inc │ │ │ ├── minimum_req.rst │ │ │ ├── pipelines.rst │ │ │ ├── tune_and_pipelines.rst │ │ │ └── set_hyperparams.rst │ │ └── python │ │ │ ├── minimum_req.rst │ │ │ ├── learners_overview.inc │ │ │ ├── evaluate_learners.rst │ │ │ ├── tune_hyperparams.rst │ │ │ ├── set_hyperparams.rst │ │ │ ├── tune_hyperparams_old.rst │ │ │ └── external_preds.rst │ ├── models.rst │ ├── learners.rst │ ├── data_backend.rst │ ├── data │ │ ├── rdd_data.rst │ │ ├── ssm_data.rst │ │ ├── panel_data.rst │ │ └── did_data.rst │ └── sensitivity.rst ├── shared │ ├── dgp │ │ ├── return_type.rst │ │ └── return_type_iv.rst │ ├── heterogeneity │ │ ├── cvar_qte.rst │ │ ├── cate.rst │ │ ├── pq.rst │ │ ├── cvar.rst │ │ ├── qte.rst │ │ ├── gate.rst │ │ ├── cate_plr.rst │ │ ├── gate_plr.rst │ │ └── policytree.rst │ └── causal_graphs │ │ ├── plr_irm_causal_graph.rst │ │ └── pliv_iivm_causal_graph.rst ├── api │ ├── mixins.rst │ ├── data_class.rst │ ├── api.rst │ ├── utility.rst │ ├── datasets.rst │ └── dml_models.rst ├── Makefile └── make.bat ├── requirements.txt ├── .github ├── ISSUE_TEMPLATE │ ├── docu.yml │ ├── config.yml │ └── literature.yml └── workflows │ ├── 
deploy_docu_stable.yml │ ├── test_build_docu_released.yml │ ├── deploy_docu_dev.yml │ └── test_build_docu_dev.yml ├── README.md ├── LICENSE └── .devcontainer ├── devcontainer.json ├── docker_guide.md ├── Dockerfile.dev └── build_image_guide.md /.gitignore: -------------------------------------------------------------------------------- 1 | *.idea 2 | *.vscode 3 | -------------------------------------------------------------------------------- /img/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DoubleML/doubleml-docs/HEAD/img/logo.png -------------------------------------------------------------------------------- /doc/.gitignore: -------------------------------------------------------------------------------- 1 | _build 2 | 3 | # Ignore generated API documentation 4 | api/generated/ -------------------------------------------------------------------------------- /img/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DoubleML/doubleml-docs/HEAD/img/favicon.ico -------------------------------------------------------------------------------- /img/logo_dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DoubleML/doubleml-docs/HEAD/img/logo_dark.png -------------------------------------------------------------------------------- /doc/_static/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DoubleML/doubleml-docs/HEAD/doc/_static/logo.png -------------------------------------------------------------------------------- /doc/_static/dark_rapi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DoubleML/doubleml-docs/HEAD/doc/_static/dark_rapi.png -------------------------------------------------------------------------------- /doc/_static/logo_dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DoubleML/doubleml-docs/HEAD/doc/_static/logo_dark.png -------------------------------------------------------------------------------- /doc/_static/light_rapi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DoubleML/doubleml-docs/HEAD/doc/_static/light_rapi.png -------------------------------------------------------------------------------- /doc/examples/did/mpdta.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DoubleML/doubleml-docs/HEAD/doc/examples/did/mpdta.rda -------------------------------------------------------------------------------- /doc/_static/dark_pythonapi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DoubleML/doubleml-docs/HEAD/doc/_static/dark_pythonapi.png -------------------------------------------------------------------------------- /doc/_static/dark_userguide.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DoubleML/doubleml-docs/HEAD/doc/_static/dark_userguide.png -------------------------------------------------------------------------------- /doc/_static/dark_workflow.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DoubleML/doubleml-docs/HEAD/doc/_static/dark_workflow.png -------------------------------------------------------------------------------- /doc/_static/light_workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DoubleML/doubleml-docs/HEAD/doc/_static/light_workflow.png -------------------------------------------------------------------------------- /doc/_static/light_pythonapi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DoubleML/doubleml-docs/HEAD/doc/_static/light_pythonapi.png -------------------------------------------------------------------------------- /doc/_static/light_userguide.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DoubleML/doubleml-docs/HEAD/doc/_static/light_userguide.png -------------------------------------------------------------------------------- /doc/_static/basic_iv_example_nb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DoubleML/doubleml-docs/HEAD/doc/_static/basic_iv_example_nb.png -------------------------------------------------------------------------------- /doc/_static/dag_usecase_revised.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DoubleML/doubleml-docs/HEAD/doc/_static/dag_usecase_revised.png -------------------------------------------------------------------------------- /doc/_static/dark_examplegallery.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DoubleML/doubleml-docs/HEAD/doc/_static/dark_examplegallery.png -------------------------------------------------------------------------------- /doc/_static/dark_gettingstarted.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DoubleML/doubleml-docs/HEAD/doc/_static/dark_gettingstarted.png -------------------------------------------------------------------------------- /doc/_templates/sidebar-doubleml-workflow.html: -------------------------------------------------------------------------------- 1 | 6 | -------------------------------------------------------------------------------- /doc/_static/firststage_example_nb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DoubleML/doubleml-docs/HEAD/doc/_static/firststage_example_nb.png -------------------------------------------------------------------------------- /doc/_static/light_examplegallery.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DoubleML/doubleml-docs/HEAD/doc/_static/light_examplegallery.png -------------------------------------------------------------------------------- /doc/_static/light_gettingstarted.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DoubleML/doubleml-docs/HEAD/doc/_static/light_gettingstarted.png -------------------------------------------------------------------------------- /doc/_static/robust_iv_example_nb.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DoubleML/doubleml-docs/HEAD/doc/_static/robust_iv_example_nb.png -------------------------------------------------------------------------------- /doc/_static/sensitivity_example_nb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DoubleML/doubleml-docs/HEAD/doc/_static/sensitivity_example_nb.png -------------------------------------------------------------------------------- /doc/examples/figures/dag_usecase_revised.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DoubleML/doubleml-docs/HEAD/doc/examples/figures/dag_usecase_revised.png -------------------------------------------------------------------------------- /doc/guide/sensitivity/plm/plm_sensitivity.inc: -------------------------------------------------------------------------------- 1 | The following partially linear models are implemented. 2 | 3 | .. _sensitivity_plr: 4 | 5 | Partially linear regression model (PLR) 6 | ======================================= 7 | 8 | .. include:: /guide/sensitivity/plm/plr_sensitivity.rst 9 | -------------------------------------------------------------------------------- /doc/shared/dgp/return_type.rst: -------------------------------------------------------------------------------- 1 | If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. 2 | 3 | If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. 4 | 5 | If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s ``(x, y, d)``. -------------------------------------------------------------------------------- /doc/shared/dgp/return_type_iv.rst: -------------------------------------------------------------------------------- 1 | If ``'DoubleMLData'`` or ``DoubleMLData``, returns a ``DoubleMLData`` object. 2 | 3 | If ``'DataFrame'``, ``'pd.DataFrame'`` or ``pd.DataFrame``, returns a ``pd.DataFrame``. 4 | 5 | If ``'array'``, ``'np.ndarray'``, ``'np.array'`` or ``np.ndarray``, returns ``np.ndarray``'s ``(x, y, d, z)``. -------------------------------------------------------------------------------- /doc/_static/switcher.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "dev", 4 | "version": "dev", 5 | "url": "https://docs.doubleml.org/dev/" 6 | }, 7 | { 8 | "name": "stable", 9 | "version": "stable", 10 | "url": "https://docs.doubleml.org/stable/", 11 | "preferred": true 12 | } 13 | ] -------------------------------------------------------------------------------- /doc/guide/models/irm/apos.rst: -------------------------------------------------------------------------------- 1 | If multiple treatment levels should be estimated simultaneously, further possible target parameters of interest in this model 2 | are the contrasts (or average treatment effects) between treatment levels :math:`d_j` and :math:`d_k`: 3 | 4 | .. math:: 5 | 6 | \theta_{0,jk} = \mathbb{E}[g_0(d_j, X) - g_0(d_k, X)]. -------------------------------------------------------------------------------- /doc/api/mixins.rst: -------------------------------------------------------------------------------- 1 | .. _api_mixins: 2 | 3 | Score Mixin Classes for DoubleML Models 4 | --------------------------------------- 5 | 6 | .. currentmodule:: doubleml 7 | 8 | ..
autosummary:: 9 | :toctree: generated/ 10 | :template: class.rst 11 | 12 | double_ml_score_mixins.LinearScoreMixin 13 | double_ml_score_mixins.NonLinearScoreMixin 14 | -------------------------------------------------------------------------------- /doc/shared/heterogeneity/cvar_qte.rst: -------------------------------------------------------------------------------- 1 | For a quantile :math:`\tau \in (0,1)` the target parameter :math:`\theta_{\tau}` of interest are the 2 | **treatment effects on the conditional value at risk**, 3 | 4 | .. math:: 5 | 6 | \theta_{\tau} = \theta_{\tau}(1) - \theta_{\tau}(0) 7 | 8 | where :math:`\theta_{\tau}(d)` denotes the corresponding conditional values at risk 9 | of the potential outcomes. -------------------------------------------------------------------------------- /doc/api/data_class.rst: -------------------------------------------------------------------------------- 1 | .. _api_data_class: 2 | 3 | DoubleML Data Class 4 | ---------------------------------- 5 | 6 | .. currentmodule:: doubleml.data 7 | 8 | .. autosummary:: 9 | :toctree: generated/ 10 | :template: class.rst 11 | 12 | DoubleMLData 13 | DoubleMLClusterData 14 | DoubleMLPanelData 15 | DoubleMLSSMData 16 | DoubleMLRDDData 17 | DoubleMLDIDData 18 | -------------------------------------------------------------------------------- /doc/guide/scores/ssm/ssm_scores.inc: -------------------------------------------------------------------------------- 1 | The following scores for sample selection models are implemented. 2 | 3 | .. _ssm-mar-score: 4 | 5 | Missingness at Random 6 | ====================== 7 | 8 | .. include:: /guide/scores/ssm/mar_score.rst 9 | 10 | 11 | .. _ssm-nr-score: 12 | 13 | Nonignorable Nonresponse 14 | ========================= 15 | 16 | .. include:: /guide/scores/ssm/nr_score.rst 17 | -------------------------------------------------------------------------------- /doc/api/api.rst: -------------------------------------------------------------------------------- 1 | .. _python_api: 2 | 3 | :parenttoc: True 4 | 5 | API Reference 6 | ============= 7 | 8 | .. toctree:: 9 | :maxdepth: 2 10 | :numbered: 11 | 12 | DoubleML Data Class 13 | DoubleML Models 14 | Datasets 15 | Utility Classes and Functions 16 | Score Mixin Classes for DoubleML Models 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /doc/shared/heterogeneity/cate.rst: -------------------------------------------------------------------------------- 1 | **Conditional Average Treatment Effects (CATEs)** for ``DoubleMLIRM`` models consider the target parameters 2 | 3 | .. math:: 4 | 5 | \theta_{0}(x) = \mathbb{E}[Y(1) - Y(0)| X=x] 6 | 7 | for a low-dimensional feature :math:`X`, where :math:`Y(d)` the potential outcome with :math:`d \in \{0, 1\}`. 8 | 9 | Point estimates and confidence intervals can be obtained via the ``gate()`` and ``confint()`` methods. -------------------------------------------------------------------------------- /doc/shared/heterogeneity/pq.rst: -------------------------------------------------------------------------------- 1 | For a quantile :math:`\tau \in (0,1)` the target parameters :math:`\theta_{\tau}(d)` of interest are the **potential quantiles (PQs)**, 2 | 3 | .. math:: 4 | 5 | P(Y(d) \le \theta_{\tau}(d)) = \tau, 6 | 7 | and **local potential quantiles (LPQs)**, 8 | 9 | .. math:: 10 | 11 | P(Y(d) \le \theta_{\tau}(d)|\text{Compliers}) = \tau. 
12 | 13 | where :math:`Y(d)` denotes the potential outcome with :math:`d \in \{0, 1\}`. 14 | 15 | -------------------------------------------------------------------------------- /doc/shared/heterogeneity/cvar.rst: -------------------------------------------------------------------------------- 1 | For a quantile :math:`\tau \in (0,1)` the target parameters :math:`\theta_{\tau}(d)` of interest are 2 | the **conditional values at risk (CVaRs)** of the potential outcomes, 3 | 4 | .. math:: 5 | 6 | \theta_{\tau}(d) = \frac{\mathbb{E}[Y(d) 1\{F_{Y(d)}(Y(d)) \ge \tau\}]}{1-\tau}, 7 | 8 | 9 | where :math:`Y(d)` denotes the potential outcome with :math:`d \in \{0, 1\}` and 10 | :math:`F_{Y(d)}(x)` the corresponding cdf of :math:`Y(d)`. 11 | -------------------------------------------------------------------------------- /doc/guide/sensitivity/irm/irm_sensitivity.inc: -------------------------------------------------------------------------------- 1 | The following nonparametric regression models are implemented. 2 | 3 | 4 | .. _sensitivity_irm: 5 | 6 | Interactive regression model (IRM) 7 | ======================================= 8 | 9 | .. include:: /guide/sensitivity/irm/irm_sensitivity.rst 10 | 11 | 12 | .. _sensitivity_apo: 13 | 14 | Average Potential Outcomes (APOs) 15 | ======================================= 16 | 17 | .. include:: /guide/sensitivity/irm/apo_sensitivity.rst 18 | 19 | -------------------------------------------------------------------------------- /doc/guide/models/plm/plr.rst: -------------------------------------------------------------------------------- 1 | **Partially linear regression (PLR)** models take the form 2 | 3 | .. math:: 4 | 5 | Y = D \theta_0 + g_0(X) + \zeta, & &\mathbb{E}(\zeta | D,X) = 0, 6 | 7 | D = m_0(X) + V, & &\mathbb{E}(V | X) = 0, 8 | 9 | where :math:`Y` is the outcome variable and :math:`D` is the policy variable of interest. 10 | The high-dimensional vector :math:`X = (X_1, \ldots, X_p)` consists of other confounding covariates, 11 | and :math:`\zeta` and :math:`V` are stochastic errors. -------------------------------------------------------------------------------- /doc/shared/heterogeneity/qte.rst: -------------------------------------------------------------------------------- 1 | For a quantile :math:`\tau \in (0,1)` the target parameter :math:`\theta_{\tau}` of interest is the **quantile treatment effect (QTE)**, 2 | 3 | .. math:: 4 | 5 | \theta_{\tau} = \theta_{\tau}(1) - \theta_{\tau}(0) 6 | 7 | where :math:`\theta_{\tau}(d)` denotes the corresponding potential quantile. 8 | 9 | Analogously, the **local quantile treatment effect (LQTE)** can be defined as the difference of 10 | the corresponding local potential quantiles.
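As an illustration, QTEs can be estimated with the ``DoubleMLQTE`` class listed in the API reference. The following is a minimal sketch (not part of the original file): the dataset helper ``make_irm_data`` and the estimator class are taken from the API listings in this repository, but the exact argument names and defaults used below are assumptions and should be checked against the current API documentation.

.. code-block:: python

    import numpy as np
    from sklearn.ensemble import RandomForestClassifier
    import doubleml as dml
    from doubleml.irm.datasets import make_irm_data

    # simulated data with a binary treatment and a continuous outcome
    np.random.seed(42)
    dml_data = make_irm_data(n_obs=1000, dim_x=10)

    # both nuisance learners are classifiers: g estimates P(Y <= theta | X, D=d),
    # m estimates the propensity score P(D=d | X)
    ml_g = RandomForestClassifier(n_estimators=200)
    ml_m = RandomForestClassifier(n_estimators=200)

    # quantile treatment effects at the 25%, 50% and 75% quantiles
    dml_qte = dml.DoubleMLQTE(dml_data, ml_g, ml_m, quantiles=[0.25, 0.5, 0.75])
    dml_qte.fit()
    print(dml_qte.confint())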
11 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | DoubleML[rdd] 2 | scikit-learn==1.6.1 3 | 4 | # test 5 | pytest 6 | rpy2 7 | 8 | # doc 9 | sphinx==8.1.3 10 | sphinx-copybutton 11 | nbsphinx==0.9.6 12 | ipykernel 13 | sphinx-gallery==0.18.0 14 | sphinx-panels 15 | sphinx-design 16 | jupyter-sphinx 17 | pydata-sphinx-theme==0.15.4 18 | pickleshare 19 | matplotlib 20 | plotly==5.24.1 21 | seaborn 22 | xgboost 23 | lightgbm 24 | flaml 25 | 26 | # notebooks 27 | ipykernel 28 | pyreadr 29 | # tabpfn # only relevant for TabPFN example notebooks -------------------------------------------------------------------------------- /doc/shared/causal_graphs/plr_irm_causal_graph.rst: -------------------------------------------------------------------------------- 1 | .. graphviz:: 2 | :align: center 3 | :caption: Causal diagram 4 | 5 | digraph { 6 | nodesep=1; 7 | ranksep=1; 8 | rankdir=LR; 9 | { node [shape=circle, style=filled] 10 | Y [fillcolor="#56B4E9"] 11 | D [fillcolor="#F0E442"] 12 | V [fillcolor="#F0E442"] 13 | X [fillcolor="#D55E00"] 14 | } 15 | Y -> D -> V [dir="back"]; 16 | X -> D; 17 | Y -> X [dir="back"]; 18 | } -------------------------------------------------------------------------------- /doc/shared/heterogeneity/gate.rst: -------------------------------------------------------------------------------- 1 | **Group Average Treatment Effects (GATEs)** for ``DoubleMLIRM`` models consider the target parameters 2 | 3 | .. math:: 4 | 5 | \theta_{0,k} = \mathbb{E}[Y(1) - Y(0)| G_k],\quad k=1,\dots, K. 6 | 7 | where :math:`G_k` denotes a group indicator and :math:`Y(d)` the potential outcome with :math:`d \in \{0, 1\}`. 8 | 9 | Point estimates and confidence intervals can be obtained via the ``gate()`` and ``confint()`` methods. 10 | Remark that for straightforward interpretation, the groups have to be mutually exclusive. -------------------------------------------------------------------------------- /doc/guide/models/plm/lplr.rst: -------------------------------------------------------------------------------- 1 | **Logistic partially linear regression (LPLR)** models take the form 2 | 3 | .. math:: 4 | 5 | \mathbb{E} [Y | D, X] = \mathbb{P} (Y=1 | D, X) = \text{expit} \{\beta_0 D + r_0 (X) \} 6 | 7 | where :math:`Y` is the binary outcome variable and :math:`D` is the policy variable of interest. 8 | The high-dimensional vector :math:`X = (X_1, \ldots, X_p)` consists of confounding covariates and 9 | :math:`\text{expit}` is the logistic link function 10 | 11 | .. 
math:: 12 | \text{expit}(x) = \frac{1}{1 + e^{-x}} 13 | 14 | -------------------------------------------------------------------------------- /doc/_static/css/custom.css: -------------------------------------------------------------------------------- 1 | /* PYDATA THEME */ 2 | html[data-theme="dark"] img:not(.only-dark):not(.dark-light) { 3 | filter: brightness(1) contrast(1) !important; 4 | } 5 | 6 | html[data-theme="dark"] .bd-content img:not(.only-dark):not(.dark-light) { 7 | background: none !important; 8 | border-radius: 0; 9 | } 10 | 11 | html[data-theme="light"] { 12 | --pst-color-primary: #0063BC; 13 | } 14 | 15 | html[data-theme="dark"] { 16 | --pst-color-primary: #0092ff; 17 | } 18 | 19 | /* BOOTSTRAP */ 20 | .card { 21 | background: none !important; 22 | } 23 | -------------------------------------------------------------------------------- /doc/guide/models/plm/pliv.rst: -------------------------------------------------------------------------------- 1 | **Partially linear IV regression (PLIV)** models take the form 2 | 3 | .. math:: 4 | 5 | Y - D \theta_0 = g_0(X) + \zeta, & &\mathbb{E}(\zeta | Z, X) = 0, 6 | 7 | Z = m_0(X) + V, & &\mathbb{E}(V | X) = 0, 8 | 9 | where :math:`Y` is the outcome variable, :math:`D` is the policy variable of interest and :math:`Z` 10 | denotes one or multiple instrumental variables. The high-dimensional vector 11 | :math:`X = (X_1, \ldots, X_p)` consists of other confounding covariates, and :math:`\zeta` and 12 | :math:`V` are stochastic errors. -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/docu.yml: -------------------------------------------------------------------------------- 1 | name: Documentation and User Guide 2 | description: Issues related to the documentation and user guide on docs.doubleml.org 3 | labels: ["documentation"] 4 | 5 | body: 6 | - type: textarea 7 | attributes: 8 | label: Description of the issue or change proposal 9 | description: | 10 | Please provide a clear and concise description of the issue and if relevant give links to the affected pages 11 | on docs.doubleml.org 12 | validations: 13 | required: true 14 | - type: textarea 15 | attributes: 16 | label: Comments, context or references 17 | -------------------------------------------------------------------------------- /doc/guide/models/irm/apo.rst: -------------------------------------------------------------------------------- 1 | For general discrete-valued treatments :math:`D \in \lbrace d_0, \dots, d_l \rbrace` the model can be generalized to 2 | 3 | .. math:: 4 | 5 | Y = g_0(D, X) + U, & &\mathbb{E}(U | X, D) = 0, 6 | 7 | A_j = m_{0,j}(X) + V, & &\mathbb{E}(V | X) = 0, 8 | 9 | where :math:`A_j := 1\lbrace D = d_j\rbrace` is an indicator variable for treatment level :math:`d_j` and :math:`m_{0,j}(X)` denotes 10 | the corresponding propensity score. 11 | 12 | Possible target parameters of interest in this model are the average potential outcomes (APOs) 13 | 14 | .. math:: 15 | 16 | \theta_{0,j} = \mathbb{E}[g_0(d_j, X)]. -------------------------------------------------------------------------------- /doc/guide/scores/plm/plm_scores.inc: -------------------------------------------------------------------------------- 1 | The following scores for partially linear models are implemented. 2 | 3 | .. _plr-score: 4 | 5 | Partially linear regression model (PLR) 6 | ======================================= 7 | 8 | .. include:: /guide/scores/plm/plr_score.rst 9 | 10 | ..
_lplr-score: 11 | 12 | Logistic partial linear regression (LPLR) 13 | =========================================== 14 | 15 | .. include:: /guide/scores/plm/lplr_score.rst 16 | 17 | .. _pliv-score: 18 | 19 | Partially linear IV regression model (PLIV) 20 | =========================================== 21 | 22 | .. include:: /guide/scores/plm/pliv_score.rst 23 | -------------------------------------------------------------------------------- /doc/guide/guide.rst: -------------------------------------------------------------------------------- 1 | .. _guide: 2 | 3 | :parenttoc: True 4 | 5 | User Guide 6 | ========== 7 | 8 | .. toctree:: 9 | :maxdepth: 2 10 | :numbered: 11 | 12 | The basics of double/debiased machine learning 13 | Data Backend 14 | Models 15 | Heterogeneous Treatment Effects 16 | Score functions 17 | Double machine learning algorithms 18 | Learners, hyperparameters and hyperparameter tuning 19 | Variance estimation and confidence intervals 20 | Sample-splitting, cross-fitting and repeated cross-fitting 21 | Sensitivity Analysis -------------------------------------------------------------------------------- /doc/shared/heterogeneity/cate_plr.rst: -------------------------------------------------------------------------------- 1 | **Conditional Average Treatment Effects (CATEs)** for ``DoubleMLPLR`` models consider a slightly adjusted version of the ``DoubleMLPLR`` model. 2 | Instead of considering a constant treatment effect :math:`\theta_0` for all observations, the adjusted model allows for a different effect based on groups. 3 | 4 | .. math:: 5 | 6 | Y = D \theta_0(X) + g_0(X) + \zeta, & &\mathbb{E}(\zeta | D,X) = 0, 7 | 8 | D = m_0(X) + V, & &\mathbb{E}(V | X) = 0, 9 | 10 | where :math:`\theta_0(X)` denotes the heterogeneous treatment effect. 11 | 12 | Point estimates and confidence intervals can be obtained via the ``gate()`` and ``confint()`` methods. -------------------------------------------------------------------------------- /doc/guide/models/irm/irm.rst: -------------------------------------------------------------------------------- 1 | **Interactive regression (IRM)** models take the form 2 | 3 | .. math:: 4 | 5 | Y = g_0(D, X) + U, & &\mathbb{E}(U | X, D) = 0, 6 | 7 | D = m_0(X) + V, & &\mathbb{E}(V | X) = 0, 8 | 9 | where the treatment variable is binary, :math:`D \in \lbrace 0,1 \rbrace`. 10 | We consider estimation of the average treatment effects when treatment effects are fully heterogeneous. 11 | 12 | Target parameters of interest in this model are the average treatment effect (ATE), 13 | 14 | .. math:: 15 | 16 | \theta_0 = \mathbb{E}[g_0(1, X) - g_0(0,X)] 17 | 18 | and the average treatment effect of the treated (ATTE), 19 | 20 | .. math:: 21 | 22 | \theta_0 = \mathbb{E}[g_0(1, X) - g_0(0,X) | D=1]. 23 | -------------------------------------------------------------------------------- /doc/api/utility.rst: -------------------------------------------------------------------------------- 1 | .. _api_utility: 2 | 3 | Utility Classes and Functions 4 | ----------------------------- 5 | 6 | Utility Classes 7 | ~~~~~~~~~~~~~~~ 8 | 9 | .. currentmodule:: doubleml 10 | 11 | .. autosummary:: 12 | :toctree: generated/ 13 | :template: class.rst 14 | 15 | utils.DMLDummyRegressor 16 | utils.DMLDummyClassifier 17 | utils.DMLOptunaResult 18 | utils.DoubleMLBLP 19 | utils.DoubleMLPolicyTree 20 | utils.GlobalRegressor 21 | utils.GlobalClassifier 22 | utils.PSProcessorConfig 23 | utils.PSProcessor 24 | 25 | Utility Functions 26 | ~~~~~~~~~~~~~~~~~ 27 | 28 | .. 
currentmodule:: doubleml 29 | 30 | .. autosummary:: 31 | :toctree: generated/ 32 | 33 | utils.gain_statistics -------------------------------------------------------------------------------- /doc/shared/causal_graphs/pliv_iivm_causal_graph.rst: -------------------------------------------------------------------------------- 1 | .. graphviz:: 2 | :align: center 3 | :caption: Causal diagram 4 | 5 | digraph { 6 | nodesep=1; 7 | ranksep=1; 8 | rankdir=LR; 9 | { node [shape=circle, style=filled] 10 | Y [fillcolor="#56B4E9"] 11 | D [fillcolor="#56B4E9"] 12 | Z [fillcolor="#F0E442"] 13 | V [fillcolor="#F0E442"] 14 | X [fillcolor="#D55E00"] 15 | } 16 | 17 | Z -> V [dir="back"]; 18 | D -> X [dir="back"]; 19 | Y -> D [dir="both"]; 20 | X -> Y; 21 | Z -> X [dir="back"]; 22 | Z -> D; 23 | 24 | { rank=same; Y D } 25 | { rank=same; Z X } 26 | { rank=same; V } 27 | } -------------------------------------------------------------------------------- /doc/_templates/logo.html: -------------------------------------------------------------------------------- 1 | 4 | 5 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | contact_links: 3 | - name: API Documentation Improvement [Python Package DoubleML] 4 | url: https://github.com/DoubleML/doubleml-for-py/issues/new?assignees=&labels=documentation&template=api_docu.yml&title=%5BAPI+Documentation%5D%3A+ 5 | about: Suggest an improvement for the API documentation [Python Package DoubleML] 6 | - name: API Documentation Improvement [R Package DoubleML] 7 | url: https://github.com/DoubleML/doubleml-for-r/issues/new?assignees=&labels=documentation&template=api_docu.yml&title=%5BAPI+Documentation%5D%3A+ 8 | about: Suggest an improvement for the API documentation [R Package DoubleML] 9 | - name: Blank Issue 10 | url: https://github.com/DoubleML/doubleml-docs/issues/new 11 | about: Open a blank issue 12 | -------------------------------------------------------------------------------- /doc/guide/scores/irm/pq_score.rst: -------------------------------------------------------------------------------- 1 | For ``DoubleMLPQ`` the only valid option is ``score='PQ'``. For ``treatment=d`` with :math:`d\in\{0,1\}` and 2 | a quantile :math:`\tau\in (0,1)` this implements the nonlinear score function: 3 | 4 | .. math:: 5 | 6 | \psi(W; \theta, \eta) := g_{d}(X, \tilde{\theta}) + \frac{1\{D=d\}}{m(X)}(1\{Y\le \theta\} - g_d(X, \tilde{\theta})) - \tau 7 | 8 | 9 | where :math:`\eta=(g_d,m)` with true values 10 | 11 | .. math:: 12 | 13 | g_{d,0}(X, \theta_0) &= \mathbb{E}[1\{Y\le \theta_0\}|X, D=d] 14 | 15 | m_0(X) &= P(D=d|X). 16 | 17 | Remark that :math:`g_{d,0}(X,\theta_0)` depends on the target parameter :math:`\theta_0`, such that 18 | the score is estimated with a preliminary estimate :math:`\tilde{\theta}`. For further details, see `Kallus et al. (2019) `_. 19 | -------------------------------------------------------------------------------- /doc/shared/heterogeneity/gate_plr.rst: -------------------------------------------------------------------------------- 1 | **Group Average Treatment Effects (GATEs)** for ``DoubleMLPLR`` models consider a slightly adjusted version of the ``DoubleMLPLR`` model. 2 | Instead of considering a constant treatment effect :math:`\theta_0` for all observations, the adjusted model allows for a different effect based on groups. 3 | 4 | .. 
math:: 5 | 6 | Y = D \theta_0(G_k) + g_0(X) + \zeta, & &\mathbb{E}(\zeta | D,X) = 0, 7 | 8 | D = m_0(X) + V, & &\mathbb{E}(V | X) = 0, 9 | 10 | where :math:`G_k` for :math:`k=1,\dots, K` denotes a group indicator, where the groups can depend on the confounding features :math:`X`. 11 | 12 | Point estimates and confidence intervals can be obtained via the ``gate()`` and ``confint()`` methods. 13 | Remark that for straightforward interpretation, the groups have to be mutually exclusive. 14 | -------------------------------------------------------------------------------- /doc/guide/scores/ssm/mar_score.rst: -------------------------------------------------------------------------------- 1 | For ``DoubleMLSSM`` the ``score='missing-at-random'`` implements the score function: 2 | 3 | .. math:: 4 | 5 | \psi(W; \theta, \eta) := \tilde{\psi}_1(W; \eta) - \tilde{\psi}_0(W; \eta) - \theta 6 | 7 | where 8 | 9 | .. math:: 10 | 11 | \tilde{\psi}_1(W; \eta) &= \frac{D \cdot S \cdot [Y - g(1,1,X)]}{m(X) \cdot \pi(1, X)} + g(1,1,X) 12 | 13 | \tilde{\psi}_0(W; \eta) &= \frac{(1-D) \cdot S \cdot [Y - g(0,1,X)]}{(1-m(X)) \cdot \pi(0, X)} + g(0,1,X) 14 | 15 | for :math:`d\in\{0,1\}` and :math:`\eta=(g, m, \pi)` with true values 16 | 17 | .. math:: 18 | 19 | g_0(d,s,X) &= \mathbb{E}[Y|D=d, S=s, X] 20 | 21 | m_0(X) &= P(D=1|X) 22 | 23 | \pi_0(d, X) &= P(S=1|D=d, X). 24 | 25 | 26 | For further details, see `Bia, Huber and Lafférs (2023) `_. 27 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | NBSPHINX_EXECUTE = auto 11 | 12 | # Add NBSPHINX_EXECUTE to SPHINXOPTS 13 | SPHINXOPTS += -D nbsphinx_execute=$(NBSPHINX_EXECUTE) 14 | 15 | 16 | # Put it first so that "make" without argument is like "make help". 17 | help: 18 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 19 | 20 | .PHONY: help Makefile 21 | 22 | # Catch-all target: route all unknown targets to Sphinx using the new 23 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
24 | %: Makefile 25 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 26 | -------------------------------------------------------------------------------- /doc/examples/data/orig_demand_data_example.csv: -------------------------------------------------------------------------------- 1 | ,Date,StockCode,Country,Description,Quantity,revenue,UnitPrice 2 | 0,2010-12-01,10002,France,INFLATABLE POLITICAL GLOBE ,48,40.8,0.85 3 | 1,2010-12-01,10002,United Kingdom,INFLATABLE POLITICAL GLOBE ,12,10.2,0.85 4 | 2,2010-12-01,10125,United Kingdom,MINI FUNKY DESIGN TAPES,2,1.7,0.85 5 | 3,2010-12-01,10133,United Kingdom,COLOURING PENCILS BROWN TUBE,5,4.25,0.85 6 | 4,2010-12-01,10135,United Kingdom,COLOURING PENCILS BROWN TUBE,1,2.51,2.51 7 | 5,2010-12-01,11001,United Kingdom,ASSTD DESIGN RACING CAR PEN,3,10.08,3.36 8 | 6,2010-12-01,15044B,United Kingdom,BLUE PAPER PARASOL ,1,2.95,2.95 9 | 7,2010-12-01,15056BL,United Kingdom,EDWARDIAN PARASOL BLACK,20,113.0,5.65 10 | 8,2010-12-01,15056N,United Kingdom,EDWARDIAN PARASOL NATURAL,50,236.3,4.726 11 | 9,2010-12-01,15056P,United Kingdom,EDWARDIAN PARASOL PINK,48,220.79999999999998,4.6 12 | -------------------------------------------------------------------------------- /doc/guide/scores/irm/cvar_score.rst: -------------------------------------------------------------------------------- 1 | For ``DoubleMLCVAR`` the only valid option is ``score='CVaR'``. For ``treatment=d`` with :math:`d\in\{0,1\}` and 2 | a quantile :math:`\tau\in (0,1)` this implements the score function: 3 | 4 | .. math:: 5 | 6 | \psi(W; \theta, \eta) := g_{d}(X, \gamma) + \frac{1\{D=d\}}{m(X)}(\max(\gamma, (1 - \tau)^{-1}(Y - \tau \gamma)) - g_d(X, \gamma)) - \theta 7 | 8 | where :math:`\eta=(g_d,m,\gamma)` with true values 9 | 10 | .. math:: 11 | 12 | g_{d,0}(X, \gamma_0) &= \mathbb{E}[\max(\gamma_0, (1 - \tau)^{-1}(Y - \tau \gamma_0))|X, D=d] 13 | 14 | m_0(X) &= P(D=d|X) 15 | 16 | and :math:`\gamma_0` being the potential quantile of :math:`Y(d)`. As for potential quantiles, the estimate :math:`g_d` is constructed via 17 | a preliminary estimate of :math:`\gamma_0`. For further details, see `Kallus et al. (2019) `_. 18 | -------------------------------------------------------------------------------- /doc/guide/scores/irm/iivm_score.rst: -------------------------------------------------------------------------------- 1 | For the IIVM model implemented in ``DoubleMLIIVM`` 2 | we employ for ``score='LATE'`` the score function: 3 | 4 | ``score='LATE'`` implements the score function: 5 | 6 | .. math:: 7 | 8 | \psi(W; \theta, \eta) :=\; &g(1,X) - g(0,X) 9 | + \frac{Z (Y - g(1,X))}{m(X)} - \frac{(1 - Z)(Y - g(0,X))}{1 - m(X)} 10 | 11 | &- \bigg(r(1,X) - r(0,X) + \frac{Z (D - r(1,X))}{m(X)} - \frac{(1 - Z)(D - r(0,X))}{1 - m(X)} \bigg) \theta 12 | 13 | =\; &\psi_a(W; \eta) \theta + \psi_b(W; \eta) 14 | 15 | with :math:`\eta=(g, m, r)` and where the components of the linear score are 16 | 17 | .. math:: 18 | 19 | \psi_a(W; \eta) &= - \bigg(r(1,X) - r(0,X) + \frac{Z (D - r(1,X))}{m(X)} - \frac{(1 - Z)(D - r(0,X))}{1 - m(X)} \bigg), 20 | 21 | \psi_b(W; \eta) &= g(1,X) - g(0,X) + \frac{Z (Y - g(1,X))}{m(X)} - \frac{(1 - Z)(Y - g(0,X))}{1 - m(X)}. 22 | -------------------------------------------------------------------------------- /doc/_templates/class.rst: -------------------------------------------------------------------------------- 1 | {{ fullname | escape | underline}} 2 | 3 | .. currentmodule:: {{ module }} 4 | 5 | .. 
autoclass:: {{ objname }} 6 | 7 | {% block methods %} 8 | {% if methods %} 9 | .. rubric:: Methods 10 | 11 | .. autosummary:: 12 | {% for item in methods %} 13 | {%- if not item.startswith('_') or item in ['__call__'] %} ~{{ name }}.{{ item }} 14 | {% endif %} 15 | {%- endfor %} 16 | {% endif %} 17 | {% endblock %} 18 | 19 | {% block attributes %} 20 | {% if attributes %} 21 | .. rubric:: Attributes 22 | 23 | .. autosummary:: 24 | {% for item in attributes %} 25 | ~{{ name }}.{{ item }} 26 | {%- endfor %} 27 | {% endif %} 28 | {% endblock %} 29 | 30 | 31 | {% if methods %} 32 | {% for item in methods %} 33 | {%- if not item.startswith('_') or item in ['__call__'] %} 34 | .. automethod:: {{ name }}.{{ item }} 35 | {% endif %} 36 | {%- endfor %} 37 | {% endif %} -------------------------------------------------------------------------------- /doc/guide/learners/r/learners_overview.inc: -------------------------------------------------------------------------------- 1 | .. _r_learner_req: 2 | 3 | Minimum requirements for learners 4 | ################################# 5 | 6 | .. include:: /guide/learners/r/minimum_req.rst 7 | 8 | 9 | .. _r_set_params: 10 | 11 | Specifying learners and set hyperparameters 12 | ########################################### 13 | 14 | .. include:: /guide/learners/r/set_hyperparams.rst 15 | 16 | 17 | .. _r_pipelines: 18 | 19 | Using pipelines to construct learners 20 | ##################################### 21 | 22 | .. include:: /guide/learners/r/pipelines.rst 23 | 24 | 25 | .. _r_tune_params: 26 | 27 | Hyperparameter tuning 28 | ##################### 29 | 30 | .. include:: /guide/learners/r/tune_hyperparams.rst 31 | 32 | 33 | .. _r_tune_and_pipelines: 34 | 35 | Hyperparameter tuning with pipelines 36 | #################################### 37 | 38 | .. include:: /guide/learners/r/tune_and_pipelines.rst 39 | 40 | -------------------------------------------------------------------------------- /doc/shared/heterogeneity/policytree.rst: -------------------------------------------------------------------------------- 1 | **Policy Learning** considers to find an optimal decision policy. We consider deterministic binary policies, which are defined as mapping 2 | 3 | .. math:: 4 | 5 | \pi: X\mapsto \{0,1\}. 6 | 7 | Using the score component :math:`\psi_b(W_i,\hat{\eta})` of the :ref:`IRM ` score, 8 | we can find the optimal treatment policy by solving the weighted classification problem 9 | 10 | .. math:: 11 | 12 | \hat{\pi} = \mathop{\arg \max}\limits_{\pi\in\Pi} \frac{1}{n}\sum_{i=1}^n(2\pi(X_i)-1)\hat{\psi_b(W_i,\hat{\eta})}, 13 | 14 | where :math:`\Pi` denotes a policy class, which we define as depth-:math:`m` classification trees. 15 | Thus, we estimate splits in the features :math:`X` that reflect the heterogeneity of the treatment effect 16 | and consequently maximize the sum of the estimated individual treatment effects of all individuals by assigning different treatments. 
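A minimal sketch of how such a policy tree could be obtained in Python is given below (not part of the original file). The dataset helper ``make_irm_data``, the ``DoubleMLIRM`` estimator and the ``DoubleMLPolicyTree`` utility class appear in the API listings of this repository, but the ``policy_tree()`` method call, its arguments and the ``plot_tree()`` call are assumptions that should be verified against the current API reference.

.. code-block:: python

    import numpy as np
    from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
    import doubleml as dml
    from doubleml.irm.datasets import make_irm_data

    # simulated data with a binary treatment
    np.random.seed(42)
    dml_data = make_irm_data(n_obs=1000, dim_x=5)

    # fit the IRM model that provides the score component psi_b
    dml_irm = dml.DoubleMLIRM(dml_data,
                              ml_g=RandomForestRegressor(n_estimators=200),
                              ml_m=RandomForestClassifier(n_estimators=200))
    dml_irm.fit()

    # learn a depth-2 policy tree on a subset of the features
    # (assumed method name and column labels of the simulated data)
    features = dml_data.data[["X1", "X2"]]
    policy_tree = dml_irm.policy_tree(features, depth=2)
    policy_tree.plot_tree()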
17 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Documentation and User Guide for DoubleML - Double Machine Learning in Python & R 2 | 3 | [![PyPI version](https://badge.fury.io/py/DoubleML.svg)](https://badge.fury.io/py/DoubleML) 4 | [![CRAN Version](https://www.r-pkg.org/badges/version/DoubleML)](https://cran.r-project.org/package=DoubleML) 5 | 6 | - This repo contains the source code for the documentation and user guide for the Python and R packages **DoubleML**. 7 | - The documentation is available at [https://docs.doubleml.org/](https://docs.doubleml.org/). 8 | - The source code for the Python package **DoubleML** is available here: [https://github.com/DoubleML/doubleml-for-py](https://github.com/DoubleML/doubleml-for-py). 9 | - The source code for the R package **DoubleML** is available here: [https://github.com/DoubleML/doubleml-for-r](https://github.com/DoubleML/doubleml-for-r). 10 | 11 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /doc/guide/scores/did/did_scores.inc: -------------------------------------------------------------------------------- 1 | The following scores for difference-in-differences models are implemented. 2 | 3 | 4 | .. _did-pa-score: 5 | 6 | Panel Data 7 | ========== 8 | 9 | .. include:: /guide/scores/did/did_pa_score.rst 10 | 11 | 12 | .. _did-cs-score: 13 | 14 | Repeated Cross-Sectional Data 15 | ============================= 16 | 17 | .. include:: /guide/scores/did/did_cs_score.rst 18 | 19 | 20 | Two treatment periods 21 | ===================== 22 | 23 | .. warning:: 24 | This documentation refers to the deprecated implementation for two time periods. 25 | This functionality will be removed in a future version. The generalized version are :ref:`did-pa-score` and :ref:`did-cs-score`. 26 | 27 | 28 | Panel Data 29 | ~~~~~~~~~~~ 30 | 31 | .. include:: /guide/scores/did/did_pa_binary_score.rst 32 | 33 | 34 | Repeated Cross-Sectional Data 35 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 36 | 37 | 38 | .. include:: /guide/scores/did/did_cs_binary_score.rst -------------------------------------------------------------------------------- /doc/guide/models.rst: -------------------------------------------------------------------------------- 1 | .. 
_models: 2 | 3 | Models 4 | ---------- 5 | 6 | The :ref:`DoubleML `-package includes the following models. 7 | 8 | .. _plm-models: 9 | 10 | Partially linear models (PLM) 11 | +++++++++++++++++++++++++++++ 12 | 13 | .. include:: models/plm/plm_models.inc 14 | 15 | 16 | .. _irm-models: 17 | 18 | Interactive regression models (IRM) 19 | ++++++++++++++++++++++++++++++++++++ 20 | 21 | .. include:: models/irm/irm_models.inc 22 | 23 | 24 | .. _did-models: 25 | 26 | Difference-in-Differences Models (DID) 27 | ++++++++++++++++++++++++++++++++++++++ 28 | 29 | .. include:: models/did/did_models.inc 30 | 31 | 32 | .. _ssm-models: 33 | 34 | Sample Selection Models (SSM) 35 | ++++++++++++++++++++++++++++++++++++++ 36 | 37 | .. include:: models/ssm/ssm_models.inc 38 | 39 | 40 | .. _rdd-models: 41 | 42 | Regression Discontinuity Designs (RDD) 43 | ++++++++++++++++++++++++++++++++++++++ 44 | 45 | .. include:: models/rdd/rdd_models.inc 46 | -------------------------------------------------------------------------------- /doc/guide/sensitivity/did/did_pa_binary_sensitivity.rst: -------------------------------------------------------------------------------- 1 | In the :ref:`did-pa-model` with ``score='observational'`` and ``in_sample_normalization=True`` the score function implies the following representations 2 | 3 | .. math:: 4 | 5 | m(W,g) &= \big(g(1,X) - g(0,X)\big)\frac{D}{\mathbb{E}[D]} 6 | 7 | \alpha(W) &= \frac{D}{\mathbb{E}[D]} - \frac{\frac{m(X)(1-D)}{1-m(X)}}{\mathbb{E}\left[\frac{m(X)(1-D)}{1-m(X)}\right]}. 8 | 9 | If instead ``in_sample_normalization=False``, the Riesz representer changes to 10 | 11 | .. math:: 12 | 13 | \alpha(W) = \frac{D}{\mathbb{E}[D]} - \frac{m(X)(1-D)}{\mathbb{E}[D](1-m(X))}. 14 | 15 | For ``score='experimental'`` the score function implies the following representations 16 | 17 | .. math:: 18 | 19 | m(W,g) &= g(1,X) - g(0,X) 20 | 21 | \alpha(W) &= \frac{D}{\mathbb{E}[D]} - \frac{1-D}{1-\mathbb{E}[D]}. 22 | 23 | The ``nuisance_elements`` are then computed with plug-in versions according to the general :ref:`sensitivity_implementation`. -------------------------------------------------------------------------------- /doc/guide/scores/ssm/nr_score.rst: -------------------------------------------------------------------------------- 1 | For ``DoubleMLSSM`` the ``score='nonignorable'`` implements the score function: 2 | 3 | .. math:: 4 | 5 | \psi(W; \theta, \eta) := \tilde{\psi}_1(W; \eta) - \tilde{\psi}_0(W; \eta) - \theta 6 | 7 | where 8 | 9 | .. math:: 10 | 11 | \tilde{\psi}_1(W; \eta) &= \frac{D \cdot S \cdot [Y - g(1,1,X,\Pi)]}{m(X, \Pi) \cdot \pi(1,X,Z)} + g(1,1,X,\Pi) 12 | 13 | \tilde{\psi}_0(W; \eta) &= \frac{(1-D) \cdot S \cdot [Y - g(0,1,X,\Pi)]}{(1-m(X,\Pi)) \cdot \pi(0,X,Z)} + g(0,1,X,\Pi) 14 | 15 | for :math:`d\in\{0,1\}` and :math:`\eta=(g, m, \pi, \Pi)` with true values 16 | 17 | .. math:: 18 | 19 | \pi_0(d, X, Z) &= P(S=1|D=d, X, Z) 20 | 21 | \Pi_0 &:= \pi_0(D, X, Z) = P(S=1|D,X,Z) 22 | 23 | g_0(d,s,X) &= \mathbb{E}[Y|D=d, S=s, X, \Pi_0] 24 | 25 | m_0(X, \Pi_0) &= P(D=1|X, \Pi_0). 26 | 27 | The estimate of :math:`\Pi_0` is constructed via a preliminary estimate of :math:`\pi_0(D,X,Z)`, obtained with nested cross-fitting. 28 | 29 | For further details, see `Bia, Huber and Lafférs (2023) `_. 30 | -------------------------------------------------------------------------------- /doc/guide/scores/irm/irm_scores.inc: -------------------------------------------------------------------------------- 1 | The following scores for nonparametric regression models are implemented.
2 | 3 | .. _irm-score: 4 | 5 | Binary Interactive Regression Model (IRM) 6 | ========================================== 7 | 8 | .. include:: /guide/scores/irm/irm_score.rst 9 | 10 | 11 | .. _apo-score: 12 | 13 | Average Potential Outcomes (APOs) 14 | ================================= 15 | 16 | .. include:: /guide/scores/irm/apo_score.rst 17 | 18 | 19 | .. _iivm-score: 20 | 21 | Interactive IV model (IIVM) 22 | =========================== 23 | 24 | .. include:: /guide/scores/irm/iivm_score.rst 25 | 26 | 27 | .. _pq-score: 28 | 29 | Potential quantiles (PQs) 30 | ========================= 31 | 32 | .. include:: /guide/scores/irm/pq_score.rst 33 | 34 | 35 | .. _lpq-score: 36 | 37 | Local potential quantiles (LPQs) 38 | ================================ 39 | 40 | .. include:: /guide/scores/irm/lpq_score.rst 41 | 42 | 43 | .. _cvar-score: 44 | 45 | Conditional value at risk (CVaR) 46 | ================================ 47 | 48 | .. include:: /guide/scores/irm/cvar_score.rst -------------------------------------------------------------------------------- /doc/api/datasets.rst: -------------------------------------------------------------------------------- 1 | .. _api_datasets: 2 | 3 | Datasets 4 | --------- 5 | 6 | Dataset Loaders 7 | ~~~~~~~~~~~~~~~ 8 | 9 | .. currentmodule:: doubleml.datasets 10 | 11 | .. autosummary:: 12 | :toctree: generated/ 13 | 14 | fetch_401K 15 | fetch_bonus 16 | 17 | Dataset Generators 18 | ~~~~~~~~~~~~~~~~~~ 19 | 20 | .. currentmodule:: doubleml 21 | 22 | .. autosummary:: 23 | :toctree: generated/ 24 | 25 | irm.datasets.make_irm_data 26 | irm.datasets.make_iivm_data 27 | irm.datasets.make_heterogeneous_data 28 | irm.datasets.make_irm_data_discrete_treatments 29 | irm.datasets.make_confounded_irm_data 30 | irm.datasets.make_ssm_data 31 | 32 | plm.datasets.make_plr_CCDDHNR2018 33 | plm.datasets.make_plr_turrell2018 34 | plm.datasets.make_lplr_LZZ2020 35 | plm.datasets.make_pliv_CHS2015 36 | plm.datasets.make_pliv_multiway_cluster_CKMS2021 37 | plm.datasets.make_confounded_plr_data 38 | 39 | did.datasets.make_did_SZ2020 40 | did.datasets.make_did_CS2021 41 | did.datasets.make_did_cs_CS2021 42 | 43 | rdd.datasets.make_simple_rdd_data 44 | -------------------------------------------------------------------------------- /doc/guide/models/did/did_models.inc: -------------------------------------------------------------------------------- 1 | .. include:: /guide/models/did/did_setup.rst 2 | 3 | .. _did-implementation-model: 4 | 5 | Parameters & Implementation 6 | *************************** 7 | 8 | .. include:: /guide/models/did/did_implementation.rst 9 | 10 | 11 | .. _did-pa-model: 12 | 13 | Panel data 14 | ****************** 15 | 16 | .. include:: /guide/models/did/did_pa.rst 17 | 18 | 19 | .. _did-cs-model: 20 | 21 | Repeated cross-sections 22 | ******************************* 23 | 24 | .. include:: /guide/models/did/did_cs.rst 25 | 26 | 27 | .. _did-aggregation: 28 | 29 | Effect Aggregation 30 | ****************** 31 | 32 | .. include:: /guide/models/did/did_aggregation.rst 33 | 34 | 35 | .. _did-binary-model: 36 | 37 | Two treatment periods 38 | ********************* 39 | 40 | .. warning:: 41 | This documentation refers to the deprecated implementation for two time periods. 42 | This functionality will be removed in a future version. 43 | 44 | .. note:: 45 | We recommend using the implementation :ref:`did-pa-model` and :ref:`did-cs-model`. 46 | 47 | .. 
include:: /guide/models/did/did_binary.rst 48 | -------------------------------------------------------------------------------- /doc/guide/learners.rst: -------------------------------------------------------------------------------- 1 | .. _learners: 2 | 3 | Learners, hyperparameters and hyperparameter tuning 4 | ----------------------------------------------------------- 5 | 6 | The estimation of a double/debiased machine learning model involves the estimation of several nuisance function with 7 | machine learning estimators. 8 | Such learners are implemented in various Python and R packages. 9 | The implementation of :ref:`DoubleML ` is based on the meta-packages 10 | `scikit-learn `_ for Python and `mlr3 `_ for R. 11 | The interfaces to specify the learners, set hyperparameters and tune hyperparameters are described in the following 12 | separately for :ref:`Python ` and :ref:`R `. 13 | 14 | .. _learners_python: 15 | 16 | Python: Learners and hyperparameters 17 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 18 | 19 | .. include:: learners/python/learners_overview.inc 20 | 21 | 22 | .. _learners_r: 23 | 24 | R: Learners and hyperparameters 25 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 26 | 27 | .. include:: learners/r/learners_overview.inc 28 | 29 | 30 | -------------------------------------------------------------------------------- /doc/guide/models/irm/iivm.rst: -------------------------------------------------------------------------------- 1 | **Interactive IV regression (IIVM)** models take the form 2 | 3 | .. math:: 4 | 5 | Y = \ell_0(D, X) + \zeta, & &\mathbb{E}(\zeta | Z, X) = 0, 6 | 7 | Z = m_0(X) + V, & &\mathbb{E}(V | X) = 0, 8 | 9 | where the treatment variable is binary, :math:`D \in \lbrace 0,1 \rbrace` 10 | and the instrument is binary, :math:`Z \in \lbrace 0,1 \rbrace`. 11 | Consider the functions :math:`g_0`, :math:`r_0` and :math:`m_0`, where :math:`g_0` maps the support of :math:`(Z,X)` to 12 | :math:`\mathbb{R}` and :math:`r_0` and :math:`m_0` respectively map the support of :math:`(Z,X)` and :math:`X` to 13 | :math:`(\varepsilon, 1-\varepsilon)` for some :math:`\varepsilon \in (0, 1/2)`, such that 14 | 15 | .. math:: 16 | 17 | Y = g_0(Z, X) + \nu, & &\mathbb{E}(\nu | Z, X) = 0, 18 | 19 | D = r_0(Z, X) + U, & &\mathbb{E}(U | Z, X) = 0, 20 | 21 | Z = m_0(X) + V, & &\mathbb{E}(V | X) = 0. 22 | 23 | The target parameter of interest in this model is the local average treatment effect (LATE), 24 | 25 | .. math:: 26 | 27 | \theta_0 = \frac{\mathbb{E}[g_0(1, X)] - \mathbb{E}[g_0(0,X)]}{\mathbb{E}[r_0(1, X)] - \mathbb{E}[r_0(0,X)]}. -------------------------------------------------------------------------------- /doc/guide/scores/plm/pliv_score.rst: -------------------------------------------------------------------------------- 1 | For the PLIV model implemented in ``DoubleMLPLIV`` one can choose between 2 | ``score='IV-type'`` and ``score='partialling out'``. 3 | 4 | ``score='partialling out'`` implements the score function: 5 | 6 | .. math:: 7 | 8 | \psi(W; \theta, \eta) &:= [Y - \ell(X) - \theta (D - r(X))] [Z - m(X)] 9 | 10 | &= - (D - r(X)) (Z - m(X)) \theta + (Y - \ell(X)) (Z - m(X)) 11 | 12 | &= \psi_a(W; \eta) \theta + \psi_b(W; \eta) 13 | 14 | with :math:`\eta=(\ell, m, r)` and where the components of the linear score are 15 | 16 | .. math:: 17 | 18 | \psi_a(W; \eta) &= - (D - r(X)) (Z - m(X)), 19 | 20 | \psi_b(W; \eta) &= (Y - \ell(X)) (Z - m(X)). 21 | 22 | ``score='IV-type'`` implements the score function: 23 | 24 | .. 
math:: 25 | 26 | \psi(W; \theta, \eta) &:= [Y - D \theta - g(X)] [Z - m(X)] 27 | 28 | &= - D (Z - m(X)) \theta + (Y - g(X)) (Z - m(X)) 29 | 30 | &= \psi_a(W; \eta) \theta + \psi_b(W; \eta) 31 | 32 | with :math:`\eta=(g,m)` and where the components of the linear score are 33 | 34 | .. math:: 35 | 36 | \psi_a(W; \eta) &= - D (Z - m(X)), 37 | 38 | \psi_b(W; \eta) &= (Y - g(X)) (Z - m(X)). -------------------------------------------------------------------------------- /doc/guide/learners/python/minimum_req.rst: -------------------------------------------------------------------------------- 1 | The minimum requirement for a learner to be used for nuisance models in the :ref:`DoubleML ` 2 | package is 3 | 4 | * The implementation of a ``fit()`` and ``predict()`` method. 5 | Some models, like :py:class:`doubleml.DoubleMLIRM` and :py:class:`doubleml.DoubleMLIIVM` require classifiers. 6 | * In case of classifiers, the learner needs to come with a ``predict_proba()`` instead of, or in addition to, a 7 | ``predict()`` method, see for example :py:meth:`sklearn.ensemble.RandomForestClassifier.predict_proba`. 8 | * In order to be able to use the ``set_ml_nuisance_params()`` method of :ref:`DoubleML ` classes the 9 | learner additionally needs to come with a ``set_params()`` method, 10 | see for example :py:meth:`sklearn.ensemble.RandomForestRegressor.set_params`. 11 | * We further rely on the function :py:func:`sklearn.base.clone` which adds the requirement of a ``get_params()`` 12 | method for a learner in order to be used for nuisance models of :ref:`DoubleML ` model classes. 13 | 14 | Most learners from `scikit-learn `_ satisfy all these minimum requirements. 15 | -------------------------------------------------------------------------------- /doc/guide/data_backend.rst: -------------------------------------------------------------------------------- 1 | .. _data_backend: 2 | 3 | Data Backend 4 | ------------ 5 | 6 | :ref:`DoubleML ` provides a unified data interface via the :mod:`doubleml.data` module. 7 | It supports both :py:class:`pandas.DataFrame` objects and :py:class:`numpy.ndarray` arrays and now allows 8 | clustered data to be handled directly via :class:`~doubleml.data.DoubleMLData`. 9 | 10 | .. _dml_data: 11 | 12 | DoubleMLData 13 | ~~~~~~~~~~~~ 14 | 15 | .. include:: data/base_data.rst 16 | 17 | 18 | .. _dml_data_types: 19 | 20 | Special Data Types 21 | ~~~~~~~~~~~~~~~~~~ 22 | 23 | The :ref:`DoubleMLData ` class is extended by the following classes to support special data 24 | types or allow for additional parameters. 25 | 26 | .. _dml_did_data: 27 | 28 | DoubleMLDIDData 29 | ^^^^^^^^^^^^^^^ 30 | 31 | .. include:: data/did_data.rst 32 | 33 | 34 | .. _dml_panel_data: 35 | 36 | DoubleMLPanelData 37 | ^^^^^^^^^^^^^^^^^ 38 | 39 | .. include:: data/panel_data.rst 40 | 41 | 42 | .. _dml_rdd_data: 43 | 44 | DoubleMLRDDData 45 | ^^^^^^^^^^^^^^^ 46 | 47 | .. include:: data/rdd_data.rst 48 | 49 | 50 | .. _dml_ssm_data: 51 | 52 | DoubleMLSSMData 53 | ^^^^^^^^^^^^^^^ 54 | 55 | .. include:: data/ssm_data.rst 56 | 57 | -------------------------------------------------------------------------------- /doc/guide/scores/irm/apo_score.rst: -------------------------------------------------------------------------------- 1 | For the average potential outcomes (APO) models implemented in ``DoubleMLAPO`` and ``DoubleMLAPOS`` 2 | the ``score='APO'`` is implemented. Furthermore, weights :math:`\omega(Y,D,X)` and 3 | 4 | .. math:: 5 | 6 | \bar{\omega}(X) = \mathbb{E}[\omega(Y,D,X)|X] 7 | 8 | can be specified. 
For a given treatment level :math:`d` the general score function takes the form 9 | 10 | .. math:: 11 | 12 | \psi(W; \theta, \eta) :=\; &\omega(Y,D,X) \cdot g(d,X) + \bar{\omega}(X)\cdot \frac{1\lbrace D = d\rbrace }{m(X)}(Y - g(d,X)) - \theta 13 | 14 | =& \psi_a(W; \eta) \theta + \psi_b(W; \eta) 15 | 16 | with :math:`\eta=(g,m)`, where the true nuisance elements are 17 | 18 | .. math:: 19 | 20 | g_0(D, X) &= \mathbb{E}[Y | D, X], 21 | 22 | m_{0,d}(X) &= \mathbb{E}[1\lbrace D = d\rbrace | X] = P(D=d|X). 23 | 24 | The components of the linear score are 25 | 26 | .. math:: 27 | 28 | \psi_a(W; \eta) =& - 1, 29 | 30 | \psi_b(W; \eta) =\; &\omega(Y,D,X) \cdot g(d,X) + \bar{\omega}(X)\cdot \frac{1\lbrace D = d\rbrace }{m(X)}(Y - g(d,X)). 31 | 32 | 33 | If no weights are specified, the weights are set to 34 | 35 | .. math:: 36 | 37 | \omega(Y,D,X) &= 1 38 | 39 | \bar{\omega}(X) &= 1. 40 | -------------------------------------------------------------------------------- /doc/guide/learners/python/learners_overview.inc: -------------------------------------------------------------------------------- 1 | .. _py_learner_req: 2 | 3 | Minimum requirements for learners 4 | ################################# 5 | 6 | .. include:: /guide/learners/python/minimum_req.rst 7 | 8 | 9 | .. _py_set_params: 10 | 11 | Specifying learners and set hyperparameters 12 | ########################################### 13 | 14 | .. include:: /guide/learners/python/set_hyperparams.rst 15 | 16 | .. _py_tune_params: 17 | 18 | Hyperparameter tuning 19 | ##################### 20 | 21 | .. include:: /guide/learners/python/tune_hyperparams.rst 22 | 23 | 24 | Hyperparameter tuning (Grid Search) 25 | ################################### 26 | 27 | .. warning:: 28 | **Deprecated:** The ``tune()`` method is deprecated and will be removed in a future version. 29 | Please use ``tune_ml_models()`` for hyperparameter tuning, see :ref:`Hyperparameter tuning `. 30 | 31 | .. include:: /guide/learners/python/tune_hyperparams_old.rst 32 | 33 | .. _py_eval_learners: 34 | 35 | Evaluate learners 36 | ################# 37 | 38 | .. include:: /guide/learners/python/evaluate_learners.rst 39 | 40 | 41 | .. _py_ext_pred: 42 | 43 | Advanced: External Predictions 44 | ############################## 45 | 46 | 47 | .. include:: /guide/learners/python/external_preds.rst -------------------------------------------------------------------------------- /doc/guide/models/ssm/ssm.rst: -------------------------------------------------------------------------------- 1 | **Sample Selection Models (SSM)** implemented in the package focus on the binary treatment case when outcomes are only observed for a subpopulation 2 | due to sample selection or outcome attrition. 3 | 4 | The implementation and notation are based on `Bia, Huber and Lafférs (2023) `_. 5 | Let :math:`D_i` be the binary treatment indicator and :math:`Y_{i}(d)` the potential outcome under treatment value :math:`d`. Further, define 6 | :math:`Y_{i}:=Y_{i}(D)` to be the realized outcome and :math:`S_{i}` as a binary selection indicator. The outcome :math:`Y_{i}` is only observed if :math:`S_{i}=1`. 7 | Finally, let :math:`X_i` be a vector of observed covariates, measured prior to treatment assignment. 8 | 9 | The target parameter of interest is the average treatment effect (ATE) 10 | 11 | .. math:: 12 | 13 | \theta_0 = \mathbb{E}[Y_{i}(1)- Y_{i}(0)]. 14 | 15 | The corresponding identifying assumption is 16 | 17 | - **Cond. 
Independence of Treatment:** :math:`Y_i(d) \perp D_i|X_i\quad a.s.` for :math:`d=0,1` 18 | 19 | where further assumptions are made in the context of the respective sample selection model. 20 | 21 | .. note:: 22 | A more detailed example can be found in the :ref:`Example Gallery `. -------------------------------------------------------------------------------- /doc/guide/data/rdd_data.rst: -------------------------------------------------------------------------------- 1 | The ``DoubleMLRDDData`` class specialises :ref:`DoubleMLData ` for 2 | regression discontinuity designs. In addition to the standard causal roles it 3 | tracks a mandatory running variable. 4 | 5 | Key arguments 6 | """"""""""""" 7 | 8 | * ``score_col``: column with the running/score variable. 9 | * ``cluster_cols``: optional cluster identifiers inherited from the base data 10 | class. 11 | * ``from_arrays``: expects an additional ``score`` array alongside ``x``, ``y`` 12 | and ``d``. 13 | 14 | ``DoubleMLRDDData`` ensures that the running variable is kept separate from the 15 | other feature sets and exposes the ``score`` property for convenient access. 16 | 17 | Example usage 18 | """"""""""""" 19 | 20 | .. tab-set:: 21 | 22 | .. tab-item:: Python 23 | :sync: py 24 | 25 | .. ipython:: python 26 | 27 | import doubleml as dml 28 | from doubleml.rdd.datasets import make_simple_rdd_data 29 | 30 | dict_rdd = make_simple_rdd_data(n_obs=500, return_type="DataFrame") 31 | dml_data = dml.DoubleMLRDDData.from_arrays( 32 | x=dict_rdd["X"], 33 | y=dict_rdd["Y"], 34 | d=dict_rdd["D"], 35 | score=dict_rdd["score"] 36 | ) 37 | 38 | print(dml_data) 39 | 40 | -------------------------------------------------------------------------------- /doc/api/dml_models.rst: -------------------------------------------------------------------------------- 1 | .. _api_dml_models: 2 | 3 | DoubleML Models 4 | ------------------------------ 5 | 6 | 7 | .. _api_plm_models: 8 | 9 | doubleml.plm 10 | ~~~~~~~~~~~~~~~ 11 | 12 | .. currentmodule:: doubleml.plm 13 | 14 | .. autosummary:: 15 | :toctree: generated/ 16 | :template: class.rst 17 | 18 | DoubleMLPLR 19 | DoubleMLLPLR 20 | DoubleMLPLIV 21 | 22 | 23 | .. _api_irm_models: 24 | 25 | doubleml.irm 26 | ~~~~~~~~~~~~~~~ 27 | 28 | .. currentmodule:: doubleml.irm 29 | 30 | .. autosummary:: 31 | :toctree: generated/ 32 | :template: class.rst 33 | 34 | DoubleMLIRM 35 | DoubleMLAPO 36 | DoubleMLAPOS 37 | DoubleMLIIVM 38 | DoubleMLPQ 39 | DoubleMLLPQ 40 | DoubleMLCVAR 41 | DoubleMLQTE 42 | DoubleMLSSM 43 | 44 | 45 | .. _api_did_models: 46 | 47 | doubleml.did 48 | ~~~~~~~~~~~~~~~ 49 | 50 | .. currentmodule:: doubleml.did 51 | 52 | .. autosummary:: 53 | :toctree: generated/ 54 | :template: class.rst 55 | 56 | DoubleMLDIDMulti 57 | DoubleMLDIDAggregation 58 | DoubleMLDIDBinary 59 | DoubleMLDID 60 | DoubleMLDIDCS 61 | 62 | 63 | .. _api_rdd_models: 64 | 65 | doubleml.rdd 66 | ~~~~~~~~~~~~~ 67 | 68 | .. currentmodule:: doubleml.rdd 69 | 70 | .. autosummary:: 71 | :toctree: generated/ 72 | :template: class.rst 73 | 74 | RDFlex -------------------------------------------------------------------------------- /doc/guide/data/ssm_data.rst: -------------------------------------------------------------------------------- 1 | The ``DoubleMLSSMData`` class covers the sample selection model backend. 2 | It extends :ref:`DoubleMLData ` with a dedicated selection indicator and inherits support for clustered data. 3 | 4 | Key arguments 5 | """"""""""""" 6 | 7 | * ``s_col``: column containing the selection indicator. 
8 | * ``cluster_cols``: optional cluster identifiers. 9 | * ``from_arrays``: expects an additional ``s`` array together with ``x``, ``y`` and ``d``. 10 | 11 | The object exposes the ``s`` property and keeps the selection indicator 12 | separate from covariates and treatment variables. 13 | 14 | Example usage 15 | """"""""""""" 16 | 17 | .. tab-set:: 18 | 19 | .. tab-item:: Python 20 | :sync: py 21 | 22 | .. ipython:: python 23 | 24 | import doubleml as dml 25 | from doubleml.irm.datasets import make_ssm_data 26 | 27 | df = make_ssm_data(n_obs=500, return_type="DataFrame") 28 | dml_data = dml.DoubleMLSSMData( 29 | df, 30 | y_col="y", 31 | d_cols="d", 32 | s_col="s" 33 | ) 34 | 35 | x, y, d, _, s = make_ssm_data(n_obs=200, return_type="array") 36 | dml_data_arrays = dml.DoubleMLSSMData.from_arrays(x, y, d, s=s) 37 | print(dml_data) 38 | 39 | -------------------------------------------------------------------------------- /doc/guide/scores/irm/lpq_score.rst: -------------------------------------------------------------------------------- 1 | For ``DoubleMLLPQ`` the only valid option is ``score='LPQ'``. For ``treatment=d`` with :math:`d\in\{0,1\}`, instrument :math:`Z` and 2 | a quantile :math:`\tau\in (0,1)` this implements the nonlinear score function: 3 | 4 | .. math:: 5 | 6 | \psi(W; \theta, \eta) :=& \Big(g_{d, Z=1}(X, \tilde{\theta}) - g_{d, Z=0}(X, \tilde{\theta}) + \frac{Z}{m(X)}(1\{D=d\} \cdot 1\{Y\le \theta\} - g_{d, Z=1}(X, \tilde{\theta})) 7 | 8 | &\quad - \frac{1-Z}{1-m(X)}(1\{D=d\} \cdot 1\{Y\le \theta\} - g_{d, Z=0}(X, \tilde{\theta}))\Big) \cdot \frac{2d -1}{\gamma} - \tau 9 | 10 | 11 | where :math:`\eta=(g_{d,Z=1}, g_{d,Z=0}, m, \gamma)` with true values 12 | 13 | .. math:: 14 | 15 | g_{d,Z=z,0}(X, \theta_0) &= \mathbb{E}[1\{D=d\} \cdot 1\{Y\le \theta_0\}|X, Z=z],\quad z\in\{0,1\} 16 | 17 | m_{Z=z,0}(X) &= P(D=d|X, Z=z),\quad z\in\{0,1\} 18 | 19 | m_0(X) &= P(Z=1|X) 20 | 21 | \gamma_0 &= \mathbb{E}[P(D=d|X, Z=1) - P(D=d|X, Z=0)]. 22 | 23 | Further, the compliance probability :math:`\gamma_0` is estimated with the two additional nuisance components 24 | 25 | .. math:: 26 | 27 | m_{Z=z,0}(X) = P(D=d|X, Z=z),\quad z\in\{0,1\}. 28 | 29 | Remark that :math:`g_{d,Z=z,0}(X, \theta_0)` depends on the target parameter :math:`\theta_0`, such that 30 | the score is estimated with a preliminary estimate :math:`\tilde{\theta}`. For further details, see `Kallus et al. (2019) `_. 31 | -------------------------------------------------------------------------------- /doc/guide/data/panel_data.rst: -------------------------------------------------------------------------------- 1 | The ``DoubleMLPanelData`` class serves as data-backend for :ref:`DiD models ` and can be initialized from a dataframe. 2 | The class is a subclass of :ref:`DoubleMLData ` and inherits all methods and attributes. 3 | Furthermore, it provides additional methods and attributes to handle panel data. 4 | 5 | Key arguments 6 | """"""""""""" 7 | 8 | * ``id_col``: column to with unique identifiers for each unit 9 | * ``t_col``: column to specify the time periods of the observation 10 | * ``datetime_unit``: unit of the time periods (e.g. 'Y', 'M', 'D', 'h', 'm', 's') 11 | 12 | .. note:: 13 | The ``t_col`` can contain ``float``, ``int`` or ``datetime`` values. 14 | 15 | Example usage 16 | """"""""""""" 17 | 18 | .. tab-set:: 19 | 20 | .. tab-item:: Python 21 | :sync: py 22 | 23 | .. 
ipython:: python 24 | 25 | import numpy as np 26 | import doubleml as dml 27 | from doubleml.did.datasets import make_did_CS2021 28 | 29 | np.random.seed(42) 30 | df = make_did_CS2021(n_obs=500) 31 | dml_data = dml.data.DoubleMLPanelData( 32 | df, 33 | y_col="y", 34 | d_cols="d", 35 | id_col="id", 36 | t_col="t", 37 | x_cols=["Z1", "Z2", "Z3", "Z4"], 38 | datetime_unit="M" 39 | ) 40 | 41 | print(dml_data) 42 | -------------------------------------------------------------------------------- /doc/guide/scores/plm/plr_score.rst: -------------------------------------------------------------------------------- 1 | For the PLR model implemented in ``DoubleMLPLR`` one can choose between 2 | ``score='partialling out'`` and ``score='IV-type'``. 3 | 4 | ``score='partialling out'`` implements the score function: 5 | 6 | .. math:: 7 | 8 | \psi(W; \theta, \eta) &:= [Y - \ell(X) - \theta (D - m(X))] [D - m(X)] 9 | 10 | &= - (D - m(X)) (D - m(X)) \theta + (Y - \ell(X)) (D - m(X)) 11 | 12 | &= \psi_a(W; \eta) \theta + \psi_b(W; \eta) 13 | 14 | with :math:`\eta=(\ell,m)`, where 15 | 16 | .. math:: 17 | 18 | \ell_0(X) &:= \mathbb{E}[Y \mid X] = \theta_0\mathbb{E}[D \mid X] + g(X), 19 | 20 | m_0(X) &:= \mathbb{E}[D \mid X]. 21 | 22 | The components of the linear score are 23 | 24 | .. math:: 25 | 26 | \psi_a(W; \eta) &= - (D - m(X)) (D - m(X)), 27 | 28 | \psi_b(W; \eta) &= (Y - \ell(X)) (D - m(X)). 29 | 30 | ``score='IV-type'`` implements the score function: 31 | 32 | .. math:: 33 | 34 | \psi(W; \theta, \eta) &:= [Y - D \theta - g(X)] [D - m(X)] 35 | 36 | &= - D (D - m(X)) \theta + (Y - g(X)) (D - m(X)) 37 | 38 | &= \psi_a(W; \eta) \theta + \psi_b(W; \eta) 39 | 40 | with :math:`\eta=(g,m)`, where 41 | 42 | .. math:: 43 | 44 | g_0(X) &:= \mathbb{E}[Y - D \theta_0\mid X], 45 | 46 | m_0(X) &:= \mathbb{E}[D \mid X]. 47 | 48 | The components of the linear score are 49 | 50 | .. math:: 51 | 52 | \psi_a(W; \eta) &= - D (D - m(X)), 53 | 54 | \psi_b(W; \eta) &= (Y - g(X)) (D - m(X)). -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/literature.yml: -------------------------------------------------------------------------------- 1 | name: Double Machine Learning Literature Overview 2 | description: Issues related to the double machine learning literature overview 3 | title: "[ADD/EDIT] Reference in literature overview" 4 | labels: ["literature"] 5 | assignees: 6 | - FrederikBornemann 7 | 8 | body: 9 | - type: checkboxes 10 | attributes: 11 | label: Type of the issue 12 | options: 13 | - label: "add reference" 14 | required: false 15 | - label: "edit reference" 16 | required: false 17 | - type: textarea 18 | attributes: 19 | label: Proposed change or addition 20 | description: | 21 | Propose a change or addition for the [double machine learning literature overview](https://docs.doubleml.org/stable/literature/literature.html). 22 | In case of a change request, please also copy the existing entry below here. 23 | validations: 24 | required: true 25 | - type: checkboxes 26 | attributes: 27 | label: Checklist 28 | description: | 29 | Your reference should contain the [Core Components of an APA Reference](https://www.mendeley.com/guides/apa-citation-guide/). 30 | Don't worry about formatting! 
31 | options: 32 | - label: "Author(s)" 33 | required: false 34 | - label: "Date" 35 | required: false 36 | - label: "Title" 37 | required: false 38 | - label: "Publisher" 39 | required: false 40 | - label: "URL" 41 | required: false 42 | -------------------------------------------------------------------------------- /doc/guide/scores/plm/lplr_score.rst: -------------------------------------------------------------------------------- 1 | For the LPLR model implemented in ``DoubleMLLPLR`` one can choose between 2 | ``score='nuisance_space'`` and ``score='instrument'``. 3 | 4 | ``score='nuisance_space'`` implements the score function: 5 | 6 | .. math:: 7 | 8 | \psi(W, \beta, \eta) := \psi(X) \{Y e^{\beta D} -(1-Y)e^{r_0(X)} \} \{ D - m_0(X)\} 9 | 10 | with nuisance elements :math:`\eta = { r(\cdot), m(\cdot), \psi(\cdot) }`, where 11 | 12 | .. math:: 13 | 14 | r_0(X) = t_0(X) - \breve \beta a_0(X), 15 | 16 | m_0(X) = \mathbb{E} [D | X, Y=0], 17 | 18 | \psi(X) = \text{expit} (-r_0(X)). 19 | 20 | For the estimation of :math:`r_0(X)`, we further need to obtain a preliminary estimate :math:`\breve \beta` and 21 | :math:`M (D, X) = \mathbb{P} [Y=1 | D, X]` as described in `Liu et al. (2021) `_ 22 | and the following estimates: 23 | 24 | .. math:: 25 | 26 | t_0(X) = \mathbb{E} [\text{logit}(M (D, X)) | X], 27 | 28 | a_0(X) = \mathbb{E} [D | X]. 29 | 30 | 31 | 32 | ``score='instrument'`` implements the score function: 33 | 34 | .. math:: 35 | 36 | \psi(W; \beta, \eta) := \mathbb E [ \{Y - \text{expit} (\beta_0 D + r_0(X )) \} Z_0 ] 37 | 38 | 39 | with :math:`Z_0=D-m(X)` and :math:`\eta = { r(\cdot), m(\cdot), \psi(\cdot) }`, where 40 | 41 | .. math:: 42 | 43 | r_0(X) = t_0(X) - \breve \beta a_0(X), 44 | 45 | m_0(X) = \mathbb{E} [D | X]. 46 | 47 | and :math:`r_0(X)` is computed as for ``score='nuisance_space'``. -------------------------------------------------------------------------------- /doc/guide/sensitivity/did/did_sensitivity.inc: -------------------------------------------------------------------------------- 1 | The following difference-in-differences models are implemented. 2 | 3 | .. note:: 4 | Remark that :ref:`sensitivity_benchmark` is only relevant for ``score='observational'``, since no effect of :math:`X` on treatment assignment is assumed. 5 | Generally, we recommend ``score='observational'``, if unobserved confounding seems plausible. 6 | 7 | 8 | .. _sensitivity-did-pa: 9 | 10 | Difference-in-Differences for Panel Data 11 | ======================================== 12 | 13 | .. include:: /guide/sensitivity/did/did_pa_sensitivity.rst 14 | 15 | 16 | .. _sensitivity-did-cs: 17 | 18 | Difference-in-Differences for repeated cross-sections 19 | ===================================================== 20 | 21 | .. include:: /guide/sensitivity/did/did_cs_sensitivity.rst 22 | 23 | 24 | .. _sensitivity-did-binary: 25 | 26 | Two treatment periods 27 | ====================== 28 | 29 | 30 | .. warning:: 31 | This documentation refers to the deprecated implementation for two time periods. 32 | This functionality will be removed in a future version. The generalized versions are :ref:`sensitivity-did-pa` and :ref:`sensitivity-did-cs`. 33 | 34 | 35 | .. _sensitivity-did-pa-binary: 36 | 37 | Panel Data 38 | """""""""" 39 | 40 | .. include:: /guide/sensitivity/did/did_pa_binary_sensitivity.rst 41 | 42 | 43 | .. _sensitivity-did-cs-binary: 44 | 45 | Repeated Cross-Sectional Data 46 | """"""""""""""""""""""""""""" 47 | 48 | .. 
include:: /guide/sensitivity/did/did_cs_binary_sensitivity.rst -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2019-2023 Philipp Bach, Victor Chernozhukov, Sven Klaassen, Malte S. Kurz, Martin Spindler 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /doc/guide/scores/irm/irm_score.rst: -------------------------------------------------------------------------------- 1 | For the IRM model implemented in ``DoubleMLIRM`` one can choose between 2 | ``score='ATE'`` and ``score='ATTE'``. Furthermore, weights :math:`\omega(Y,D,X)` and 3 | 4 | .. math:: 5 | 6 | \bar{\omega}(X) = \mathbb{E}[\omega(Y,D,X)|X] 7 | 8 | can be specified. The general score function takes the form 9 | 10 | .. math:: 11 | 12 | \psi(W; \theta, \eta) :=\; &\omega(Y,D,X) \cdot (g(1,X) - g(0,X)) 13 | 14 | & + \bar{\omega}(X)\cdot \bigg(\frac{D (Y - g(1,X))}{m(X)} - \frac{(1 - D)(Y - g(0,X))}{1 - m(X)}\bigg) - \theta 15 | 16 | =& \psi_a(W; \eta) \theta + \psi_b(W; \eta) 17 | 18 | with :math:`\eta=(g,m)` and where the components of the linear score are 19 | 20 | .. math:: 21 | 22 | \psi_a(W; \eta) =& - 1, 23 | 24 | \psi_b(W; \eta) =\; &\omega(Y,D,X) \cdot (g(1,X) - g(0,X)) 25 | 26 | & + \bar{\omega}(X)\cdot \bigg(\frac{D (Y - g(1,X))}{m(X)} - \frac{(1 - D)(Y - g(0,X))}{1 - m(X)}\bigg). 27 | 28 | If no weights are specified, ``score='ATE'`` sets the weights 29 | 30 | .. math:: 31 | 32 | \omega(Y,D,X) &= 1 33 | 34 | \bar{\omega}(X) &= 1 35 | 36 | whereas ``score='ATTE'`` changes weights to: 37 | 38 | .. math:: 39 | 40 | \omega(Y,D,X) &= \frac{D}{\mathbb{E}_n[D]} 41 | 42 | \bar{\omega}(Y,D,X) &= \frac{m(X)}{\mathbb{E}_n[D]}. 43 | 44 | This score is identical to the original presentation in Section 5.1. of Chernozhukov et al. (2018) 45 | 46 | .. 
math:: 47 | 48 | \psi_a(W; \eta) &= -\frac{D}{\mathbb{E}_n[D]} 49 | 50 | \psi_b(W; \eta) &= \frac{D(Y-g(0,X))}{\mathbb{E}_n[D]} - \frac{m(X)(1-D)(Y-g(0,X))}{\mathbb{E}_n[D](1-m(X))}. 51 | 52 | For more details on other weight specifications, see :ref:`weighted_cates`. 53 | -------------------------------------------------------------------------------- /doc/guide/sensitivity.rst: -------------------------------------------------------------------------------- 1 | .. _sensitivity: 2 | 3 | Sensitivity analysis 4 | ------------------------ 5 | 6 | The :ref:`DoubleML ` package implements sensitivity analysis with respect to omitted variable bias 7 | based on `Chernozhukov et al. (2022) `_. 8 | 9 | .. _sensitivity_general: 10 | 11 | General algorithm 12 | +++++++++++++++++ 13 | 14 | The section :ref:`sensitivity_theory` contains a general summary and the relevant definitions, whereas :ref:`sensitivity_implementation` considers 15 | the general part of the implementation. 16 | 17 | .. _sensitivity_theory: 18 | 19 | Theory 20 | ~~~~~~ 21 | 22 | .. include:: ./sensitivity/theory.rst 23 | 24 | .. _sensitivity_implementation: 25 | 26 | Implementation 27 | ~~~~~~~~~~~~~~ 28 | 29 | .. include:: ./sensitivity/implementation.rst 30 | 31 | .. _sensitivity_benchmark: 32 | 33 | Benchmarking 34 | ~~~~~~~~~~~~ 35 | 36 | .. include:: ./sensitivity/benchmarking.rst 37 | 38 | .. _sensitivity_models: 39 | 40 | Model-specific implementations 41 | +++++++++++++++++++++++++++++++++++ 42 | 43 | This section contains the implementation details for each specific model and model specific interpretations. 44 | 45 | .. _plm-sensitivity: 46 | 47 | Partially linear models (PLM) 48 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 49 | 50 | .. include:: sensitivity/plm/plm_sensitivity.inc 51 | 52 | 53 | .. _irm-sensitivity: 54 | 55 | Interactive regression models (IRM) 56 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 57 | 58 | .. include:: sensitivity/irm/irm_sensitivity.inc 59 | 60 | 61 | .. _did-sensitivity: 62 | 63 | Difference-in-Differences Models 64 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 65 | 66 | .. include:: sensitivity/did/did_sensitivity.inc 67 | -------------------------------------------------------------------------------- /doc/guide/data/did_data.rst: -------------------------------------------------------------------------------- 1 | The ``DoubleMLDIDData`` class tailors :ref:`DoubleMLData ` to difference-in-differences 2 | applications. It handles both panel settings and repeated cross-sections by tracking an optional time indicator. 3 | 4 | Key arguments 5 | """"""""""""" 6 | 7 | * ``t_col``: column containing the time variable for repeated cross-sections. It 8 | must be unique from ``y_col``, ``d_cols``, ``x_cols``, ``z_cols`` and 9 | ``cluster_cols``. 10 | * ``cluster_cols``: optional cluster identifiers inherited from 11 | :class:`doubleml.DoubleMLData`. 12 | * ``force_all_d_finite``: controls how missing or infinite treatment values are 13 | handled. For standard DiD applications it defaults to ``True``. 14 | 15 | ``DoubleMLDIDData`` exposes additional helpers such as the ``t`` property and 16 | an extended ``from_arrays`` constructor that accepts the ``t`` array (and 17 | ``cluster_vars``) alongside the standard covariates. 18 | 19 | Example usage 20 | """"""""""""" 21 | 22 | .. tab-set:: 23 | 24 | .. tab-item:: Python 25 | :sync: py 26 | 27 | .. 
ipython:: python 28 | :okwarning: 29 | 30 | import doubleml as dml 31 | from doubleml.did.datasets import make_did_SZ2020 32 | 33 | df = make_did_SZ2020(n_obs=500, return_type="DataFrame") 34 | print(df.head()) 35 | dml_data = dml.DoubleMLDIDData( 36 | df, 37 | y_col="y", 38 | d_cols="d", 39 | ) 40 | 41 | # from arrays 42 | x, y, d, t = make_did_SZ2020(n_obs=200, return_type="array") 43 | dml_data_arrays = dml.DoubleMLDIDData.from_arrays(x, y, d) 44 | print(dml_data) 45 | 46 | -------------------------------------------------------------------------------- /doc/guide/sensitivity/did/did_cs_binary_sensitivity.rst: -------------------------------------------------------------------------------- 1 | In the :ref:`did-cs-model` with ``score='observational'`` and ``in_sample_normalization=True`` the score function implies the following representations 2 | 3 | .. math:: 4 | 5 | m(W,g) &= \Big(\big(g(1,1,X) - g(1,0,X)\big) - \big(g(0,1,X) - g(0,0,X)\big)\Big) \frac{D}{\mathbb{E}[D]} 6 | 7 | \alpha(W) &= \frac{DT}{\mathbb{E}[DT]} - \frac{D(1-T)}{\mathbb{E}[D(1-T)]} 8 | 9 | &\quad - \frac{m(X)(1-D)T}{1-m(X)}\mathbb{E}\left[\frac{m(X)(1-D)T}{1-m(X)}\right]^{-1} 10 | 11 | &\quad + \frac{m(X)(1-D)(1-T)}{1-m(X)}\mathbb{E}\left[\frac{m(X)(1-D)(1-T)}{1-m(X)}\right]^{-1}. 12 | 13 | If instead ``in_sample_normalization=False``, the Riesz representer (after simplifications) changes to 14 | 15 | .. math:: 16 | 17 | \alpha(W) = \left(\frac{T}{\mathbb{E}[D]\mathbb{E}[T]} + \frac{1-T}{\mathbb{E}[D](1-\mathbb{E}[T])}\right)\left(D - (1-D)\frac{m(X)}{1-m(X)}\right). 18 | 19 | For ``score='experimental'`` and ``in_sample_normalization=True`` the score function implies the following representations 20 | 21 | .. math:: 22 | 23 | m(W,g) &= \big(g(1,1,X) - g(1,0,X)\big) - \big(g(0,1,X) - g(0,0,X)\big) 24 | 25 | \alpha(W) &= \frac{DT}{\mathbb{E}[DT]} - \frac{D(1-T)}{\mathbb{E}[D(1-T)]} - \frac{(1-D)T}{\mathbb{E}[(1-D)T]} + \frac{(1-D)(1-T)}{\mathbb{E}[(1-D)(1-T)]}. 26 | 27 | And again, if instead ``in_sample_normalization=False``, the Riesz representer (after simplifications) changes to 28 | 29 | .. math:: 30 | 31 | \alpha(W) = \frac{DT}{\mathbb{E}[D]\mathbb{E}[T]} - \frac{D(1-T)}{\mathbb{E}[D](1-\mathbb{E}[T])} - \frac{(1-D)T}{(1-\mathbb{E}[D])\mathbb{E}[T]} + \frac{(1-D)(1-T)}{(1-\mathbb{E}[D])(1-\mathbb{E}[T])}. 32 | 33 | 34 | The ``nuisance_elements`` are then computed with plug-in versions according to the general :ref:`sensitivity_implementation`. -------------------------------------------------------------------------------- /doc/guide/sensitivity/did/did_pa_sensitivity.rst: -------------------------------------------------------------------------------- 1 | For a detailed description of the scores and nuisance elements, see :ref:`did-pa-score`. 2 | 3 | In the :ref:`did-pa-model` with ``score='observational'`` and ``in_sample_normalization=True`` the score function implies the following representations 4 | 5 | .. math:: 6 | 7 | m(W,g) &= \big(g(1,X) - g(0,X)\big)\cdot \frac{G^{\mathrm{g}}}{\mathbb{E}[G^{\mathrm{g}}]}\cdot \max(G^{\mathrm{g}}, C^{(\cdot)}) 8 | 9 | \alpha(W) &= \left(\frac{G^{\mathrm{g}}}{\mathbb{E}[G^{\mathrm{g}}]} - \frac{\frac{m(X)(1-G^{\mathrm{g}})}{1-m(X)}}{\mathbb{E}\left[\frac{m(X)(1-G^{\mathrm{g}})}{1-m(X)}\right]}\right) \cdot \max(G^{\mathrm{g}}, C^{(\cdot)}). 10 | 11 | If instead ``in_sample_normalization=False``, the Riesz representer changes to 12 | 13 | .. 
math:: 14 | 15 | \alpha(W) = \left(\frac{G^{\mathrm{g}}}{\mathbb{E}[G^{\mathrm{g}}]} - \frac{m(X)(1-G^{\mathrm{g}})}{\mathbb{E}[G^{\mathrm{g}}](1-m(X))}\right) \cdot \max(G^{\mathrm{g}}, C^{(\cdot)}). 16 | 17 | For ``score='experimental'`` the score function implies the following representations 18 | 19 | .. math:: 20 | 21 | m(W,g) &= \big(g(1,X) - g(0,X)\big)\cdot \max(G^{\mathrm{g}}, C^{(\cdot)}) 22 | 23 | \alpha(W) &= \left(\frac{G^{\mathrm{g}}}{\mathbb{E}[G^{\mathrm{g}}]} - \frac{1-G^{\mathrm{g}}}{1-\mathbb{E}[G^{\mathrm{g}}]}\right) \cdot \max(G^{\mathrm{g}}, C^{(\cdot)}). 24 | 25 | The ``nuisance_elements`` are then computed with plug-in versions according to the general :ref:`sensitivity_implementation`, but the scores :math:`\psi_{\sigma^2}` and :math:`\psi_{\nu^2}` are scaled according to the sample size of the subset, i.e. with scaling factor :math:`c=\frac{n_{\text{ids}}}{n_{\text{subset}}}`. 26 | 27 | .. note:: 28 | Remark that the elements are only non-zero for units in the corresponding treatment group :math:`\mathrm{g}` and control group :math:`C^{(\cdot)}`, as :math:`1-G^{\mathrm{g}}=C^{(\cdot)}` if :math:`\max(G^{\mathrm{g}}, C^{(\cdot)})=1`. 29 | -------------------------------------------------------------------------------- /doc/guide/learners/r/minimum_req.rst: -------------------------------------------------------------------------------- 1 | The minimum requirement for a learner to be used for nuisance models in the :ref:`DoubleML ` package is 2 | 3 | * The implementation as a learner for regression or classification in the `mlr3 `_ package 4 | or its extension packages `mlr3learners `_ and 5 | `mlr3extralearners `_ . A guide on how to add a learner is provided in the 6 | `chapter on extending learners in the mlr3 book `_ . 7 | * The `mlr3 `_ package makes sure that the learners satisfy some core functionalities. 8 | To specify a specific learner in :ref:`DoubleML ` users can pass objects of the class 9 | `Learner `_. A fast way to construct these objects is to use the 10 | `mlr3 `_ function `lrn() `_. 11 | An introduction to learners in `mlr3 `_ is provided in the `chapter on learners of the mlr3 book `_. 12 | * It is also possible to pass learners that have been constructed from a pipeline with the `mlr3pipelines `_ 13 | package. 14 | * The models `DoubleML::DoubleMLIRM `_ and 15 | `DoubleML::DoubleMLIIVM `_ require classifiers. 16 | Users can also specify classifiers in the `DoubleML::DoubleMLPLR `_ 17 | in cases with binary treatment variables. 18 | * Hyperparameters of learners can either be set at instantiation in `mlr3 `_ or after 19 | instantiation using the ``set_ml_nuisance_params()`` method. 20 | 21 | 22 | An interactive list of provided learners in the `mlr3 `_ and extension packages can be found on the 23 | `website of the mlr3extralearners package `_. 
24 | -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "DoubleML Documentation Development", 3 | "image": "svenklaassen/doubleml-docs:latest", 4 | // "dockerFile": "Dockerfile.dev", 5 | // "context": "..", 6 | "workspaceFolder": "/workspace", // Folder inside the container for your project 7 | // Customizations for VS Code 8 | "customizations": { 9 | "vscode": { 10 | "extensions": [ 11 | "ms-python.python", // Python extension for VS Code 12 | "ms-azuretools.vscode-docker", // Docker integration for VS Code 13 | "njpwerner.autodocstring", // Optional: Auto-generate docstrings 14 | "ms-python.black-formatter", // Optional: Black formatter 15 | "streetsidesoftware.code-spell-checker", // Optional: Spell checker 16 | "github.copilot", // Add GitHub Copilot extension 17 | "GitHub.github-vscode-theme", // GitHub theme 18 | "github.vscode-github-actions", // GitHub Actions extension 19 | "ms-toolsai.jupyter", // Jupyter extension 20 | "charliermarsh.ruff" // Ruff extension 21 | ], 22 | "settings": { 23 | "python.defaultInterpreterPath": "/home/ubuntu/.venv/bin/python", 24 | "editor.formatOnSave": true, // Auto-format code when saving 25 | "editor.codeActionsOnSave": { 26 | "source.organizeImports": true // Auto-organize imports on save 27 | }, 28 | "python.linting.enabled": true, // Enable linting 29 | "python.linting.flake8Enabled": false, // Disable Flake8 for linting 30 | "python.linting.ruffEnabled": true, // Enable Ruff for linting 31 | "python.formatting.provider": "none", 32 | "[python]": { 33 | "editor.defaultFormatter": "ms-python.black-formatter", 34 | "editor.formatOnSave": true 35 | }, 36 | "python.testing.pytestEnabled": true, // Enable Pytest for testing 37 | "python.testing.pytestArgs": [], 38 | "python.testing.unittestEnabled": false, 39 | "files.exclude": { 40 | "**/__pycache__": true, // Hide __pycache__ directories 41 | "**/*.pyc": true, // Hide .pyc files 42 | "**/.DS_Store": true // Hide .DS_Store files (macOS) 43 | } 44 | } 45 | } 46 | }, 47 | "mounts": [ 48 | "source=${localWorkspaceFolder},target=/workspace,type=bind,consistency=cached" // Mount your local workspace into the container 49 | ], 50 | "remoteUser": "ubuntu", 51 | "postCreateCommand": "id && ls -la /workspace && echo 'Container is ready!'" 52 | } -------------------------------------------------------------------------------- /doc/guide/learners/python/evaluate_learners.rst: -------------------------------------------------------------------------------- 1 | To compare different learners it is possible to evaluate the out-of-sample performance of each learner. The ``summary`` 2 | already displays either the root-mean-squared error (for regressions) or log-loss (for classifications) for each learner 3 | and each corresponding repetition of cross-fitting (``n_rep`` argument). 4 | 5 | To illustrate the parameter tuning, we work with the following example. 6 | 7 | .. tab-set:: 8 | 9 | .. tab-item:: Python 10 | :sync: py 11 | 12 | .. 
ipython:: python 13 | 14 | import doubleml as dml 15 | from doubleml.plm.datasets import make_plr_CCDDHNR2018 16 | from sklearn.ensemble import RandomForestRegressor 17 | 18 | np.random.seed(1234) 19 | ml_l = RandomForestRegressor() 20 | ml_m = RandomForestRegressor() 21 | data = make_plr_CCDDHNR2018(alpha=0.5, return_type='DataFrame') 22 | obj_dml_data = dml.DoubleMLData(data, 'y', 'd') 23 | dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, ml_l, ml_m) 24 | dml_plr_obj.fit() 25 | print(dml_plr_obj) 26 | 27 | The losses of each learner are also stored in the ``nuisance_loss`` attribute. 28 | Further, the ``evaluate_learners()`` method allows evaluating customized evaluation metrics, e.g. the mean absolute error. 29 | The default option is still the root-mean-squared error for evaluation. 30 | 31 | .. tab-set:: 32 | 33 | .. tab-item:: Python 34 | :sync: py 35 | 36 | .. ipython:: python 37 | 38 | print(dml_plr_obj.nuisance_loss) 39 | print(dml_plr_obj.evaluate_learners()) 40 | 41 | To evaluate a customized metric one has to define a ``callable``. For some models (e.g. the IRM model) it is important that 42 | the metric can handle ``nan`` values as not all target values are known. 43 | 44 | .. tab-set:: 45 | 46 | .. tab-item:: Python 47 | :sync: py 48 | 49 | .. ipython:: python 50 | 51 | from sklearn.metrics import mean_absolute_error 52 | 53 | def mae(y_true, y_pred): 54 | subset = np.logical_not(np.isnan(y_true)) 55 | return mean_absolute_error(y_true[subset], y_pred[subset]) 56 | 57 | dml_plr_obj.evaluate_learners(learners=['ml_l'], metric=mae) 58 | 59 | A more detailed notebook on the choice of learners is available in the :ref:`example gallery `. -------------------------------------------------------------------------------- /doc/guide/learners/python/tune_hyperparams.rst: -------------------------------------------------------------------------------- 1 | Parameter tuning of learners for the nuisance functions of :ref:`DoubleML ` models can be done via 2 | the ``tune_ml_models()`` method. 3 | To illustrate the parameter tuning, we generate data from a sparse partially linear regression model. 4 | 5 | .. tab-set:: 6 | 7 | .. tab-item:: Python 8 | :sync: py 9 | 10 | .. ipython:: python 11 | 12 | import doubleml as dml 13 | import numpy as np 14 | 15 | np.random.seed(3141) 16 | n_obs = 200 17 | n_vars = 200 18 | theta = 3 19 | X = np.random.normal(size=(n_obs, n_vars)) 20 | d = np.dot(X[:, :3], np.array([5, 5, 5])) + np.random.standard_normal(size=(n_obs,)) 21 | y = theta * d + np.dot(X[:, :3], np.array([5, 5, 5])) + np.random.standard_normal(size=(n_obs,)) 22 | dml_data = dml.DoubleMLData.from_arrays(X, y, d) 23 | 24 | The hyperparameter-tuning is performed using `Optuna `_ as a backend. Here, we illustrate 25 | the tuning by defining a search space for the nuisance function learners over ``100`` trials. The most important input 26 | argument is the hyperparameter space via a dictionary of functions. This search space will internally be transformed into a 27 | suitable ``objective(trial)`` function for `Optuna `_. 28 | 29 | .. tab-set:: 30 | 31 | .. tab-item:: Python 32 | :sync: py 33 | 34 | .. 
ipython:: python 35 | :okwarning: 36 | 37 | import doubleml as dml 38 | from sklearn.linear_model import Lasso 39 | import optuna 40 | 41 | ml_l = Lasso() 42 | ml_m = Lasso() 43 | dml_plr_obj = dml.DoubleMLPLR(dml_data, ml_l, ml_m) 44 | 45 | def ml_l_params(trial): 46 | return {'alpha': trial.suggest_float('alpha', 0.05, 1.0)} 47 | 48 | def ml_m_params(trial): 49 | return {'alpha': trial.suggest_float('alpha', 0.05, 1.0)} 50 | 51 | param_space = {'ml_l': ml_l_params, 'ml_m': ml_m_params} 52 | optuna_settings = {'n_trials': 100, 'verbosity': optuna.logging.WARNING} 53 | 54 | dml_plr_obj.tune_ml_models(ml_param_space=param_space, optuna_settings=optuna_settings) 55 | 56 | print(dml_plr_obj.params) 57 | print(dml_plr_obj.fit().summary) 58 | 59 | A more detailed description of hyperparameter-tuning possibilities can be found in the :ref:`Example Gallery `. -------------------------------------------------------------------------------- /doc/guide/sensitivity/irm/apo_sensitivity.rst: -------------------------------------------------------------------------------- 1 | In the :ref:`irm-model` the (weighted) average potential outcome for the treatment level :math:`d` can be written as 2 | 3 | .. math:: 4 | 5 | \theta_0 = \mathbb{E}[g_0(d,X)\omega(Y,D,X)] 6 | 7 | where :math:`\omega(Y,D,X)` are weights (e.g. set to :math:`1` for the APO). 8 | This implies the following representations 9 | 10 | .. math:: 11 | 12 | m(W,g) &= g(d,X)\omega(Y,D,X) 13 | 14 | \alpha(W) &= \frac{1\lbrace D = d\rbrace }{m(X)}\cdot\mathbb{E}[\omega(Y,D,X)|X]. 15 | 16 | .. note:: 17 | 18 | In the :ref:`irm-model` the form and interpretation of ``cf_y`` only depends on the conditional expectation :math:`\mathbb{E}[Y|D,X]`. 19 | 20 | - ``cf_y`` has the interpretation as the *nonparametric partial* :math:`R^2` *of* :math:`A` *with* :math:`Y` *given* :math:`(D,X)` 21 | 22 | .. math:: 23 | 24 | \frac{\textrm{Var}(\mathbb{E}[Y|D,X,A]) - \textrm{Var}(\mathbb{E}[Y|D,X])}{\textrm{Var}(Y)-\textrm{Var}(\mathbb{E}[Y|D,X])} 25 | 26 | - ``cf_d`` takes the following form 27 | 28 | .. math:: 29 | 30 | \frac{\mathbb{E}\left[\frac{1}{P(D=d|X,A)}\right] - \mathbb{E}\left[\frac{1}{P(D=d|X)}\right]}{\mathbb{E}\left[\frac{1}{P(D=d|X,A)}\right]} 31 | 32 | where the numerator measures the *average change in inverse propensity weights for* :math:`D=d` *conditional on* :math:`A` *in addition to* :math:`X`. 33 | The denominator is the *average inverse propensity weights for* :math:`D=d` *conditional on* :math:`A` *and* :math:`X`. Consequently ``cf_d`` measures the *relative change in inverse propensity weights*. 34 | Including weights changes only the definition of ``cf_d`` to 35 | 36 | .. math:: 37 | 38 | \frac{\mathbb{E}\left[\frac{1}{P(D=d|X,A)}\mathbb{E}[\omega(Y,D,X)|X,A]^2\right] - \mathbb{E}\left[\frac{1}{P(D=d|X)}\mathbb{E}[\omega(Y,D,X)|X]^2\right]}{\mathbb{E}\left[\frac{1}{P(D=d|X,A)}\mathbb{E}[\omega(Y,D,X)|X,A]^2\right]} 39 | 40 | which has an interpretation as the *relative weighted change in inverse propensity weights*. 41 | 42 | The ``nuisance_elements`` are then computed with plug-in versions according to the general :ref:`sensitivity_implementation`. 43 | The default weights are set to one 44 | 45 | .. math:: 46 | 47 | \omega(Y,D,X) = 1, 48 | 49 | whereas 50 | 51 | .. math:: 52 | 53 | \bar{\omega}(X) := \mathbb{E}[\omega(Y,D,X)|X], 54 | 55 | has to be supplied for weights which depend on :math:`Y` or :math:`D`. 
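As a short illustration of how these elements are used in practice, a sensitivity analysis for an average potential outcome could be run as sketched below. This is only a rough sketch: the dataset helper, the learners and the confounding strengths ``cf_y``/``cf_d`` are arbitrary placeholders, and it assumes a ``DoubleMLAPO`` model with a ``treatment_level`` argument.

.. code-block:: python

    from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

    import doubleml as dml
    from doubleml.irm.datasets import make_irm_data

    df = make_irm_data(n_obs=500, return_type="DataFrame")
    dml_data = dml.DoubleMLData(df, "y", "d")

    dml_apo = dml.DoubleMLAPO(
        dml_data,
        ml_g=RandomForestRegressor(),
        ml_m=RandomForestClassifier(),
        treatment_level=1,
    )
    dml_apo.fit()

    # bounds and confidence intervals for the APO under the assumed confounding scenario
    dml_apo.sensitivity_analysis(cf_y=0.03, cf_d=0.03)
    print(dml_apo.sensitivity_summary)
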
56 | -------------------------------------------------------------------------------- /doc/examples/index.rst: -------------------------------------------------------------------------------- 1 | 2 | :parenttoc: True 3 | 4 | .. _examplegallery: 5 | 6 | Examples 7 | ========== 8 | 9 | Python: Case studies 10 | --------------------- 11 | 12 | These are case studies with the Python package :ref:`DoubleML `. 13 | 14 | General Examples 15 | ++++++++++++++++ 16 | 17 | .. nbgallery:: 18 | :name: case-studies-py 19 | 20 | py_double_ml_basics.ipynb 21 | py_double_ml_pension.ipynb 22 | py_double_ml_sensitivity.ipynb 23 | py_double_ml_apo.ipynb 24 | py_double_ml_irm_vs_apo.ipynb 25 | py_double_ml_lplr.ipynb 26 | py_double_ml_ssm.ipynb 27 | learners/py_optuna.ipynb 28 | learners/py_learner.ipynb 29 | py_double_ml_firststage.ipynb 30 | py_double_ml_multiway_cluster.ipynb 31 | py_double_ml_sensitivity_booking.ipynb 32 | learners/py_tabpfn.ipynb 33 | py_double_ml_basic_iv.ipynb 34 | py_double_ml_robust_iv.ipynb 35 | py_double_ml_plm_irm_hetfx.ipynb 36 | py_double_ml_meets_flaml.ipynb 37 | py_double_ml_rdflex.ipynb 38 | 39 | 40 | Effect Heterogeneity 41 | ++++++++++++++++++++ 42 | 43 | .. nbgallery:: 44 | :name: case-studies-py-heterogeneity 45 | 46 | py_double_ml_gate.ipynb 47 | py_double_ml_gate_plr.ipynb 48 | py_double_ml_cate.ipynb 49 | py_double_ml_cate_plr.ipynb 50 | py_double_ml_gate_sensitivity.ipynb 51 | py_double_ml_policy_tree.ipynb 52 | py_double_ml_pension_qte.ipynb 53 | py_double_ml_pq.ipynb 54 | py_double_ml_cvar.ipynb 55 | 56 | 57 | .. _did_examplegallery: 58 | 59 | Difference-in-Differences 60 | +++++++++++++++++++++++++ 61 | 62 | .. nbgallery:: 63 | :name: case-studies-py-did 64 | 65 | did/py_panel_simple.ipynb 66 | did/py_panel.ipynb 67 | did/py_panel_data_example.ipynb 68 | did/py_rep_cs.ipynb 69 | 70 | 71 | R: Case studies 72 | --------------- 73 | 74 | These are case studies with the R package :ref:`DoubleML `. 75 | 76 | .. nbgallery:: 77 | :name: case-studies-r 78 | 79 | R_double_ml_basics.ipynb 80 | R_double_ml_pension.ipynb 81 | R_double_ml_did.ipynb 82 | R_double_ml_multiway_cluster.ipynb 83 | R_double_ml_ssm.ipynb 84 | R_double_ml_basic_iv.ipynb 85 | 86 | Sandbox/Archive 87 | --------------- 88 | 89 | These are examples which are work-in-progress and/or not yet fully documented. 90 | 91 | .. nbgallery:: 92 | :name: sandbox_gallery 93 | :maxdepth: 1 94 | 95 | R_double_ml_pipeline.ipynb 96 | double_ml_bonus_data.ipynb 97 | did/py_did.ipynb 98 | did/py_did_pretest.ipynb 99 | -------------------------------------------------------------------------------- /doc/guide/learners/r/pipelines.rst: -------------------------------------------------------------------------------- 1 | Users can also specify learners that have been constructed from a pipeline using the `mlr3pipelines `_ 2 | package. In general, pipelines can be used to perform data preprocessing, feature selection, combine learners and even 3 | to perform hyperparameter tuning. In the following, we provide two examples on how to construct a single learner and how 4 | to stack different learners via a pipeline. For a more detailed introduction to `mlr3pipelines `_, 5 | we refer to the `Pipelines Chapter in the mlr3book `_. Moreover, a 6 | notebook on how to use `mlr3pipelines `_ in combination with :ref:`DoubleML ` 7 | is available in the example gallery. 8 | 9 | .. tab-set:: 10 | 11 | .. tab-item:: R 12 | :sync: r 13 | 14 | .. 
jupyter-execute:: 15 | 16 | library(DoubleML) 17 | library(mlr3) 18 | library(mlr3learners) 19 | library(mlr3pipelines) 20 | library(data.table) 21 | 22 | set.seed(3141) 23 | # Define random forest learner in a pipeline 24 | single_learner_pipeline = po("learner", lrn("regr.ranger", num.trees = 10)) 25 | 26 | # Use pipeline to create a new instance of a learner 27 | ml_g = as_learner(single_learner_pipeline) 28 | ml_m = as_learner(single_learner_pipeline) 29 | 30 | data = make_plr_CCDDHNR2018(alpha=0.5, return_type='data.table') 31 | obj_dml_data = DoubleMLData$new(data, y_col="y", d_cols="d") 32 | 33 | n_rep = 2 34 | n_folds = 3 35 | dml_plr_obj = DoubleMLPLR$new(obj_dml_data, ml_g, ml_m, n_rep=n_rep, n_folds=n_folds) 36 | dml_plr_obj$learner 37 | dml_plr_obj$fit() 38 | dml_plr_obj$summary() 39 | 40 | set.seed(3141) 41 | # Define ensemble learner in a pipeline 42 | ensemble_learner_pipeline = gunion(list( 43 | po("learner", lrn("regr.cv_glmnet", s = "lambda.min")), 44 | po("learner", lrn("regr.ranger")), 45 | po("learner", lrn("regr.rpart", cp = 0.01)))) %>>% 46 | po("regravg", 3) 47 | 48 | # Use pipeline to create a new instance of a learner 49 | ml_g = as_learner(ensemble_learner_pipeline) 50 | ml_m = as_learner(ensemble_learner_pipeline) 51 | 52 | obj_dml_data = DoubleMLData$new(data, y_col="y", d_cols="d") 53 | 54 | n_rep = 2 55 | n_folds = 3 56 | dml_plr_obj = DoubleMLPLR$new(obj_dml_data, ml_g, ml_m, n_rep=n_rep, n_folds=n_folds) 57 | dml_plr_obj$learner 58 | dml_plr_obj$fit() 59 | dml_plr_obj$summary() 60 | -------------------------------------------------------------------------------- /doc/guide/sensitivity/did/did_cs_sensitivity.rst: -------------------------------------------------------------------------------- 1 | For a detailed description of the scores and nuisance elements, see :ref:`did-cs-score`. 2 | 3 | In the :ref:`did-cs-model` with ``score='observational'`` and ``in_sample_normalization=True`` the score function implies the following representations 4 | 5 | .. math:: 6 | 7 | m(W,g) &= \Big(\big(g(1,1,X) - g(1,0,X)\big) - \big(g(0,1,X) - g(0,0,X)\big)\Big) \frac{G^{\mathrm{g}}}{\mathbb{E}[G^{\mathrm{g}}]} \cdot \max(G^{\mathrm{g}}, C^{(\cdot)}) 8 | 9 | \alpha(W) &= \Bigg(\frac{G^{\mathrm{g}}T}{\mathbb{E}[G^{\mathrm{g}}T]} - \frac{G^{\mathrm{g}}(1-T)}{\mathbb{E}[G^{\mathrm{g}}(1-T)]} 10 | 11 | &\quad - \frac{m(X)(1-G^{\mathrm{g}})T}{1-m(X)}\mathbb{E}\left[\frac{m(X)(1-G^{\mathrm{g}})T}{1-m(X)}\right]^{-1} 12 | 13 | &\quad + \frac{m(X)(1-G^{\mathrm{g}})(1-T)}{1-m(X)}\mathbb{E}\left[\frac{m(X)(1-G^{\mathrm{g}})(1-T)}{1-m(X)}\right]^{-1} \Bigg) \cdot \max(G^{\mathrm{g}}, C^{(\cdot)}). 14 | 15 | If instead ``in_sample_normalization=False``, the Riesz representer changes to 16 | 17 | .. math:: 18 | 19 | \alpha(W) = \left(\frac{T}{\mathbb{E}[G^{\mathrm{g}}]\mathbb{E}[T]} + \frac{1-T}{\mathbb{E}[G^{\mathrm{g}}](1-\mathbb{E}[T])}\right)\left(G^{\mathrm{g}} - (1-G^{\mathrm{g}})\frac{m(X)}{1-m(X)}\right) \cdot \max(G^{\mathrm{g}}, C^{(\cdot)}). 20 | 21 | For ``score='experimental'`` the score function implies the following representations 22 | 23 | .. 
math:: 24 | 25 | m(W,g) &= \big(g(1,1,X) - g(1,0,X)\big) - \big(g(0,1,X) - g(0,0,X)\big) \cdot \max(G^{\mathrm{g}}, C^{(\cdot)}) 26 | 27 | \alpha(W) &= \left(\frac{G^{\mathrm{g}}T}{\mathbb{E}[G^{\mathrm{g}}T]} - \frac{G^{\mathrm{g}}(1-T)}{\mathbb{E}[G^{\mathrm{g}}(1-T)]} - \frac{(1-G^{\mathrm{g}})T}{\mathbb{E}[(1-G^{\mathrm{g}})T]} + \frac{(1-G^{\mathrm{g}})(1-T)}{\mathbb{E}[(1-G^{\mathrm{g}})(1-T)]}\right) \cdot \max(G^{\mathrm{g}}, C^{(\cdot)}). 28 | 29 | And again, if instead ``in_sample_normalization=False``, the Riesz representer changes to 30 | 31 | .. math:: 32 | 33 | \alpha(W) = \left(\frac{G^{\mathrm{g}}T}{\mathbb{E}[G^{\mathrm{g}}]\mathbb{E}[T]} - \frac{G^{\mathrm{g}}(1-T)}{\mathbb{E}[G^{\mathrm{g}}](1-\mathbb{E}[T])} - \frac{(1-G^{\mathrm{g}})T}{(1-\mathbb{E}[G^{\mathrm{g}}])\mathbb{E}[T]} + \frac{(1-G^{\mathrm{g}})(1-T)}{(1-\mathbb{E}[G^{\mathrm{g}}])(1-\mathbb{E}[T])}\right) \cdot \max(G^{\mathrm{g}}, C^{(\cdot)}). 34 | 35 | The ``nuisance_elements`` are then computed with plug-in versions according to the general :ref:`sensitivity_implementation`, but the scores :math:`\psi_{\sigma^2}` and :math:`\psi_{\nu^2}` are scaled according to the sample size of the subset, i.e. with scaling factor :math:`c=\frac{n_{\text{obs}}}{n_{\text{subset}}}`. 36 | 37 | .. note:: 38 | Remark that the elements are only non-zero for units in the corresponding treatment group :math:`\mathrm{g}` and control group :math:`C^{(\cdot)}`, as :math:`1-G^{\mathrm{g}}=C^{(\cdot)}` if :math:`\max(G^{\mathrm{g}}, C^{(\cdot)})=1`. -------------------------------------------------------------------------------- /doc/guide/learners/python/set_hyperparams.rst: -------------------------------------------------------------------------------- 1 | The learners are set during initialization of the :ref:`DoubleML ` model classes 2 | :py:class:`doubleml.DoubleMLPLR`, :py:class:`doubleml.DoubleMLPLIV`, 3 | :py:class:`doubleml.DoubleMLIRM` and :py:class:`doubleml.DoubleMLIIVM`. 4 | Lets simulate some data and consider the partially linear regression model. 5 | We need to specify learners for the nuisance functions :math:`g_0(X) = E[Y|X]` and :math:`m_0(X) = E[D|X]`, 6 | for example :py:class:`sklearn.ensemble.RandomForestRegressor`. 7 | 8 | .. tab-set:: 9 | 10 | .. tab-item:: Python 11 | :sync: py 12 | 13 | .. ipython:: python 14 | 15 | import doubleml as dml 16 | from doubleml.plm.datasets import make_plr_CCDDHNR2018 17 | from sklearn.ensemble import RandomForestRegressor 18 | 19 | np.random.seed(1234) 20 | ml_l = RandomForestRegressor() 21 | ml_m = RandomForestRegressor() 22 | data = make_plr_CCDDHNR2018(alpha=0.5, return_type='DataFrame') 23 | obj_dml_data = dml.DoubleMLData(data, 'y', 'd') 24 | dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, ml_l, ml_m) 25 | dml_plr_obj.fit().summary 26 | 27 | Without further specification of the hyperparameters, default values are used. To set hyperparameters: 28 | 29 | * We can also use pre-parametrized learners, like ``RandomForestRegressor(n_estimators=10)``. 30 | * Alternatively, hyperparameters can also be set after initialization via the method 31 | ``set_ml_nuisance_params(learner, treat_var, params)`` 32 | 33 | 34 | .. tab-set:: 35 | 36 | .. tab-item:: Python 37 | :sync: py 38 | 39 | .. 
ipython:: python 40 | 41 | np.random.seed(1234) 42 | dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, 43 | RandomForestRegressor(n_estimators=10), 44 | RandomForestRegressor()) 45 | print(dml_plr_obj.fit().summary) 46 | 47 | np.random.seed(1234) 48 | dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, 49 | RandomForestRegressor(), 50 | RandomForestRegressor()) 51 | dml_plr_obj.set_ml_nuisance_params('ml_l', 'd', {'n_estimators': 10}); 52 | print(dml_plr_obj.fit().summary) 53 | 54 | Setting treatment-variable-specific or fold-specific hyperparameters: 55 | 56 | * In the multiple-treatment case, the method ``set_ml_nuisance_params(learner, treat_var, params)`` can be used to set 57 | different hyperparameters for different treatment variables. 58 | * The method ``set_ml_nuisance_params(learner, treat_var, params)`` accepts dicts and lists for ``params``. 59 | A dict should be provided if for each fold the same hyperparameters should be used. 60 | Fold-specific parameters are supported. To do so, provide a nested list as ``params``, where the outer list is of 61 | length ``n_rep`` and the inner list of length ``n_folds``. -------------------------------------------------------------------------------- /.github/workflows/deploy_docu_stable.yml: -------------------------------------------------------------------------------- 1 | # Workflow based on https://github.com/actions/starter-workflows/blob/main/ci/python-package.yml 2 | 3 | name: Deploy Docu (stable) 4 | 5 | on: 6 | workflow_dispatch: 7 | 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: ubuntu-22.04 13 | 14 | steps: 15 | - name: Check out the repo containing the docu source 16 | uses: actions/checkout@v4 17 | 18 | - name: Install graphviz 19 | run: sudo apt-get install graphviz 20 | 21 | - name: Install python 22 | uses: actions/setup-python@v5 23 | with: 24 | python-version: '3.12' 25 | - name: Install dependencies and the python package 26 | run: | 27 | python -m pip install --upgrade pip 28 | pip install -r requirements.txt 29 | 30 | - name: Add R repository 31 | run: | 32 | sudo apt install dirmngr gnupg apt-transport-https ca-certificates software-properties-common 33 | sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 34 | sudo add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/' 35 | - name: Install R 36 | run: | 37 | sudo apt-get update 38 | sudo apt-get install r-base 39 | sudo apt-get install r-base-dev 40 | sudo apt-get install -y zlib1g-dev libicu-dev pandoc make libcurl4-openssl-dev libssl-dev 41 | 42 | - name: Get user library folder 43 | run: | 44 | mkdir ${GITHUB_WORKSPACE}/tmp_r_libs_user 45 | echo R_LIBS_USER=${GITHUB_WORKSPACE}/tmp_r_libs_user >> $GITHUB_ENV 46 | 47 | - name: Query R version 48 | run: | 49 | writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") 50 | shell: Rscript {0} 51 | 52 | - name: Cache R packages 53 | uses: actions/cache@v4 54 | with: 55 | path: ${{ env.R_LIBS_USER }} 56 | key: doubleml-user-guide-stable-${{ hashFiles('.github/R-version') }} 57 | 58 | - name: Install R kernel for Jupyter and the R package DoubleML (dev) 59 | run: | 60 | install.packages('remotes') 61 | remotes::install_cran('DoubleML', dependencies = TRUE) 62 | install.packages(c('ggplot2', 'IRkernel', 'xgboost', 'hdm', 'reshape2', 'gridExtra', "igraph", "mlr3filters", "mlr3measures", "did")) 63 | IRkernel::installspec() 64 | shell: Rscript {0} 65 | 66 | - name: Build docu with sphinx 67 | run: | 68 | make -C doc html 69 | 
70 | - name: Deploy to stable 71 | uses: JamesIves/github-pages-deploy-action@v4 72 | with: 73 | repository-name: DoubleML/doubleml.github.io 74 | branch: main 75 | folder: doc/_build/html 76 | target-folder: stable 77 | git-config-name: DoubleML Deploy Bot 78 | git-config-email: DoubleML@users.noreply.github.com 79 | clean: true 80 | ssh-key: ${{ secrets.DEPLOY_KEY }} 81 | -------------------------------------------------------------------------------- /.devcontainer/docker_guide.md: -------------------------------------------------------------------------------- 1 | # Build Documentation with Development Container 2 | 3 | This guide shows how to use WSL2 (Windows Subsystem for Linux), Docker Desktop, Visual Studio Code (VS Code), and how to work with Development Containers in VS Code on a Windows machine. 4 | 5 | Requirements: 6 | - [VS Code](https://code.visualstudio.com/) 7 | - [WSL2](https://learn.microsoft.com/en-us/windows/wsl/install) 8 | - [Docker Desktop](https://docs.docker.com/desktop/setup/install/windows-install/) 9 | 10 | ## Step 1: Verify installations & Setup 11 | 12 | You can verify the installations in a terminal: 13 | 14 | ```bash 15 | code --version 16 | wsl --version 17 | docker --version 18 | ``` 19 | 20 | ### Configure Docker to Use WSL2 21 | 22 | See [Docker Desktop Documentation](https://docs.docker.com/desktop/features/wsl/#turn-on-docker-desktop-wsl-2). 23 | 1. Open Docker Desktop. 24 | 2. Go to **Settings > General** and make sure **Use the WSL 2 based engine** is checked. 25 | 3. Under **Settings > Resources > WSL Integration**, ensure that your desired Linux distribution(s) are selected for integration with Docker. 26 | 27 | ### Install Extensions 28 | 29 | 1. Open Visual Studio Code. 30 | 2. Press `Ctrl+Shift+X` to open the Extensions view. 31 | 3. Search and install (includes WSL and Dev Containers Extensions): 32 | - [Remote Development Extension Pack](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.vscode-remote-extensionpack) 33 | 34 | Helpful VS Code Documentations: 35 | - [Developing in WSL](https://code.visualstudio.com/docs/remote/wsl) 36 | - [Developing inside a Container](https://code.visualstudio.com/docs/devcontainers/containers) 37 | 38 | 39 | ## Step 2: Open the Development Container (Using Pre-built Image) 40 | 41 | For faster setup, we'll use a pre-built Docker image: 42 | 43 | 1. Open the `doubleml-docs` repository in VS Code: 44 | 45 | ```bash 46 | code /path/to/doubleml-docs 47 | ``` 48 | 49 | 2. Open the Command Palette (`Ctrl+Shift+P`). 50 | 3. Type `Dev Containers: Reopen in Container`. 51 | 52 | VS Code will pull the `svenklaassen/doubleml-docs:latest` image (if needed) based on `devcontainer.json` and open the project in the container.
53 | This approach is much faster than building the container from scratch. VS Code automatically downloads the image from Docker Hub if it's not already on your system. 54 | 55 | 56 | ## Step 3: Build the documentation 57 | 58 | 1. Open a terminal in VS Code (`Terminal > New Terminal`) 59 | 60 | 2. Build the documentation: 61 | 62 | ```bash 63 | cd doc 64 | make html 65 | ``` 66 | 67 | To build without notebook examples: 68 | ```bash 69 | make html NBSPHINX_EXECUTE=never 70 | ``` 71 | 72 | 3. View the built documentation by opening the output files: 73 | 74 | ```bash 75 | # On Windows 76 | explorer.exe _build/html 77 | 78 | # On Linux 79 | xdg-open _build/html 80 | 81 | # On macOS 82 | open _build/html 83 | ``` 84 | -------------------------------------------------------------------------------- /doc/guide/sensitivity/plm/plr_sensitivity.rst: -------------------------------------------------------------------------------- 1 | In the :ref:`plr-model` the confounding strength ``cf_d`` can be further simplified to match the explanation of ``cf_y``. 2 | Given that the Riesz representer takes the following form 3 | 4 | .. math:: 5 | 6 | \alpha(W) = \frac{D-\mathbb{E}[D|X]}{\mathbb{E}[(D-\mathbb{E}[D|X])^2]} 7 | 8 | \tilde{\alpha}(\tilde{W}) = \frac{D-\mathbb{E}[D|X,A]}{\mathbb{E}[(D-\mathbb{E}[D|X,A])^2]} 9 | 10 | one can show that 11 | 12 | .. math:: 13 | 14 | C_D^2 :=\frac{\frac{\mathbb{E}\big[\big(\mathbb{E}[D|X,A] - \mathbb{E}[D|X]\big)^2\big]}{\mathbb{E}\big[\big(D - \mathbb{E}[D|X]\big)^2\big]}}{1-\frac{\mathbb{E}\big[\big(\mathbb{E}[D|X,A] - \mathbb{E}[D|X]\big)^2\big]}{\mathbb{E}\big[\big(D - \mathbb{E}[D|X]\big)^2\big]}}. 15 | 16 | Therefore, 17 | 18 | - ``cf_y``:math:`:=\frac{\mathbb{E}[(\tilde{g}(\tilde{W}) - g(W))^2]}{\mathbb{E}[(Y - g(W))^2]}` measures the proportion of residual variance in the outcome :math:`Y` explained by the latent confounders :math:`A` 19 | 20 | - ``cf_d``:math:`:=\frac{\mathbb{E}\big[\big(\mathbb{E}[D|X,A] - \mathbb{E}[D|X]\big)^2\big]}{\mathbb{E}\big[\big(D - \mathbb{E}[D|X]\big)^2\big]}` measures the proportion of residual variance in the treatment :math:`D` explained by the latent confounders :math:`A` 21 | 22 | .. note:: 23 | In the :ref:`plr-model`, both ``cf_y`` and ``cf_d`` can be interpreted as *nonparametric partial* :math:`R^2` 24 | 25 | - ``cf_y`` has the interpretation as the *nonparametric partial* :math:`R^2` *of* :math:`A` *with* :math:`Y` *given* :math:`(D,X)` 26 | 27 | .. math:: 28 | 29 | \frac{\textrm{Var}(\mathbb{E}[Y|D,X,A]) - \textrm{Var}(\mathbb{E}[Y|D,X])}{\textrm{Var}(Y)-\textrm{Var}(\mathbb{E}[Y|D,X])} 30 | 31 | - ``cf_d`` has the interpretation as the *nonparametric partial* :math:`R^2` *of* :math:`A` *with* :math:`D` *given* :math:`X` 32 | 33 | .. math:: 34 | 35 | \frac{\textrm{Var}(\mathbb{E}[D|X,A]) - \textrm{Var}(\mathbb{E}[D|X])}{\textrm{Var}(D)-\textrm{Var}(\mathbb{E}[D|X])} 36 | 37 | Using the partially linear regression model with ``score='partialling out'``, the ``nuisance_elements`` are implemented in the following form 38 | 39 | .. math:: 40 | 41 | \hat{\sigma}^2 &:= \mathbb{E}_n\Big[\big(Y-\hat{l}(X) - \hat{\theta}(D-\hat{m}(X))\big)^2\Big] 42 | 43 | \hat{\nu}^2 &:= \mathbb{E}_n[\hat{\alpha}(W)^2] = \frac{1}{\mathbb{E}_n\big[(D - \hat{m}(X))^2\big]} 44 | 45 | with scores 46 | 47 | .. 
math:: 48 | 49 | \psi_{\sigma^2}(W, \hat{\sigma}^2, g) &:= \big(Y-\hat{l}(X) - \hat{\theta}(D-\hat{m}(X))\big)^2 - \hat{\sigma}^2 50 | 51 | \psi_{\nu^2}(W, \hat{\nu}^2, \alpha) &:= \hat{\nu}^2 - \big(D-\hat{m}(X)\big)^2\big(\hat{\nu}^2\big)^2. 52 | 53 | If ``score='IV-type'``, the sensitivity elements are instead set to 54 | 55 | .. math:: 56 | 57 | \hat{\sigma}^2 &:= \mathbb{E}_n\Big[\big(Y - \hat{\theta}D - \hat{g}(X)\big)^2\Big] 58 | 59 | \psi_{\sigma^2}(W, \hat{\sigma}^2, g) &:= \big(Y - \hat{\theta}D - \hat{g}(X)\big)^2 - \hat{\sigma}^2. -------------------------------------------------------------------------------- /doc/guide/scores/did/did_pa_binary_score.rst: -------------------------------------------------------------------------------- 1 | For the difference-in-differences model implemented in ``DoubleMLDID`` one can choose between 2 | ``score='observational'`` and ``score='experimental'``. 3 | 4 | ``score='observational'`` implements the score function (dropping the unit index :math:`i`): 5 | 6 | .. math:: 7 | 8 | \psi(W,\theta, \eta) 9 | :&= -\frac{D}{\mathbb{E}_n[D]}\theta + \left(\frac{D}{\mathbb{E}_n[D]} - \frac{\frac{m(X) (1-D)}{1-m(X)}}{\mathbb{E}_n\left[\frac{m(X) (1-D)}{1-m(X)}\right]}\right) \left(Y_1 - Y_0 - g(0,X)\right) 10 | 11 | &= \psi_a(W; \eta) \theta + \psi_b(W; \eta) 12 | 13 | where the components of the linear score are 14 | 15 | .. math:: 16 | 17 | \psi_a(W; \eta) &= - \frac{D}{\mathbb{E}_n[D]}, 18 | 19 | \psi_b(W; \eta) &= \left(\frac{D}{\mathbb{E}_n[D]} - \frac{\frac{m(X) (1-D)}{1-m(X)}}{\mathbb{E}_n\left[\frac{m(X) (1-D)}{1-m(X)}\right]}\right) \left(Y_1 - Y_0 - g(0,X)\right) 20 | 21 | and the nuisance elements :math:`\eta=(g, m)` are defined as 22 | 23 | .. math:: 24 | 25 | g_{0}(0, X) &= \mathbb{E}[Y_1 - Y_0|D=0, X] 26 | 27 | m_0(X) &= P(D=1|X). 28 | 29 | If ``in_sample_normalization='False'``, the score is set to 30 | 31 | .. math:: 32 | 33 | \psi(W,\theta,\eta) &= - \frac{D}{p}\theta + \frac{D - m(X)}{p(1-m(X))}\left(Y_1 - Y_0 -g(0,X)\right) 34 | 35 | &= \psi_a(W; \eta) \theta + \psi_b(W; \eta) 36 | 37 | with :math:`\eta=(g, m, p)`, where :math:`p_0 = \mathbb{E}[D]` is estimated on the cross-fitting folds. 38 | Remark that this will result in the same score, but just uses slightly different normalization. 39 | 40 | ``score='experimental'`` assumes that the treatment probability is independent of the covariates :math:`X` and 41 | implements the score function: 42 | 43 | .. math:: 44 | 45 | \psi(W,\theta, \eta) 46 | :=\; &-\theta + \left(\frac{D}{\mathbb{E}_n[D]} - \frac{1-D}{\mathbb{E}_n[1-D]}\right)\left(Y_1 - Y_0 -g(0,X)\right) 47 | 48 | &+ \left(1 - \frac{D}{\mathbb{E}_n[D]}\right) \left(g(1,X) - g(0,X)\right) 49 | 50 | =\; &\psi_a(W; \eta) \theta + \psi_b(W; \eta) 51 | 52 | where the components of the linear score are 53 | 54 | .. math:: 55 | 56 | \psi_a(W; \eta) \;= &- 1, 57 | 58 | \psi_b(W; \eta) \;= &\left(\frac{D}{\mathbb{E}_n[D]} - \frac{1-D}{\mathbb{E}_n[1-D]}\right)\left(Y_1 - Y_0 -g(0,X)\right) 59 | 60 | &+ \left(1 - \frac{D}{\mathbb{E}_n[D]}\right) \left(g(1,X) - g(0,X)\right) 61 | 62 | and the nuisance elements :math:`\eta=(g)` are defined as 63 | 64 | .. math:: 65 | 66 | g_{0}(0, X) &= \mathbb{E}[Y_1 - Y_0|D=0, X] 67 | 68 | g_{0}(1, X) &= \mathbb{E}[Y_1 - Y_0|D=1, X] 69 | 70 | Analogously, if ``in_sample_normalization='False'``, the score is set to 71 | 72 | ..
math:: 73 | 74 | \psi(W,\theta, \eta) 75 | :=\; &-\theta + \frac{D - p}{p(1-p)}\left(Y_1 - Y_0 -g(0,X)\right) 76 | 77 | &+ \left(1 - \frac{D}{p}\right) \left(g(1,X) - g(0,X)\right) 78 | 79 | =\; &\psi_a(W; \eta) \theta + \psi_b(W; \eta) 80 | 81 | with :math:`\eta=(g, p)`, where :math:`p_0 = \mathbb{E}[D]` is estimated on the cross-fitting folds. 82 | Remark that this will result in the same score, but just uses slightly different normalization. 83 | -------------------------------------------------------------------------------- /doc/guide/learners/python/tune_hyperparams_old.rst: -------------------------------------------------------------------------------- 1 | Parameter tuning of learners for the nuisance functions of :ref:`DoubleML ` models can be done via 2 | the ``tune()`` method. 3 | To illustrate the parameter tuning, we generate data from a sparse partially linear regression model. 4 | 5 | .. tab-set:: 6 | 7 | .. tab-item:: Python 8 | :sync: py 9 | 10 | .. ipython:: python 11 | 12 | import doubleml as dml 13 | import numpy as np 14 | 15 | np.random.seed(3141) 16 | n_obs = 200 17 | n_vars = 200 18 | theta = 3 19 | X = np.random.normal(size=(n_obs, n_vars)) 20 | d = np.dot(X[:, :3], np.array([5, 5, 5])) + np.random.standard_normal(size=(n_obs,)) 21 | y = theta * d + np.dot(X[:, :3], np.array([5, 5, 5])) + np.random.standard_normal(size=(n_obs,)) 22 | dml_data = dml.DoubleMLData.from_arrays(X, y, d) 23 | 24 | The hyperparameter-tuning is performed using either an exhaustive search over specified parameter values 25 | implemented in :class:`sklearn.model_selection.GridSearchCV` or via a randomized search implemented in 26 | :class:`sklearn.model_selection.RandomizedSearchCV`. 27 | 28 | .. tab-set:: 29 | 30 | .. tab-item:: Python 31 | :sync: py 32 | 33 | .. ipython:: python 34 | :okwarning: 35 | 36 | import doubleml as dml 37 | from sklearn.linear_model import Lasso 38 | 39 | ml_l = Lasso() 40 | ml_m = Lasso() 41 | dml_plr_obj = dml.DoubleMLPLR(dml_data, ml_l, ml_m) 42 | par_grids = {'ml_l': {'alpha': np.arange(0.05, 1., 0.1)}, 43 | 'ml_m': {'alpha': np.arange(0.05, 1., 0.1)}} 44 | dml_plr_obj.tune(par_grids, search_mode='grid_search'); 45 | print(dml_plr_obj.params) 46 | print(dml_plr_obj.fit().summary) 47 | 48 | np.random.seed(1234) 49 | par_grids = {'ml_l': {'alpha': np.arange(0.05, 1., 0.01)}, 50 | 'ml_m': {'alpha': np.arange(0.05, 1., 0.01)}} 51 | dml_plr_obj.tune(par_grids, search_mode='randomized_search', n_iter_randomized_search=20); 52 | print(dml_plr_obj.params) 53 | print(dml_plr_obj.fit().summary) 54 | 55 | Hyperparameter tuning can also be done with more sophisticated methods, like for example an iterative fitting along 56 | a regularization path implemented in :py:class:`sklearn.linear_model.LassoCV`. 57 | In this case the tuning should be done externally and the parameters can then be set via the 58 | ``set_ml_nuisance_params()`` method. 59 | 60 | .. tab-set:: 61 | 62 | .. tab-item:: Python 63 | :sync: py 64 | 65 | .. 
ipython:: python 66 | 67 | import doubleml as dml 68 | from sklearn.linear_model import LassoCV 69 | 70 | np.random.seed(1234) 71 | ml_l_tune = LassoCV().fit(dml_data.x, dml_data.y) 72 | ml_m_tune = LassoCV().fit(dml_data.x, dml_data.d) 73 | 74 | ml_l = Lasso() 75 | ml_m = Lasso() 76 | dml_plr_obj = dml.DoubleMLPLR(dml_data, ml_l, ml_m) 77 | dml_plr_obj.set_ml_nuisance_params('ml_l', 'd', {'alpha': ml_l_tune.alpha_}); 78 | dml_plr_obj.set_ml_nuisance_params('ml_m', 'd', {'alpha': ml_m_tune.alpha_}); 79 | print(dml_plr_obj.params) 80 | print(dml_plr_obj.fit().summary) 81 | -------------------------------------------------------------------------------- /.devcontainer/Dockerfile.dev: -------------------------------------------------------------------------------- 1 | FROM ubuntu:24.04 2 | 3 | # Set non-interactive mode to avoid prompts 4 | ENV DEBIAN_FRONTEND=noninteractive 5 | 6 | # Update package list and install dependencies 7 | RUN apt-get update && \ 8 | apt-get install -y \ 9 | software-properties-common \ 10 | graphviz \ 11 | wget \ 12 | dirmngr \ 13 | gnupg \ 14 | apt-transport-https \ 15 | ca-certificates \ 16 | git \ 17 | cmake \ 18 | locales && \ 19 | locale-gen en_US.UTF-8 && \ 20 | update-locale LANG=en_US.UTF-8 && \ 21 | apt-get clean && \ 22 | rm -rf /var/lib/apt/lists/* 23 | 24 | # Set environment variables for locale 25 | ENV LANG=en_US.UTF-8 26 | ENV LANGUAGE=en_US:en 27 | ENV LC_ALL=en_US.UTF-8 28 | 29 | # Install Python 3.12 30 | RUN add-apt-repository ppa:deadsnakes/ppa && \ 31 | apt-get update && \ 32 | apt-get install -y python3.12 python3.12-venv python3.12-dev python3-pip python3-full && \ 33 | update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1 && \ 34 | update-alternatives --install /usr/bin/python python /usr/bin/python3.12 1 && \ 35 | apt-get clean && \ 36 | rm -rf /var/lib/apt/lists/* 37 | 38 | # Add R repository and install R 39 | RUN wget -qO- https://cloud.r-project.org/bin/linux/ubuntu/marutter_pubkey.asc | tee /etc/apt/trusted.gpg.d/cran_ubuntu_key.asc && \ 40 | add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu noble-cran40/' && \ 41 | apt-get update && \ 42 | apt-get install -y r-base r-base-dev zlib1g-dev libicu-dev pandoc make libcurl4-openssl-dev libssl-dev && \ 43 | apt-get clean && \ 44 | rm -rf /var/lib/apt/lists/* 45 | 46 | # Reuse existing 'ubuntu' user (UID 1000) 47 | ARG USERNAME=ubuntu 48 | 49 | RUN mkdir -p /workspace && \ 50 | chown -R $USERNAME:$USERNAME /workspace 51 | 52 | # Create a directory for R user libraries 53 | RUN mkdir -p /usr/local/lib/R/site-library && \ 54 | chown -R $USERNAME:$USERNAME /usr/local/lib/R/site-library 55 | ENV R_LIBS_USER=/usr/local/lib/R/site-library 56 | 57 | # Switch to non-root user for remaining operations 58 | USER $USERNAME 59 | 60 | # Install Python packages in the virtual environment 61 | COPY --chown=$USERNAME:$USERNAME requirements.txt /tmp/requirements.txt 62 | RUN python -m venv /home/$USERNAME/.venv && \ 63 | /home/$USERNAME/.venv/bin/python -m pip install --upgrade pip && \ 64 | /home/$USERNAME/.venv/bin/pip install --no-cache-dir -r /tmp/requirements.txt && \ 65 | /home/$USERNAME/.venv/bin/pip install --no-cache-dir "DoubleML[rdd] @ git+https://github.com/DoubleML/doubleml-for-py.git@main" 66 | 67 | # Set the virtual environment as the default Python environment 68 | ENV PATH="/home/$USERNAME/.venv/bin:$PATH" 69 | 70 | # Install R packages and Jupyter kernel 71 | RUN Rscript -e "install.packages('remotes')" && \ 72 | Rscript -e 
"remotes::install_github('DoubleML/doubleml-for-r', dependencies = TRUE)" && \ 73 | Rscript -e "install.packages(c('ggplot2', 'IRkernel', 'xgboost', 'hdm', 'reshape2', 'gridExtra', 'igraph', 'mlr3filters', 'mlr3measures', 'did', dependencies=TRUE))" && \ 74 | Rscript -e "IRkernel::installspec()" 75 | 76 | # Set the working directory 77 | WORKDIR /workspace 78 | -------------------------------------------------------------------------------- /.github/workflows/test_build_docu_released.yml: -------------------------------------------------------------------------------- 1 | # Workflow based on https://github.com/actions/starter-workflows/blob/main/ci/python-package.yml 2 | 3 | name: Test Docu Build (with released pkgs) 4 | 5 | on: 6 | push: 7 | branches: 8 | - main 9 | pull_request: 10 | branches: 11 | - main 12 | - dev 13 | workflow_dispatch: 14 | inputs: 15 | nbsphinx-execute: 16 | description: 'Execute notebooks with nbsphinx' 17 | required: false 18 | default: 'auto' 19 | schedule: 20 | - cron: "0 9 * * 1,3,5" 21 | 22 | 23 | jobs: 24 | build: 25 | 26 | runs-on: ubuntu-22.04 27 | 28 | steps: 29 | - name: Check out the repo containing the docu source 30 | uses: actions/checkout@v4 31 | 32 | - name: Install graphviz 33 | run: sudo apt-get install graphviz 34 | 35 | - name: Install python 36 | uses: actions/setup-python@v5 37 | with: 38 | python-version: '3.12' 39 | - name: Install dependencies and the python package 40 | run: | 41 | python -m pip install --upgrade pip 42 | pip install -r requirements.txt 43 | 44 | - name: Add R repository 45 | run: | 46 | sudo apt install dirmngr gnupg apt-transport-https ca-certificates software-properties-common 47 | sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 48 | sudo add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/' 49 | - name: Install R 50 | run: | 51 | sudo apt-get update 52 | sudo apt-get install r-base 53 | sudo apt-get install r-base-dev 54 | sudo apt-get install -y zlib1g-dev libicu-dev pandoc make libcurl4-openssl-dev libssl-dev 55 | 56 | - name: Get user library folder 57 | run: | 58 | mkdir ${GITHUB_WORKSPACE}/tmp_r_libs_user 59 | echo R_LIBS_USER=${GITHUB_WORKSPACE}/tmp_r_libs_user >> $GITHUB_ENV 60 | 61 | - name: Query R version 62 | run: | 63 | writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") 64 | shell: Rscript {0} 65 | 66 | - name: Cache R packages 67 | uses: actions/cache@v4 68 | with: 69 | path: ${{ env.R_LIBS_USER }} 70 | key: doubleml-test-build-stable-${{ hashFiles('.github/R-version') }} 71 | 72 | - name: Install R kernel for Jupyter and the R package DoubleML (dev) 73 | run: | 74 | install.packages('remotes') 75 | remotes::install_cran('DoubleML', dependencies = TRUE) 76 | install.packages(c('ggplot2', 'IRkernel', 'xgboost', 'hdm', 'reshape2', 'gridExtra', "igraph", "mlr3filters", "mlr3measures", "did")) 77 | IRkernel::installspec() 78 | shell: Rscript {0} 79 | 80 | - name: Build docu with sphinx 81 | run: | 82 | make -C doc html NBSPHINX_EXECUTE=${{ github.event.inputs.nbsphinx-execute || 'auto' }} 83 | 84 | - name: Check for broken links / URLs 85 | 86 | run: | 87 | make -C doc linkcheck 88 | 89 | - name: Upload html artifacts 90 | uses: actions/upload-artifact@v4 91 | with: 92 | name: build_html 93 | path: doc/_build/html/ 94 | -------------------------------------------------------------------------------- /doc/guide/learners/r/tune_and_pipelines.rst: 
-------------------------------------------------------------------------------- 1 | As an alternative to the previously presented tuning approach, it is possible to base the parameter tuning on a pipeline 2 | as provided by the `mlr3pipelines `_ package. The basic idea of this approach is to 3 | define a learner via a pipeline and then perform the tuning via the ``tune()``. We will shortly repeat the lasso example 4 | from above. In general, the pipeline-based approach can be used to find optimal values not only for the parameters of 5 | one or multiple learners, but also for other parameters, which are, for example, involved in the data preprocessing. We 6 | refer to more details provided in the `Pipelines Chapter in the mlr3book `_. 7 | 8 | .. tab-set:: 9 | 10 | .. tab-item:: R 11 | :sync: r 12 | 13 | .. jupyter-execute:: 14 | 15 | library(DoubleML) 16 | library(mlr3) 17 | library(mlr3tuning) 18 | library(mlr3pipelines) 19 | lgr::get_logger("mlr3")$set_threshold("warn") 20 | lgr::get_logger("bbotk")$set_threshold("warn") 21 | 22 | set.seed(3141) 23 | n_obs = 200 24 | n_vars = 200 25 | theta = 3 26 | X = matrix(stats::rnorm(n_obs * n_vars), nrow = n_obs, ncol = n_vars) 27 | d = X[, 1:3, drop = FALSE] %*% c(5, 5, 5) + stats::rnorm(n_obs) 28 | y = theta * d + X[, 1:3, drop = FALSE] %*% c(5, 5, 5) + stats::rnorm(n_obs) 29 | dml_data = double_ml_data_from_matrix(X = X, y = y, d = d) 30 | 31 | # Define learner in a pipeline 32 | set.seed(1234) 33 | lasso_pipe = po("learner", 34 | learner = lrn("regr.glmnet")) 35 | ml_g = as_learner(lasso_pipe) 36 | ml_m = as_learner(lasso_pipe) 37 | 38 | # Instantiate a DoubleML object 39 | dml_plr_obj = DoubleMLPLR$new(dml_data, ml_g, ml_m) 40 | 41 | # Parameter grid for lambda 42 | par_grids = ps(regr.glmnet.lambda = p_dbl(lower = 0.05, upper = 0.1)) 43 | 44 | tune_settings = list(terminator = trm("evals", n_evals = 100), 45 | algorithm = tnr("grid_search", resolution = 10), 46 | rsmp_tune = rsmp("cv", folds = 5), 47 | measure = list("ml_g" = msr("regr.mse"), 48 | "ml_m" = msr("regr.mse"))) 49 | dml_plr_obj$tune(param_set = list("ml_g" = par_grids, 50 | "ml_m" = par_grids), 51 | tune_settings=tune_settings, 52 | tune_on_fold=TRUE) 53 | dml_plr_obj$fit() 54 | dml_plr_obj$summary() 55 | 56 | References 57 | ++++++++++ 58 | 59 | * Lang, M., Binder, M., Richter, J., Schratz, P., Pfisterer, F., Coors, S., Au, Q., Casalicchio, G., Kotthoff, L., Bischl, B. (2019), mlr3: A modern object-oriented machine learing framework in R. Journal of Open Source Software, `doi:10.21105/joss.01903 `_. 60 | 61 | * Becker, M., Binder, M., Bischl, B., Lang, M., Pfisterer, F., Reich, N.G., Richter, J., Schratz, P., Sonabend, R. (2020), mlr3 book, available at `https://mlr3book.mlr-org.com `_. 
62 | -------------------------------------------------------------------------------- /.github/workflows/deploy_docu_dev.yml: -------------------------------------------------------------------------------- 1 | # Workflow based on https://github.com/actions/starter-workflows/blob/main/ci/python-package.yml 2 | 3 | name: Deploy Docu (dev) 4 | 5 | on: 6 | workflow_dispatch: 7 | 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: ubuntu-22.04 13 | env: 14 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 15 | 16 | steps: 17 | - name: Check out the repo containing the docu source 18 | uses: actions/checkout@v4 19 | 20 | - name: Check out the repo containing the python pkg DoubleML (dev) 21 | uses: actions/checkout@v4 22 | with: 23 | repository: DoubleML/doubleml-for-py 24 | path: doubleml-for-py 25 | 26 | - name: Install graphviz 27 | run: sudo apt-get install graphviz 28 | 29 | - name: Install python 30 | uses: actions/setup-python@v5 31 | with: 32 | python-version: '3.12' 33 | - name: Install dependencies and the python package 34 | run: | 35 | python -m pip install --upgrade pip 36 | pip install -r requirements.txt 37 | pip uninstall -y DoubleML 38 | cd doubleml-for-py 39 | pip install -e .[rdd] 40 | 41 | - name: Add R repository 42 | run: | 43 | sudo apt install dirmngr gnupg apt-transport-https ca-certificates software-properties-common 44 | sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 45 | sudo add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/' 46 | - name: Install R 47 | run: | 48 | sudo apt-get update 49 | sudo apt-get install r-base 50 | sudo apt-get install r-base-dev 51 | sudo apt-get install -y zlib1g-dev libicu-dev pandoc make libcurl4-openssl-dev libssl-dev 52 | 53 | - name: Get user library folder 54 | run: | 55 | mkdir ${GITHUB_WORKSPACE}/tmp_r_libs_user 56 | echo R_LIBS_USER=${GITHUB_WORKSPACE}/tmp_r_libs_user >> $GITHUB_ENV 57 | 58 | - name: Query R version 59 | run: | 60 | writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") 61 | shell: Rscript {0} 62 | 63 | - name: Cache R packages 64 | uses: actions/cache@v4 65 | with: 66 | path: ${{ env.R_LIBS_USER }} 67 | key: doubleml-user-guide-dev-${{ hashFiles('.github/R-version') }} 68 | 69 | - name: Install R kernel for Jupyter and the R package DoubleML (dev) 70 | run: | 71 | install.packages('remotes') 72 | remotes::install_github('DoubleML/doubleml-for-r', dependencies = TRUE) 73 | install.packages(c('ggplot2', 'IRkernel', 'xgboost', 'hdm', 'reshape2', 'gridExtra', "igraph", "mlr3filters", "mlr3measures", "did")) 74 | IRkernel::installspec() 75 | shell: Rscript {0} 76 | 77 | - name: Build docu with sphinx 78 | run: | 79 | make -C doc html 80 | 81 | - name: Deploy to dev 82 | uses: JamesIves/github-pages-deploy-action@v4 83 | with: 84 | repository-name: DoubleML/doubleml.github.io 85 | branch: main 86 | folder: doc/_build/html 87 | target-folder: dev 88 | git-config-name: DoubleML Deploy Bot 89 | git-config-email: DoubleML@users.noreply.github.com 90 | clean: true 91 | ssh-key: ${{ secrets.DEPLOY_KEY }} 92 | -------------------------------------------------------------------------------- /doc/guide/sensitivity/irm/irm_sensitivity.rst: -------------------------------------------------------------------------------- 1 | In the :ref:`irm-model` the target parameter can be written as 2 | 3 | .. math:: 4 | 5 | \theta_0 = \mathbb{E}[(g_0(1,X) - g_0(0,X))\omega(Y,D,X)] 6 | 7 | where :math:`\omega(Y,D,X)` are weights (e.g. 
set to :math:`1` for the ATE). 8 | This implies the following representations 9 | 10 | .. math:: 11 | 12 | m(W,g) &= \big(g(1,X) - g(0,X)\big)\omega(Y,D,X) 13 | 14 | \alpha(W) &= \bigg(\frac{D}{m(X)} - \frac{1-D}{1-m(X)}\bigg) \mathbb{E}[\omega(Y,D,X)|X]. 15 | 16 | 17 | .. note:: 18 | 19 | In the :ref:`irm-model` for the ATE (weights equal to :math:`1`), the form and interpretation of ``cf_y`` is the same as in the :ref:`plr-model`. 20 | 21 | - ``cf_y`` has the interpretation as the *nonparametric partial* :math:`R^2` *of* :math:`A` *with* :math:`Y` *given* :math:`(D,X)` 22 | 23 | .. math:: 24 | 25 | \frac{\textrm{Var}(\mathbb{E}[Y|D,X,A]) - \textrm{Var}(\mathbb{E}[Y|D,X])}{\textrm{Var}(Y)-\textrm{Var}(\mathbb{E}[Y|D,X])} 26 | 27 | - ``cf_d`` takes the following form 28 | 29 | .. math:: 30 | 31 | \small{\frac{\mathbb{E}\Big[\big(P(D=1|X,A)(1-P(D=1|X,A))\big)^{-1}\Big] - \mathbb{E}\Big[\big(P(D=1|X)(1-P(D=1|X))\big)^{-1}\Big]}{\mathbb{E}\Big[\big(P(D=1|X,A)(1-P(D=1|X,A))\big)^{-1}\Big]}} 32 | 33 | where the numerator measures the *gain in average conditional precision to predict* :math:`D` *by using* :math:`A` *in addition to* :math:`X`. 34 | The denominator is the *average conditional precision to predict* :math:`D` *by using* :math:`A` *and* :math:`X`. Consequently ``cf_d`` measures the *relative gain in average conditional precision*. 35 | 36 | Remark that :math:`P(D=1|X,A)(1-P(D=1|X,A))` denotes the variance of the conditional distribution of :math:`D` given :math:`(X,A)`, such that the inverse measures the precision of 37 | predicting :math:`D` conditional on :math:`(X,A)`. 38 | 39 | Since :math:`C_D^2=\frac{cf_d}{1 - cf_d}`, this corresponds to 40 | 41 | .. math:: 42 | 43 | C_D^2= \small{\frac{\mathbb{E}\Big[\big(P(D=1|X,A)(1-P(D=1|X,A))\big)^{-1}\Big] - \mathbb{E}\Big[\big(P(D=1|X)(1-P(D=1|X))\big)^{-1}\Big]}{\mathbb{E}\Big[\big(P(D=1|X)(1-P(D=1|X))\big)^{-1}\Big]}} 44 | 45 | which has the same numerator but is instead relative to the *average conditional precision to predict* :math:`D` *by using only* :math:`X`. 46 | 47 | Including weights changes only the definition of ``cf_d`` to 48 | 49 | .. math:: 50 | 51 | \frac{\mathbb{E}\left[\frac{\mathbb{E}[\omega(Y,D,X)|X,A]^2}{P(D=1|X,A)(1-P(D=1|X,A))}\right] - \mathbb{E}\left[\frac{\mathbb{E}[\omega(Y,D,X)|X]^2}{P(D=1|X)(1-P(D=1|X))}\right]}{\mathbb{E}\left[\frac{\mathbb{E}[\omega(Y,D,X)|X,A]^2}{P(D=1|X,A)(1-P(D=1|X,A))}\right]} 52 | 53 | which has an interpretation as the *relative weighted gain in average conditional precision*. 54 | 55 | The ``nuisance_elements`` are then computed with plug-in versions according to the general :ref:`sensitivity_implementation`. 56 | For ``score='ATE'``, the weights are set to one 57 | 58 | .. math:: 59 | 60 | \omega(Y,D,X) = 1, 61 | 62 | whereas for ``score='ATTE'`` 63 | 64 | .. math:: 65 | 66 | \omega(Y,D,X) = \frac{D}{\mathbb{E}[D]}, 67 | 68 | such that 69 | 70 | .. math:: 71 | 72 | \mathbb{E}[\omega(Y,D,X)|X] = \frac{m(X)}{\mathbb{E}[D]}. 73 | -------------------------------------------------------------------------------- /doc/guide/models/did/did_aggregation.rst: -------------------------------------------------------------------------------- 1 | The following section considers the aggregation of different :math:`ATT(\mathrm{g},t)` to summary measures based on `Callaway and Sant'Anna (2021) `_. 2 | All implemented aggregation schemes take the form of a weighted average of the :math:`ATT(\mathrm{g},t)` estimates 3 | 4 | ..
math:: 5 | \theta = \sum_{\mathrm{g}\in \mathcal{G}} \sum_{t=2}^{\mathcal{T}} \omega(\mathrm{g},t) \cdot ATT(\mathrm{g},t) 6 | 7 | where :math:`\omega(\mathrm{g},t)` is a weight function based on the treatment group :math:`\mathrm{g}` and time period :math:`t`. 8 | The aggregation schemes are implemented via the ``aggregate()`` method of the ``DoubleMLDIDMulti`` class. 9 | 10 | 11 | .. tab-set:: 12 | 13 | .. tab-item:: Python 14 | :sync: py 15 | 16 | .. ipython:: python 17 | :okwarning: 18 | 19 | import numpy as np 20 | import doubleml as dml 21 | from doubleml.did.datasets import make_did_CS2021 22 | from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier 23 | 24 | np.random.seed(42) 25 | df = make_did_CS2021(n_obs=500) 26 | dml_data = dml.data.DoubleMLPanelData( 27 | df, 28 | y_col="y", 29 | d_cols="d", 30 | id_col="id", 31 | t_col="t", 32 | x_cols=["Z1", "Z2", "Z3", "Z4"], 33 | datetime_unit="M" 34 | ) 35 | dml_did_obj = dml.did.DoubleMLDIDMulti( 36 | obj_dml_data=dml_data, 37 | ml_g=RandomForestRegressor(min_samples_split=10), 38 | ml_m=RandomForestClassifier(min_samples_split=10), 39 | gt_combinations="standard", 40 | control_group="never_treated", 41 | ) 42 | dml_did_obj.fit() 43 | 44 | agg_did_obj = dml_did_obj.aggregate(aggregation="group") 45 | agg_did_obj.aggregated_frameworks.bootstrap() 46 | print(agg_did_obj) 47 | 48 | The method ``aggregate()`` requires the ``aggregation`` argument to be set to one of the following values: 49 | 50 | * ``'group'``: aggregates :math:`ATT(\mathrm{g},t)` estimates by the treatment group :math:`\mathrm{g}`. 51 | * ``'time'``: aggregates :math:`ATT(\mathrm{g},t)` estimates by the time period :math:`t` (based on group size). 52 | * ``'eventstudy'``: aggregates :math:`ATT(\mathrm{g},t)` estimates based on time difference to first treatment assignment like an event study (based on group size). 53 | * ``dictionary``: a dictionary with values containing the aggregation weights (as ``numpy.ma.MaskedArray``). 54 | 55 | .. warning:: 56 | Remark that ``'time'`` and ``'eventstudy'`` aggregation use internal group reweighting according to the total group size (e.g. the group decomposition should be relatively stable over time, as assumed in Assumption 2). 57 | It can be helpful to check the aggregation weights as in the :ref:`example gallery `. 58 | 59 | .. note:: 60 | A more detailed example on effect aggregation is available in the :ref:`example gallery `. 61 | For a detailed discussion on different aggregation schemes, we refer to `Callaway and Sant'Anna (2021) `_. 62 | -------------------------------------------------------------------------------- /doc/guide/models/did/did_implementation.rst: -------------------------------------------------------------------------------- 1 | To estimate the target parameter :math:`ATT(\mathrm{g},t_\text{eval})`, the implementation (both for panel data or repeated cross sections) is based on the following parameters: 2 | 3 | * :math:`\mathrm{g}` is the first post-treatment period of interest, i.e. the treatment group. 4 | * :math:`t_\text{pre}` is the pre-treatment period, i.e. the time period from which the conditional parallel trends are assumed. 5 | * :math:`t_\text{eval}` is the time period of interest or evaluation period, i.e. the time period where the treatment effect is evaluated. 6 | * :math:`\delta` is the number of anticipation periods, i.e. the number of time periods for which units are assumed to anticipate the treatment (see the illustrative sketch below).
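To make the role of these four parameters concrete, the following short Python sketch (purely illustrative, not part of the DoubleML API) enumerates the :math:`(\mathrm{g}, t_\text{pre}, t_\text{eval})` combinations that arise when the pre-treatment period is chosen as short as possible for a toy setting with time periods :math:`1,\dots,\mathcal{T}`, and checks the anticipation constraint :math:`t_\text{eval} - t_\text{pre} \ge 1 + \delta` discussed below.

.. code-block:: python

    # Illustrative helper (not part of the DoubleML API): enumerate
    # (g, t_pre, t_eval) combinations for time periods 1..T with the
    # pre-treatment period chosen as short as possible given anticipation delta.
    def standard_gt_combinations(T=4, delta=0):
        combos = []
        for g in range(2, T + 1):            # treatment groups, first treated in period g
            for t_eval in range(2, T + 1):   # evaluation periods
                t_pre = min(g, t_eval) - delta - 1
                if t_pre >= 1 and t_eval - t_pre >= 1 + delta:
                    combos.append((g, t_pre, t_eval))
        return combos

    print(standard_gt_combinations(T=4, delta=0))
    # e.g. (3, 2, 4): group first treated in period 3, pre-period 2, evaluated in period 4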
7 | 8 | 9 | Under the assumptions above the target parameter :math:`ATT(\mathrm{g},t_\text{eval})` can be estimated by choosing a suitable combination 10 | of :math:`(\mathrm{g}, t_\text{pre}, t_\text{eval}, \delta)` if :math:`t_\text{eval} - t_\text{pre} \ge 1 + \delta`, i.e. the parallel trends are assumed to hold at least one period more than the anticipation period. 11 | 12 | .. note:: 13 | The choice :math:`t_\text{pre}= \min(\mathrm{g},t_\text{eval}) -\delta-1` corresponds to the definition of :math:`ATT_{dr}(\mathrm{g},t_\text{eval};\delta)` from `Callaway and Sant'Anna (2021) `_. 14 | 15 | As an example, if the target parameter is the effect on the group receiving treatment in :math:`2006` but evaluated in :math:`2007` with an anticipation period of :math:`\delta=1`, then the pre-treatment period is :math:`2004`. 16 | The parallel trend assumption is slightly stronger with anticipation, as the trends have to be parallel for a longer period, i.e. :math:`ATT_{dr}(2006,2007;1)=ATT(2006,2004,2007)`. 17 | 18 | In the following, we will omit the subscript :math:`\delta` in the notation of the nuisance functions and the control group (implicitly assuming :math:`\delta=0`). 19 | 20 | For a given tuple :math:`(\mathrm{g}, t_\text{pre}, t_\text{eval})` the target parameter :math:`ATT(\mathrm{g},t)` is estimated by solving the empirical version of the following linear moment condition: 21 | 22 | .. math:: 23 | ATT(\mathrm{g}, t_\text{pre}, t_\text{eval}):= -\frac{\mathbb{E}[\psi_b(W,\eta_0)]}{\mathbb{E}[\psi_a(W,\eta_0)]} 24 | 25 | with nuisance elements :math:`\eta_0` which depend on the parameter combination :math:`(\mathrm{g}, t_\text{pre}, t_\text{eval})` and score function :math:`\psi(W,\theta, \eta)` (for details, see :ref:`Panel Data Details ` or :ref:`Repeated Cross-Section Details `). 26 | Under the identifying assumptions above 27 | 28 | .. math:: 29 | ATT(\mathrm{g}, t_\text{pre}, t_\text{eval}) = ATT(\mathrm{g},t). 30 | 31 | ``DoubleMLDIDMulti`` implements the estimation of :math:`ATT(\mathrm{g}, t_\text{pre}, t_\text{eval})` for multiple time periods and requires :ref:`DoubleMLPanelData ` as input. 32 | 33 | Setting ``gt_combinations='standard'`` will estimate the target parameter for all (possible) combinations of :math:`(\mathrm{g}, t_\text{pre}, t_\text{eval})` with :math:`\mathrm{g}\in\{2,\dots,\mathcal{T}\}` and :math:`(t_\text{pre}, t_\text{eval})` with :math:`t_\text{eval}\in\{2,\dots,\mathcal{T}\}` and 34 | :math:`t_\text{pre}= \min(\mathrm{g},t_\text{eval}) -\delta-1`. 35 | This corresponds to the setting where all trends are set as short as possible, but still respecting the anticipation period. 36 | -------------------------------------------------------------------------------- /doc/guide/learners/python/external_preds.rst: -------------------------------------------------------------------------------- 1 | Since there might be cases where the user wants to use a learner that is not supported by :ref:`DoubleML ` 2 | or do some extensive hyperparameter tuning, it is possible to use external predictions for the nuisance functions. 3 | Remark that this requires the user to take care of the cross-fitting procedure and learner evaluation. 4 | 5 | To illustrate the use of external predictions, we work with the following example. 6 | 7 | .. tab-set:: 8 | 9 | .. tab-item:: Python 10 | :sync: py 11 | 12 | ..
ipython:: python 13 | 14 | import numpy as np 15 | import doubleml as dml 16 | from doubleml.irm.datasets import make_irm_data 17 | from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier 18 | 19 | np.random.seed(3333) 20 | data = make_irm_data(theta=0.5, n_obs=500, dim_x=10, return_type='DataFrame') 21 | obj_dml_data = dml.DoubleMLData(data, 'y', 'd') 22 | 23 | # DoubleML with internal predictions 24 | ml_g = RandomForestRegressor(n_estimators=100, max_features=10, max_depth=5, min_samples_leaf=2) 25 | ml_m = RandomForestClassifier(n_estimators=100, max_features=10, max_depth=5, min_samples_leaf=2) 26 | dml_irm_obj = dml.DoubleMLIRM(obj_dml_data, ml_g, ml_m) 27 | dml_irm_obj.fit() 28 | print(dml_irm_obj.summary) 29 | 30 | The :py:class:`doubleml.DoubleMLIRM` model class saves nuisance predictions in the ``predictions`` attribute as a nested dictionary. 31 | To rely on external predictions, the user has to provide a nested dictionary, where the outer level keys correspond to the treatment 32 | variable names and the inner level keys correspond to the nuisance learner names. Further, the values have to be ``numpy`` arrays of shape 33 | ``(n_obs, n_rep)``. Here we generate an external predictions dictionary from the internal ``predictions`` attribute. 34 | 35 | .. tab-set:: 36 | 37 | .. tab-item:: Python 38 | :sync: py 39 | 40 | .. ipython:: python 41 | 42 | pred_dict = {"d": { 43 | "ml_g0": dml_irm_obj.predictions["ml_g0"][:, :, 0], 44 | "ml_g1": dml_irm_obj.predictions["ml_g1"][:, :, 0], 45 | "ml_m": dml_irm_obj.predictions["ml_m"][:, :, 0] 46 | } 47 | } 48 | 49 | The external predictions can be passed to the ``fit()`` method of the :py:class:`doubleml.DoubleML` class via the ``external_predictions`` argument. 50 | 51 | .. tab-set:: 52 | 53 | .. tab-item:: Python 54 | :sync: py 55 | 56 | .. ipython:: python 57 | 58 | ml_g = dml.utils.DMLDummyRegressor() 59 | ml_m = dml.utils.DMLDummyClassifier() 60 | dml_irm_obj_ext = dml.DoubleMLIRM(obj_dml_data, ml_g, ml_m) 61 | dml_irm_obj_ext.fit(external_predictions=pred_dict) 62 | print(dml_irm_obj_ext.summary) 63 | 64 | Both models have identical estimates. Remark that the :py:class:`doubleml.DoubleML` classes usually require learners for initialization. 65 | With external predictions these learners are not used. The ``DMLDummyRegressor`` and ``DMLDummyClassifier`` are dummy learners which 66 | are used to initialize the :py:class:`doubleml.DoubleML` class. Both dummy learners raise errors if specific methods are called to safeguard against 67 | undesired behavior. Further, the :py:class:`doubleml.DoubleMLData` class requires features (e.g. via the ``x_cols`` argument) which are not used. 68 | This can be handled by adding a dummy column to the data. -------------------------------------------------------------------------------- /doc/guide/models/did/did_pa.rst: -------------------------------------------------------------------------------- 1 | For the estimation of the target parameters :math:`ATT(\mathrm{g},t)` the following nuisance functions are required: 2 | 3 | .. math:: 4 | \begin{align} 5 | g_{0, \mathrm{g}, t_\text{pre}, t_\text{eval}, \delta}(X_i) &:= \mathbb{E}[Y_{i,t_\text{eval}} - Y_{i,t_\text{pre}}|X_i, C_{i,t_\text{eval} + \delta}^{(\cdot)} = 1], \\ 6 | m_{0, \mathrm{g}, t_\text{eval} + \delta}(X_i) &:= P(G_i^{\mathrm{g}}=1|X_i, G_i^{\mathrm{g}} + C_{i,t_\text{eval} + \delta}^{(\cdot)}=1).
7 | \end{align} 8 | 9 | where :math:`g_{0, \mathrm{g}, t_\text{pre}, t_\text{eval},\delta}(\cdot)` denotes the population outcome change regression function and :math:`m_{0, \mathrm{g}, t_\text{eval} + \delta}(\cdot)` the generalized propensity score. 10 | 11 | .. note:: 12 | Remark that the nuisance functions depend on the control group used for the estimation of the target parameter. 13 | By slight abuse of notation we use the same notation for both control groups :math:`C_{i,t}^{(\text{nev})}` and :math:`C_{i,t}^{(\text{nyt})}`. More specifically, the 14 | control group only depends on :math:`\delta` for *not yet treated* units. 15 | 16 | For a given tuple :math:`(\mathrm{g}, t_\text{pre}, t_\text{eval})` the target parameter :math:`ATT(\mathrm{g},t)` is estimated by solving the empirical version of the the following linear moment condition: 17 | 18 | .. math:: 19 | ATT(\mathrm{g}, t_\text{pre}, t_\text{eval}):= -\frac{\mathbb{E}[\psi_b(W,\eta_0)]}{\mathbb{E}[\psi_a(W,\eta_0)]} 20 | 21 | with nuisance elements :math:`\eta_0=(g_{0, \mathrm{g}, t_\text{pre}, t_\text{eval}}, m_{0, \mathrm{g}, t_\text{eval}})` and score function :math:`\psi(W,\theta, \eta)` being defined in the :ref:`DiD Score Section`. 22 | 23 | Estimation is conducted via its ``fit()`` method: 24 | 25 | .. tab-set:: 26 | 27 | .. tab-item:: Python 28 | :sync: py 29 | 30 | .. ipython:: python 31 | :okwarning: 32 | 33 | import numpy as np 34 | import doubleml as dml 35 | from doubleml.did.datasets import make_did_CS2021 36 | from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier 37 | 38 | np.random.seed(42) 39 | df = make_did_CS2021(n_obs=500) 40 | dml_data = dml.data.DoubleMLPanelData( 41 | df, 42 | y_col="y", 43 | d_cols="d", 44 | id_col="id", 45 | t_col="t", 46 | x_cols=["Z1", "Z2", "Z3", "Z4"], 47 | datetime_unit="M" 48 | ) 49 | dml_did_obj = dml.did.DoubleMLDIDMulti( 50 | obj_dml_data=dml_data, 51 | ml_g=RandomForestRegressor(min_samples_split=10), 52 | ml_m=RandomForestClassifier(min_samples_split=10), 53 | gt_combinations="standard", 54 | control_group="never_treated", 55 | ) 56 | print(dml_did_obj.fit()) 57 | 58 | .. note:: 59 | Remark that the output contains two different outcome regressions :math:`g(0,X)` and :math:`g(1,X)`. As in the :ref:`IRM model ` 60 | the outcome regression :math:`g(0,X)` refers to the control group, whereas :math:`g(1,X)` refers to the outcome change regression for the treatment group, i.e. 61 | 62 | .. math:: 63 | \begin{align} 64 | g(0,X) &\approx g_{0, \mathrm{g}, t_\text{pre}, t_\text{eval}, \delta}(X_i) = \mathbb{E}[Y_{i,t_\text{eval}} - Y_{i,t_\text{pre}}|X_i, C_{i,t_\text{eval} + \delta}^{(\cdot)} = 1],\\ 65 | g(1,X) &\approx \mathbb{E}[Y_{i,t_\text{eval}} - Y_{i,t_\text{pre}}|X_i, G_i^{\mathrm{g}} = 1]. 66 | \end{align} 67 | 68 | Further, :math:`g(1,X)` is only required for :ref:`Sensitivity Analysis ` and is not used for the estimation of the target parameter. 69 | 70 | .. note:: 71 | A more detailed example is available in the :ref:`Example Gallery `. 72 | -------------------------------------------------------------------------------- /.devcontainer/build_image_guide.md: -------------------------------------------------------------------------------- 1 | # Building and Publishing the Docker Image 2 | 3 | This guide shows how to build the DoubleML documentation development container locally and publish it to Docker Hub. 
4 | 5 | ## Prerequisites 6 | 7 | - [Docker Desktop](https://www.docker.com/products/docker-desktop/) installed and running 8 | - Access to the `svenklaassen` [Docker Hub](https://www.docker.com/products/docker-hub/) account 9 | - [doubleml-docs](https://github.com/DoubleML/doubleml-docs) repository cloned to your local machine 10 | 11 | ## Step 1: Login to Docker Hub 12 | 13 | Open a terminal and login to Docker Hub: 14 | 15 | ```bash 16 | docker login 17 | ``` 18 | 19 | Enter the Docker Hub username (`svenklaassen`) and password (or token) when prompted. 20 | 21 | ## Step 2: Build the Docker Image 22 | 23 | Navigate to your project root directory and build the image (using the `latest`-tag): 24 | 25 | ```bash 26 | docker build -t svenklaassen/doubleml-docs:latest -f .devcontainer/Dockerfile.dev . 27 | ``` 28 | 29 | To force a complete rebuild without using cache: 30 | 31 | ```bash 32 | docker build --no-cache -t svenklaassen/doubleml-docs:latest -f .devcontainer/Dockerfile.dev . 33 | ``` 34 | 35 | ## Step 3 (Optional): Verify the image 36 | 37 | ### Open the repository in VS Code 38 | 39 | 1. Ensure your `.devcontainer/devcontainer.json` is configured to use your local image: 40 | 41 | ```json 42 | "image": "svenklaassen/doubleml-docs:latest" 43 | ``` 44 | Note: The `.devcontainer/devcontainer.json` file is configured to use the pre-built image. If you want to build the container from scratch, uncomment the `dockerFile` and `context` lines and comment out the `image` line. 45 | 46 | 2. Open the `doubleml-docs` repository in VS Code: 47 | 48 | ```bash 49 | code /path/to/doubleml-docs 50 | ``` 51 | 52 | 3. Open the Command Palette (`Ctrl+Shift+P`) and select `Dev Containers: Reopen in Container`. 53 | VS Code will use your locally built image. 54 | 55 | ### Build the documentation 56 | 57 | Once inside the container, verify that you can successfully build the documentation: 58 | 59 | 1. Open a terminal in VS Code (`Terminal > New Terminal`) 60 | 61 | 2. Build the documentation: 62 | 63 | ```bash 64 | cd doc 65 | make html 66 | ``` 67 | 68 | 3. Check the output for any errors or warnings 69 | 70 | 4. View the built documentation by opening the output files: 71 | 72 | ```bash 73 | # On Windows 74 | explorer.exe _build/html 75 | 76 | # On Linux 77 | xdg-open _build/html 78 | 79 | # On macOS 80 | open _build/html 81 | ``` 82 | 83 | If the documentation builds successfully and looks correct, your Docker image is working properly and ready to be pushed to Docker Hub. 84 | 85 | ## Step 4: Push to Docker Hub 86 | 87 | Push your built image to Docker Hub: 88 | 89 | ```bash 90 | docker push svenklaassen/doubleml-docs:latest 91 | ``` 92 | 93 | ## Step 5: Using the Published Image 94 | 95 | After publishing, there are two ways to use the image: 96 | 97 | ### Option 1: Manual Container Management 98 | Pull and run the container manually: 99 | 100 | ```bash 101 | docker pull svenklaassen/doubleml-docs:latest 102 | # Then run commands to create a container from this image 103 | ``` 104 | 105 | ### Option 2: VS Code Integration (Recommended) 106 | Simply reference the image in your `devcontainer.json` file: 107 | 108 | ```json 109 | "image": "svenklaassen/doubleml-docs:latest" 110 | ``` 111 | 112 | VS Code will automatically pull the image when opening the project in a container - no separate `docker pull` command needed. 
113 | 114 | ## Troubleshooting 115 | 116 | ### Clear Docker Cache 117 | 118 | If you're experiencing issues with cached layers: 119 | 120 | ```bash 121 | # Remove build cache 122 | docker builder prune 123 | 124 | # For a more thorough cleanup 125 | docker system prune -a 126 | ``` 127 | 128 | ### Check Image Size 129 | 130 | To verify the image size before pushing: 131 | 132 | ```bash 133 | docker images svenklaassen/doubleml-docs 134 | ``` -------------------------------------------------------------------------------- /doc/guide/scores/did/did_cs_binary_score.rst: -------------------------------------------------------------------------------- 1 | For the difference-in-differences model implemented in ``DoubleMLDIDCS`` one can choose between 2 | ``score='observational'`` and ``score='experimental'``. 3 | 4 | ``score='observational'`` implements the score function (dropping the unit index :math:`i`): 5 | 6 | .. math:: 7 | 8 | \psi(W,\theta,\eta) :=\; & - \frac{D}{\mathbb{E}_n[D]}\theta + \frac{D}{\mathbb{E}_n[D]}\Big(g(1,1,X) - g(1,0,X) - (g(0,1,X) - g(0,0,X))\Big) 9 | 10 | & + \frac{DT}{\mathbb{E}_n[DT]} (Y - g(1,1,X)) 11 | 12 | & - \frac{D(1-T)}{\mathbb{E}_n[D(1-T)]}(Y - g(1,0,X)) 13 | 14 | & - \frac{m(X) (1-D)T}{1-m(X)} \mathbb{E}_n\left[\frac{m(X) (1-D)T}{1-m(X)}\right]^{-1} (Y-g(0,1,X)) 15 | 16 | & + \frac{m(X) (1-D)(1-T)}{1-m(X)} \mathbb{E}_n\left[\frac{m(X) (1-D)(1-T)}{1-m(X)}\right]^{-1} (Y-g(0,0,X)) 17 | 18 | =\; &\psi_a(W; \eta) \theta + \psi_b(W; \eta) 19 | 20 | where the components of the linear score are 21 | 22 | .. math:: 23 | 24 | \psi_a(W; \eta) =\; &- \frac{D}{\mathbb{E}_n[D]}, 25 | 26 | \psi_b(W; \eta) =\; &\frac{D}{\mathbb{E}_n[D]}\Big(g(1,1,X) - g(1,0,X) - (g(0,1,X) - g(0,0,X))\Big) 27 | 28 | & + \frac{DT}{\mathbb{E}_n[DT]} (Y - g(1,1,X)) 29 | 30 | & - \frac{D(1-T)}{\mathbb{E}_n[D(1-T)]}(Y - g(1,0,X)) 31 | 32 | & - \frac{m(X) (1-D)T}{1-m(X)} \mathbb{E}_n\left[\frac{m(X) (1-D)T}{1-m(X)}\right]^{-1} (Y-g(0,1,X)) 33 | 34 | & + \frac{m(X) (1-D)(1-T)}{1-m(X)} \mathbb{E}_n\left[\frac{m(X) (1-D)(1-T)}{1-m(X)}\right]^{-1} (Y-g(0,0,X)) 35 | 36 | and the nuisance elements :math:`\eta=(g)` are defined as 37 | 38 | .. math:: 39 | 40 | g_{0}(d, t, X) = \mathbb{E}[Y|D=d, T=t, X]. 41 | 42 | If ``in_sample_normalization='False'``, the score is set to 43 | 44 | .. math:: 45 | 46 | \psi(W,\theta,\eta) :=\; & - \frac{D}{p}\theta + \frac{D}{p}\Big(g(1,1,X) - g(1,0,X) - (g(0,1,X) - g(0,0,X))\Big) 47 | 48 | & + \frac{DT}{p\lambda} (Y - g(1,1,X)) 49 | 50 | & - \frac{D(1-T)}{p(1-\lambda)}(Y - g(1,0,X)) 51 | 52 | & - \frac{m(X) (1-D)T}{p(1-m(X))\lambda} (Y-g(0,1,X)) 53 | 54 | & + \frac{m(X) (1-D)(1-T)}{p(1-m(X))(1-\lambda)} (Y-g(0,0,X)) 55 | 56 | =\; &\psi_a(W; \eta) \theta + \psi_b(W; \eta) 57 | 58 | with :math:`\eta=(g, p, \lambda)`, where :math:`p_0 = \mathbb{E}[D]` and :math:`\lambda_0 = \mathbb{E}[T]` are estimated on the whole sample. 59 | Remark that this will result in a similar score, but just uses slightly different normalization. 60 | 61 | ``score='experimental'`` assumes that the treatment probability is independent of the covariates :math:`X` and 62 | implements the score function: 63 | 64 | .. 
math:: 65 | 66 | \psi(W,\theta,\eta) :=\; & - \theta + \Big(g(1,1,X) - g(1,0,X) - (g(0,1,X) - g(0,0,X))\Big) 67 | 68 | & + \frac{DT}{\mathbb{E}_n[DT]} (Y - g(1,1,X)) 69 | 70 | & - \frac{D(1-T)}{\mathbb{E}_n[D(1-T)]}(Y - g(1,0,X)) 71 | 72 | & - \frac{(1-D)T}{\mathbb{E}_n[(1-D)T]} (Y-g(0,1,X)) 73 | 74 | & + \frac{(1-D)(1-T)}{\mathbb{E}_n[(1-D)(1-T)]} (Y-g(0,0,X)) 75 | 76 | =\; &\psi_a(W; \eta) \theta + \psi_b(W; \eta) 77 | 78 | where the components of the linear score are 79 | 80 | .. math:: 81 | 82 | \psi_a(W; \eta) \;= &- 1, 83 | 84 | \psi_b(W; \eta) \;= &\Big(g(1,1,X) - g(1,0,X) - (g(0,1,X) - g(0,0,X))\Big) 85 | 86 | & + \frac{DT}{\mathbb{E}_n[DT]} (Y - g(1,1,X)) 87 | 88 | & - \frac{D(1-T)}{\mathbb{E}_n[D(1-T)]}(Y - g(1,0,X)) 89 | 90 | & - \frac{(1-D)T}{\mathbb{E}_n[(1-D)T]} (Y-g(0,1,X)) 91 | 92 | & + \frac{(1-D)(1-T)}{\mathbb{E}_n[(1-D)(1-T)]} (Y-g(0,0,X)) 93 | 94 | and the nuisance elements :math:`\eta=(g, m)` are defined as 95 | 96 | .. math:: 97 | 98 | g_{0}(d, t, X) &= \mathbb{E}[Y|D=d, T=t, X] 99 | 100 | m_0(X) &= P(D=1|X). 101 | 102 | Analogously, if ``in_sample_normalization='False'``, the score is set to 103 | 104 | .. math:: 105 | 106 | \psi(W,\theta,\eta) :=\; & - \theta + \Big(g(1,1,X) - g(1,0,X) - (g(0,1,X) - g(0,0,X))\Big) 107 | 108 | & + \frac{DT}{p\lambda} (Y - g(1,1,X)) 109 | 110 | & - \frac{D(1-T)}{p(1-\lambda)}(Y - g(1,0,X)) 111 | 112 | & - \frac{(1-D)T}{(1-p)\lambda} (Y-g(0,1,X)) 113 | 114 | & + \frac{(1-D)(1-T)}{(1-p)(1-\lambda)} (Y-g(0,0,X)) 115 | 116 | =\; &\psi_a(W; \eta) \theta + \psi_b(W; \eta) 117 | 118 | with :math:`\eta=(g, m, p, \lambda)`, where :math:`p_0 = \mathbb{E}[D]` and :math:`\lambda_0 = \mathbb{E}[T]` are estimated on the whole sample. 119 | Remark that this will result in a similar score, but just uses slightly different normalization. 120 | -------------------------------------------------------------------------------- /doc/guide/models/did/did_binary.rst: -------------------------------------------------------------------------------- 1 | **Difference-in-Differences Models (DID)** implemented in the package focus on the binary treatment case with 2 | two treatment periods. 3 | 4 | Adopting the notation from `Sant'Anna and Zhao (2020) `_, 5 | let :math:`Y_{it}` be the outcome of interest for unit :math:`i` at time :math:`t`. Further, let :math:`D_{it}=1` indicate 6 | if unit :math:`i` is treated before time :math:`t` (otherwise :math:`D_{it}=0`). Since all units start as untreated (:math:`D_{i0}=0`), define 7 | :math:`D_{i}=D_{i1}.` Relying on the potential outcome notation, denote :math:`Y_{it}(0)` as the outcome of unit :math:`i` at time :math:`t` if the unit did not receive 8 | treatment up until time :math:`t` and analogously for :math:`Y_{it}(1)` with treatment. Consequently, the observed outcome 9 | for unit :math:`i` at time :math:`t` is :math:`Y_{it}=D_{it} Y_{it}(1) + (1-D_{it}) Y_{it}(0)`. Further, let 10 | :math:`X_i` be a vector of pre-treatment covariates. 11 | 12 | The target parameter of interest is the average treatment effect on the treated (ATTE) 13 | 14 | .. math:: 15 | 16 | \theta_0 = \mathbb{E}[Y_{i1}(1)- Y_{i1}(0)|D_i=1]. 17 | 18 | The corresponding identifying assumptions are 19 | 20 | - **(Cond.) Parallel Trends:** :math:`\mathbb{E}[Y_{i1}(0) - Y_{i0}(0)|X_i, D_i=1] = \mathbb{E}[Y_{i1}(0) - Y_{i0}(0)|X_i, D_i=0]\quad a.s.` 21 | - **Overlap:** :math:`\exists\epsilon > 0`: :math:`P(D_i=1) > \epsilon` and :math:`P(D_i=1|X_i) \le 1-\epsilon\quad a.s.` 22 | 23 | ..
note:: 24 | For a more detailed introduction and recent developments of the difference-in-differences literature see e.g. `Roth et al. (2022) `_. 25 | 26 | 27 | Panel Data 28 | ~~~~~~~~~~~ 29 | 30 | If panel data are available, the observations are assumed to be i.i.d. of the form :math:`(Y_{i0}, Y_{i1}, D_i, X_i)`. 31 | Remark that the difference :math:`\Delta Y_i= Y_{i1}-Y_{i0}` has to be defined as the outcome ``y`` in the ``DoubleMLData`` object. 32 | 33 | ``DoubleMLDID`` implements difference-in-differences models for panel data. 34 | Estimation is conducted via its ``fit()`` method: 35 | 36 | .. tab-set:: 37 | 38 | .. tab-item:: Python 39 | :sync: py 40 | 41 | .. ipython:: python 42 | :okwarning: 43 | 44 | import numpy as np 45 | import doubleml as dml 46 | from doubleml.did.datasets import make_did_SZ2020 47 | from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier 48 | 49 | ml_g = RandomForestRegressor(n_estimators=100, max_depth=5, min_samples_leaf=5) 50 | ml_m = RandomForestClassifier(n_estimators=100, max_depth=5, min_samples_leaf=5) 51 | np.random.seed(42) 52 | data = make_did_SZ2020(n_obs=500, return_type='DataFrame') 53 | # y is already defined as the difference of observed outcomes 54 | obj_dml_data = dml.DoubleMLDIDData(data, 'y', 'd') 55 | dml_did_obj = dml.DoubleMLDID(obj_dml_data, ml_g, ml_m) 56 | print(dml_did_obj.fit()) 57 | 58 | 59 | Repeated cross-sections 60 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 61 | 62 | For repeated cross-sections, the observations are assumed to be i.i.d. of the form :math:`(Y_{i}, D_i, X_i, T_i)`, 63 | where :math:`T_i` is a dummy variable indicating whether unit :math:`i` is observed in the pre- or post-treatment period, such 64 | that the observed outcome can be defined as 65 | 66 | .. math:: 67 | 68 | Y_i = T_i Y_{i1} + (1-T_i) Y_{i0}. 69 | 70 | Further, treatment and covariates are assumed to be stationary, such that the joint distribution of :math:`(D,X)` is invariant to :math:`T`. 71 | 72 | ``DoubleMLDIDCS`` implements difference-in-differences models for repeated cross-sections. 73 | Estimation is conducted via its ``fit()`` method: 74 | 75 | .. tab-set:: 76 | 77 | .. tab-item:: Python 78 | :sync: py 79 | 80 | .. ipython:: python 81 | :okwarning: 82 | 83 | import numpy as np 84 | import doubleml as dml 85 | from doubleml.did.datasets import make_did_SZ2020 86 | from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier 87 | 88 | ml_g = RandomForestRegressor(n_estimators=100, max_depth=5, min_samples_leaf=5) 89 | ml_m = RandomForestClassifier(n_estimators=100, max_depth=5, min_samples_leaf=5) 90 | np.random.seed(42) 91 | data = make_did_SZ2020(n_obs=500, cross_sectional_data=True, return_type='DataFrame') 92 | obj_dml_data = dml.DoubleMLDIDData(data, 'y', 'd', t_col='t') 93 | dml_did_obj = dml.DoubleMLDIDCS(obj_dml_data, ml_g, ml_m) 94 | print(dml_did_obj.fit()) 95 | -------------------------------------------------------------------------------- /doc/guide/models/did/did_cs.rst: -------------------------------------------------------------------------------- 1 | For the estimation of the target parameters :math:`ATT(\mathrm{g},t)` the following nuisance functions are required: 2 | 3 | ..
math:: 4 | \begin{align} 5 | g^{\text{treat}}_{0,\mathrm{g}, t, \text{eval} + \delta}(X_i) &:= \mathbb{E}[Y_{i,t} |X_i, G_i^{\mathrm{g}}=1, T_i=t], \\ 6 | g^{\text{control}}_{0,\mathrm{g}, t, \text{eval} + \delta}(X_i) &:= \mathbb{E}[Y_{i,t} |X_i, C_{i,t_\text{eval} + \delta}^{(\cdot)}=1, T_i=t], \\ 7 | m_{0, \mathrm{g}, t_\text{eval} + \delta}(X_i) &:= P(G_i^{\mathrm{g}}=1|X_i, G_i^{\mathrm{g}} + C_{i,t_\text{eval} + \delta}^{(\cdot)}=1). 8 | \end{align} 9 | 10 | for :math:`t\in\{t_\text{pre}, t_\text{eval}\}`. 11 | Here, :math:`g^{(\cdot)}_{\mathrm{g}, t, \text{eval} + \delta}(\cdot)` denotes the population outcome regression function (for either treatment or control group at time period :math:`t`) and :math:`m_{0, \mathrm{g}, t_\text{eval} + \delta}(\cdot)` the generalized propensity score. 12 | 13 | .. note:: 14 | Remark that the nuisance functions depend on the control group used for the estimation of the target parameter. 15 | By slight abuse of notation we use the same notation for both control groups :math:`C_{i,t}^{(\text{nev})}` and :math:`C_{i,t}^{(\text{nyt})}`. More specifically, the 16 | control group only depends on :math:`\delta` for *not yet treated* units. 17 | 18 | For a given tuple :math:`(\mathrm{g}, t_\text{pre}, t_\text{eval})` the target parameter :math:`ATT(\mathrm{g},t)` is estimated by solving the empirical version of the the following linear moment condition: 19 | 20 | .. math:: 21 | ATT(\mathrm{g}, t_\text{pre}, t_\text{eval}):= -\frac{\mathbb{E}[\psi_b(W,\eta_0)]}{\mathbb{E}[\psi_a(W,\eta_0)]} 22 | 23 | with nuisance elements :math:`\eta_0=(g^{0,\text{treat}}_{\mathrm{g}, t_\text{pre}, t_\text{eval} + \delta}, g^{0,\text{control}}_{\mathrm{g}, t_\text{pre}, t_\text{eval} + \delta}, g^{0,\text{treat}}_{\mathrm{g}, t_\text{eval}, t_\text{eval} + \delta}, g^{0,\text{control}}_{\mathrm{g}, t_\text{eval}, t_\text{eval} + \delta}, m_{0, \mathrm{g}, t_\text{eval}})` and score function :math:`\psi(W,\theta, \eta)` defined in the :ref:`DiD Score Section`. 24 | 25 | Setting ``panel=False`` will estimate the target parameter for repeated cross sections. Estimation is conducted via its ``fit()`` method: 26 | 27 | .. tab-set:: 28 | 29 | .. tab-item:: Python 30 | :sync: py 31 | 32 | .. ipython:: python 33 | :okwarning: 34 | 35 | import numpy as np 36 | import doubleml as dml 37 | from doubleml.did.datasets import make_did_cs_CS2021 38 | from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier 39 | 40 | np.random.seed(42) 41 | df = make_did_cs_CS2021(n_obs=500) 42 | dml_data = dml.data.DoubleMLPanelData( 43 | df, 44 | y_col="y", 45 | d_cols="d", 46 | id_col="id", 47 | t_col="t", 48 | x_cols=["Z1", "Z2", "Z3", "Z4"], 49 | datetime_unit="M" 50 | ) 51 | dml_did_obj = dml.did.DoubleMLDIDMulti( 52 | obj_dml_data=dml_data, 53 | ml_g=RandomForestRegressor(min_samples_split=10), 54 | ml_m=RandomForestClassifier(min_samples_split=10), 55 | gt_combinations="standard", 56 | control_group="never_treated", 57 | panel=False, 58 | ) 59 | print(dml_did_obj.fit()) 60 | 61 | .. note:: 62 | Remark that the output contains four different outcome regressions :math:`g(d,t, X)` for :math:`d,t\in\{0,1\}` . As in the :ref:`IRM model ` 63 | the outcome regression with :math:`d=0` refers to the control group, whereas :math:`t=0` refers to the pre-treatment period, i.e. 64 | 65 | .. 
math:: 66 | \begin{align} 67 | g(0,0,X) &\approx g^{\text{control}}_{0,\mathrm{g}, t_\text{pre}, \text{eval} + \delta}(X_i) = \mathbb{E}[Y_{i,t_\text{pre}} |X_i, C_{i,t_\text{eval} + \delta}^{(\cdot)}=1, T_i=t_\text{pre}],\\ 68 | g(0,1,X) &\approx g^{\text{control}}_{0,\mathrm{g}, t_\text{eval}, \text{eval} + \delta}(X_i) = \mathbb{E}[Y_{i,t_\text{eval}} |X_i, C_{i,t_\text{eval} + \delta}^{(\cdot)}=1, T_i=t_\text{eval}],\\ 69 | g(1,0,X) &\approx g^{\text{treat}}_{0,\mathrm{g}, t_\text{pre}, \text{eval} + \delta}(X_i) = \mathbb{E}[Y_{i,t_\text{pre}} |X_i, G_i^{\mathrm{g}}=1,, T_i=t_\text{pre}],\\ 70 | g(1,1,X) &\approx g^{\text{treat}}_{0,\mathrm{g}, t_\text{eval}, \text{eval} + \delta}(X_i) = \mathbb{E}[Y_{i,t_\text{eval}} |X_i, G_i^{\mathrm{g}}=1,, T_i=t_\text{eval}]. 71 | \end{align} 72 | 73 | .. note:: 74 | A more detailed example is available in the :ref:`Example Gallery `. -------------------------------------------------------------------------------- /doc/guide/sensitivity/benchmarking.rst: -------------------------------------------------------------------------------- 1 | The input parameters for the sensitivity analysis are quite hard to interpret (depending on the model). Consequently it is challenging to come up with reasonable bounds 2 | for the confounding strength ``cf_y`` and ``cf_d`` (and ``rho``). To get a grasp on the magnitude of the bounds a popular approach is to rely on observed confounders 3 | to obtain an informed guess on the strength of possibly unobserved confounders. 4 | 5 | The underlying principle is relatively simple. If we have an observed confounder :math:`X_1`, we are able to emulate omitted confounding by purposely omitting 6 | :math:`X_1` and refitting the whole model. This enables us to compare the "long" and "short" form with and without omitted confounding. 7 | Considering the ``sensitivity_params`` of both models one can estimate the corresponding strength of confounding ``cf_y`` and ``cf_d`` (and ``rho``). 8 | 9 | .. note:: 10 | 11 | - The benchmarking can also be done with a set of benchmarking variables (e.g. :math:`X_1, X_2, X_3`), which tries to emulate the effect of multiple unobserved confounders. 12 | - The approach is quite computationally demanding, as the short model that omits the benchmark variables has to be fitted. 13 | 14 | The ``sensitivity_benchmark()`` method implements this approach. 15 | The method just requires a set of valid covariates, the ``benchmarking_set``, to compute the benchmark. The benchmark variables have to be a subset of the covariates used in the main analysis. 16 | 17 | .. tab-set:: 18 | 19 | .. tab-item:: Python 20 | :sync: py 21 | 22 | .. ipython:: python 23 | 24 | dml_plr_obj.sensitivity_benchmark(benchmarking_set=["X1"]) 25 | 26 | The method returns a :py:class:`pandas.DataFrame`, containing the benchmarked values for ``cf_y``, ``cf_d``, ``rho`` and the change in the estimates 27 | ``delta_theta``. 28 | 29 | .. note:: 30 | 31 | - The benchmarking results should be used to get an idea of the magnitude/validity of proposed confounding strength of the omitted confounders. Whether these values are close to the real confounding, depends entirely on the 32 | setting and choice of the benchmarking variables. A good benchmarking set has a strong justification which refers to the omitted confounders. 33 | - If the benchmarking variables are only weak confounders, the estimates of ``rho`` can be slightly unstable (due to small denominators). 34 | 35 | The implementation is based on `Chernozhukov et al. 
(2022) `_ Appendix D and corresponds to a generalization of 36 | the benchmarking process in the `Sensemakr package `_ for regression models to its use with double machine learning. 37 | For an introduction to Sensemakr, see `Cinelli and Hazlett (2020) `_ and the `Sensemakr introduction `_. 38 | 39 | The benchmarked estimates are the following: 40 | 41 | Let the subscript :math:`short` denote the "short" form of the model, where the benchmarking variables are omitted. 42 | 43 | - :math:`\hat{\sigma}^2_{short}` denotes the variance of the outcome regression in the "short" form. 44 | - :math:`\hat{\nu}^2_{short}` denotes the second moment of the Riesz representer in the "short" form. 45 | 46 | Both parameters are contained in the ``sensitivity_params`` of the "short" form. 47 | This enables the following estimation of the nonparametric :math:`R^2`'s of the outcome regression 48 | 49 | - :math:`\hat{R}^2:= 1 - \frac{\hat{\sigma}^2}{\textrm{Var}(Y)}` 50 | - :math:`\hat{R}^2_{short}:= 1 - \frac{\hat{\sigma}^2_{short}}{\textrm{Var}(Y)}` 51 | 52 | and the correlation ratio of the estimated Riesz representations 53 | 54 | .. math:: 55 | 56 | \hat{R}^2_{\alpha}:= \frac{\hat{\nu}^2_{short}}{\hat{\nu}^2}. 57 | 58 | The benchmarked estimates are then defined as 59 | 60 | - ``cf_y``:math:`:=\frac{\hat{R}^2 - \hat{R}^2_{short}}{1 - \hat{R}^2}` measures the proportion of residual variance in the outcome :math:`Y` explained by adding the purposely omitted ``benchmarking_set`` 61 | 62 | - ``cf_d``:math:`:=\frac{1 - \hat{R}^2_{\alpha}}{\hat{R}^2_{\alpha}}` measures the proportional gain in variation that the ``benchmarking_set`` creates in the Riesz representer 63 | 64 | Further, the degree of adversity :math:`\rho` can be estimated via 65 | 66 | .. math:: 67 | 68 | \hat{\rho} := \frac{\hat{\theta}_{short} - \hat{\theta}}{ \sqrt{(\hat{\sigma}^2_{short} - \hat{\sigma}^2)(\hat{\nu}^2 - \hat{\nu}^2_{short})}}. 69 | 70 | 71 | For a more detailed description, see `Chernozhukov et al. (2022) `_ Appendix D. 72 | 73 | .. note:: 74 | - As benchmarking requires the estimation of a separate model, the use with external predictions is generally not possible without supplying further predictions.
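75 | 76 | As an illustration of the formulas above, the following minimal sketch recomputes the benchmarked ``cf_y``, ``cf_d`` and ``rho`` from the quantities stored in the ``sensitivity_params`` of the "long" and "short" models. All numerical values are hypothetical placeholders and only serve to make the arithmetic concrete. 77 | 78 | .. code-block:: python 79 | 80 | import numpy as np 81 | 82 | # hypothetical values; in practice they are taken from the sensitivity_params 83 | # of the full ("long") model and of the refitted "short" model 84 | var_y = 4.0 # sample variance of the outcome Y 85 | sigma2, sigma2_short = 1.00, 1.10 # variance of the outcome regression 86 | nu2, nu2_short = 2.50, 2.30 # second moment of the Riesz representer 87 | theta, theta_short = 0.50, 0.44 # point estimates 88 | 89 | # nonparametric R^2 of the outcome regression and correlation ratio of the Riesz representers 90 | r2 = 1 - sigma2 / var_y 91 | r2_short = 1 - sigma2_short / var_y 92 | r2_alpha = nu2_short / nu2 93 | 94 | # benchmarked confounding strengths and degree of adversity 95 | cf_y = (r2 - r2_short) / (1 - r2) 96 | cf_d = (1 - r2_alpha) / r2_alpha 97 | rho = (theta_short - theta) / np.sqrt((sigma2_short - sigma2) * (nu2 - nu2_short)) 98 | print(round(cf_y, 3), round(cf_d, 3), round(rho, 3))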
-------------------------------------------------------------------------------- /.github/workflows/test_build_docu_dev.yml: -------------------------------------------------------------------------------- 1 | # Workflow based on https://github.com/actions/starter-workflows/blob/main/ci/python-package.yml 2 | 3 | name: Test Docu Build (with dev pkgs) 4 | 5 | on: 6 | push: 7 | branches: 8 | - main 9 | pull_request: 10 | branches: 11 | - main 12 | - dev 13 | workflow_dispatch: 14 | inputs: 15 | doubleml-py-branch: 16 | description: 'Branch in https://github.com/DoubleML/doubleml-for-py' 17 | required: true 18 | default: 'main' 19 | doubleml-r-branch: 20 | description: 'Branch in https://github.com/DoubleML/doubleml-for-r' 21 | required: true 22 | default: 'main' 23 | nbsphinx-execute: 24 | description: 'Execute notebooks with nbsphinx' 25 | required: false 26 | default: 'auto' 27 | schedule: 28 | - cron: "0 9 * * 1,3,5" 29 | 30 | 31 | jobs: 32 | build: 33 | 34 | runs-on: ubuntu-22.04 35 | env: 36 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 37 | 38 | steps: 39 | - name: Check out the repo containing the docu source 40 | uses: actions/checkout@v4 41 | 42 | - name: Check out the repo containing the python pkg DoubleML (dev) 43 | if: ${{ github.event_name != 'workflow_dispatch' }} 44 | uses: actions/checkout@v4 45 | with: 46 | repository: DoubleML/doubleml-for-py 47 | path: doubleml-for-py 48 | 49 | - name: Check out the repo containing the python pkg DoubleML (dev) 50 | if: ${{ github.event_name == 'workflow_dispatch' }} 51 | uses: actions/checkout@v4 52 | with: 53 | repository: DoubleML/doubleml-for-py 54 | path: doubleml-for-py 55 | ref: ${{ github.event.inputs.doubleml-py-branch }} 56 | 57 | - name: Install graphviz 58 | run: sudo apt-get install graphviz 59 | 60 | - name: Install python 61 | uses: actions/setup-python@v5 62 | with: 63 | python-version: '3.12' 64 | - name: Install dependencies and the python package 65 | run: | 66 | python -m pip install --upgrade pip 67 | pip install -r requirements.txt 68 | pip uninstall -y DoubleML 69 | cd doubleml-for-py 70 | pip install -e .[rdd] 71 | 72 | - name: Add R repository 73 | run: | 74 | sudo apt install dirmngr gnupg apt-transport-https ca-certificates software-properties-common 75 | sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 76 | sudo add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/' 77 | - name: Install R 78 | run: | 79 | sudo apt-get update 80 | sudo apt-get install r-base 81 | sudo apt-get install r-base-dev 82 | sudo apt-get install -y zlib1g-dev libicu-dev pandoc make libcurl4-openssl-dev libssl-dev 83 | 84 | - name: Get user library folder 85 | run: | 86 | mkdir ${GITHUB_WORKSPACE}/tmp_r_libs_user 87 | echo R_LIBS_USER=${GITHUB_WORKSPACE}/tmp_r_libs_user >> $GITHUB_ENV 88 | 89 | - name: Query R version 90 | run: | 91 | writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") 92 | shell: Rscript {0} 93 | 94 | - name: Cache R packages 95 | uses: actions/cache@v4 96 | with: 97 | path: ${{ env.R_LIBS_USER }} 98 | key: doubleml-test-build-dev-${{ hashFiles('.github/R-version') }} 99 | 100 | - name: Install R kernel for Jupyter and the R package DoubleML (dev) 101 | if: ${{ github.event_name != 'workflow_dispatch' }} 102 | run: | 103 | install.packages('remotes') 104 | remotes::install_github('DoubleML/doubleml-for-r', dependencies = TRUE) 105 | install.packages(c('ggplot2', 'IRkernel', 'xgboost', 'hdm', 
'reshape2', 'gridExtra', "igraph", "mlr3filters", "mlr3measures", "did")) 106 | IRkernel::installspec() 107 | shell: Rscript {0} 108 | 109 | - name: Install R kernel for Jupyter and the R package DoubleML (dev) 110 | if: ${{ github.event_name == 'workflow_dispatch' }} 111 | run: | 112 | install.packages('remotes') 113 | remotes::install_github('DoubleML/doubleml-for-r@${{ github.event.inputs.doubleml-r-branch }}', dependencies = TRUE) 114 | install.packages(c('ggplot2', 'IRkernel', 'xgboost', 'hdm', 'reshape2', 'gridExtra', "igraph", "mlr3filters", "mlr3measures", "did")) 115 | IRkernel::installspec() 116 | shell: Rscript {0} 117 | 118 | - name: Build docu with sphinx 119 | run: | 120 | make -C doc html NBSPHINX_EXECUTE=${{ github.event.inputs.nbsphinx-execute || 'auto' }} 121 | 122 | - name: Check for broken links / URLs 123 | run: | 124 | make -C doc linkcheck 125 | 126 | - name: Upload html artifacts 127 | uses: actions/upload-artifact@v4 128 | with: 129 | name: build_html 130 | path: doc/_build/html/ 131 | -------------------------------------------------------------------------------- /doc/guide/scores/did/did_pa_score.rst: -------------------------------------------------------------------------------- 1 | As in the description of the :ref:`DiD model `, the required nuisance elements are 2 | 3 | .. math:: 4 | \begin{align} 5 | g_{0, \mathrm{g}, t_\text{pre}, t_\text{eval}, \delta}(X_i) &:= \mathbb{E}[Y_{i,t_\text{eval}} - Y_{i,t_\text{pre}}|X_i, C_{i,t_\text{eval} + \delta}^{(\cdot)} = 1], \\ 6 | m_{0, \mathrm{g}, t_\text{eval} + \delta}(X_i) &:= P(G_i^{\mathrm{g}}=1|X_i, G_i^{\mathrm{g}} + C_{i,t_\text{eval} + \delta}^{(\cdot)}=1). 7 | \end{align} 8 | 9 | for a certain choice of :math:`(\mathrm{g}, t_\text{pre}, t_\text{eval})` and :math:`\delta` and control group :math:`C_{i,t_\text{eval} + \delta}^{(\cdot)}`. 10 | 11 | For notational purposes, we will omit the subscripts :math:`\mathrm{g}, t_\text{pre}, t_\text{eval}, \delta` in the following and use the notation 12 | 13 | * :math:`g_0(0, X_i)\equiv g_{0, \mathrm{g}, t_\text{pre}, t_\text{eval}, \delta}(X_i)` (population outcome change regression function of the control group) 14 | * :math:`m_0(X_i)\equiv m_{0, \mathrm{g}, t_\text{eval} + \delta}(X_i)` (generalized propensity score) 15 | 16 | All scores in the multi-period setting have the form 17 | 18 | .. math:: 19 | 20 | \psi(W_i,\theta, \eta) := 21 | \begin{cases} 22 | \tilde{\psi}(W_i,\theta, \eta) & \text{for } G_i^{\mathrm{g}} \vee C_{i,t_\text{eval} + \delta}^{(\cdot)}=1 \\ 23 | 0 & \text{otherwise} 24 | \end{cases} 25 | 26 | i.e. the score is only non-zero for units in the corresponding treatment group :math:`\mathrm{g}` and control group :math:`C_{i,t_\text{eval} + \delta}^{(\cdot)}`. 27 | 28 | For the difference-in-differences model implemented in ``DoubleMLDIDMulti`` one can choose between 29 | ``score='observational'`` and ``score='experimental'``. 30 | 31 | ``score='observational'`` implements the score function (dropping the unit index :math:`i`): 32 | 33 | .. math:: 34 | 35 | \tilde{\psi}(W,\theta, \eta) 36 | :&= -\frac{G^{\mathrm{g}}}{\mathbb{E}_n[G^{\mathrm{g}}]}\theta + \left(\frac{G^{\mathrm{g}}}{\mathbb{E}_n[G^{\mathrm{g}}]} - \frac{\frac{m(X) (1-G^{\mathrm{g}})}{1-m(X)}}{\mathbb{E}_n\left[\frac{m(X) (1-G^{\mathrm{g}})}{1-m(X)}\right]}\right) \left(Y_{t_\text{eval}} - Y_{t_\text{pre}} - g(0,X)\right) 37 | 38 | &= \tilde{\psi}_a(W; \eta) \theta + \tilde{\psi}_b(W; \eta) 39 | 40 | where the components of the final linear score :math:`\psi` are 41 | 42 | .. 
math:: 43 | \psi_a(W; \eta) &= \tilde{\psi}_a(W; \eta) \cdot \max(G^{\mathrm{g}}, C^{(\cdot)}), 44 | 45 | \psi_b(W; \eta) &= \tilde{\psi}_b(W; \eta) \cdot \max(G^{\mathrm{g}}, C^{(\cdot)}) 46 | 47 | and the nuisance elements :math:`\eta=(g, m)`. 48 | 49 | .. note:: 50 | Remark that :math:`1-G^{\mathrm{g}}=C^{(\cdot)}` if :math:`G^{\mathrm{g}} \vee C_{t_\text{eval} + \delta}^{(\cdot)}=1`. 51 | 52 | If ``in_sample_normalization='False'``, the score is set to 53 | 54 | .. math:: 55 | 56 | \tilde{\psi}(W,\theta,\eta) &= - \frac{G^{\mathrm{g}}}{\mathbb{E}_n[G^{\mathrm{g}}]}\theta + \frac{G^{\mathrm{g}} - m(X)}{\mathbb{E}_n[G^{\mathrm{g}}](1-m(X))}\left(Y_{t_\text{eval}} - Y_{t_\text{pre}} - g(0,X)\right) 57 | 58 | &= \tilde{\psi}_a(W; \eta) \theta + \tilde{\psi}_b(W; \eta) 59 | 60 | with :math:`\eta=(g, m)`. 61 | Remark that this will result in the same score, but just uses slightly different normalization. 62 | 63 | ``score='experimental'`` assumes that the treatment probability is independent of the covariates :math:`X` and does not rely on the propensity score. Instead define 64 | the population outcome regression for treated and control group as 65 | 66 | * :math:`g_0(0, X_i)\equiv \mathbb{E}[Y_{i,t_\text{eval}} - Y_{i,t_\text{pre}}|X_i, C_{i,t_\text{eval} + \delta}^{(\cdot)} = 1]` (control group) 67 | * :math:`g_0(1, X_i)\equiv \mathbb{E}[Y_{i,t_\text{eval}} - Y_{i,t_\text{pre}}|X_i, G_i^{\mathrm{g}} = 1]` (treated group) 68 | 69 | ``score='experimental'`` implements the score function: 70 | 71 | .. math:: 72 | 73 | \tilde{\psi}(W,\theta, \eta) 74 | :=\; &-\theta + \left(\frac{G^{\mathrm{g}}}{\mathbb{E}_n[G^{\mathrm{g}}]} - \frac{1-G^{\mathrm{g}}}{\mathbb{E}_n[1-G^{\mathrm{g}}]}\right)\left(Y_{t_\text{eval}} - Y_{t_\text{pre}} - g(0,X)\right) 75 | 76 | &+ \left(1 - \frac{G^{\mathrm{g}}}{\mathbb{E}_n[G^{\mathrm{g}}]}\right) \left(g(1,X) - g(0,X)\right) 77 | 78 | =\; &\tilde{\psi}_a(W; \eta) \theta + \tilde{\psi}_b(W; \eta) 79 | 80 | where the components of the final linear score :math:`\psi` are 81 | 82 | .. math:: 83 | \psi_a(W; \eta) &= \tilde{\psi}_a(W; \eta) \cdot \max(G^{\mathrm{g}}, C^{(\cdot)}), 84 | 85 | \psi_b(W; \eta) &= \tilde{\psi}_b(W; \eta) \cdot \max(G^{\mathrm{g}}, C^{(\cdot)}) 86 | 87 | and the nuisance elements :math:`\eta=(g)`. 88 | 89 | Analogously, if ``in_sample_normalization='False'``, the score is set to 90 | 91 | .. math:: 92 | 93 | \tilde{\psi}(W,\theta, \eta) 94 | :=\; &-\theta + \frac{G^{\mathrm{g}} - \mathbb{E}_n[G^{\mathrm{g}}]}{\mathbb{E}_n[G^{\mathrm{g}}](1-\mathbb{E}_n[G^{\mathrm{g}}])}\left(Y_{t_\text{eval}} - Y_{t_\text{pre}} - g(0,X)\right) 95 | 96 | &+ \left(1 - \frac{G^{\mathrm{g}}}{\mathbb{E}_n[G^{\mathrm{g}}]}\right) \left(g(1,X) - g(0,X)\right) 97 | 98 | =\; &\tilde{\psi}_a(W; \eta) \theta + \tilde{\psi}_b(W; \eta) 99 | 100 | with :math:`\eta=(g)`. 101 | Remark that this will result in the same score, but just uses slightly different normalization. 102 | -------------------------------------------------------------------------------- /doc/guide/models/did/did_setup.rst: -------------------------------------------------------------------------------- 1 | **Difference-in-Differences Models (DID)** implemented in the package focus on the binary treatment case with staggered adoption. 2 | 3 | .. note:: 4 | The notation and identifying assumptions are based on `Callaway and Sant'Anna (2021) `_, but adjusted to better fit into the general package documentation conventions, sometimes slightly abusing notation. 
5 | The underlying score functions are based on `Sant'Anna and Zhao (2020) `_, `Zimmert (2018) `_ and `Chang (2020) `_. 6 | For a more detailed introduction and recent developments of the difference-in-differences literature see e.g. `Roth et al. (2022) `_. 7 | 8 | We consider :math:`n` observed units at time periods :math:`t=1,\dots, \mathcal{T}`. 9 | The treatment status for unit :math:`i` at time period :math:`t` is denoted by the binary variable :math:`D_{i,t}`. The package considers the staggered adoption setting, 10 | where a unit stays treated after it has been treated once (*Irreversibility of Treatment*). 11 | 12 | Let :math:`G^{\mathrm{g}}_i` be an indicator variable that takes value one if unit :math:`i` is first treated in time period :math:`t=\mathrm{g}`, i.e. :math:`G^{\mathrm{g}}_i=1\{G_i=\mathrm{g}\}` with :math:`G_i` referring to the first post-treatment period. 13 | If units are never exposed to the treatment, define :math:`G_i=\infty`. 14 | 15 | The target parameters are defined in terms of differences in potential outcomes. The observed and potential outcome for each unit :math:`i` at time period :math:`t` are assumed to be of the form 16 | 17 | .. math:: 18 | Y_{i,t} = Y_{i,t}(0) + \sum_{\mathrm{g}=2}^{\mathcal{T}} (Y_{i,t}(\mathrm{g}) - Y_{i,t}(0)) \cdot G^{\mathrm{g}}_i, 19 | 20 | such that we observe one consistent potential outcome for each unit at each time period. 21 | 22 | The corresponding target parameters are the average causal effects of the treatment 23 | 24 | .. math:: 25 | ATT(\mathrm{g},t):= \mathbb{E}[Y_{i,t}(\mathrm{g}) - Y_{i,t}(0)|G^{\mathrm{g}}_i=1]. 26 | 27 | This target parameter quantifies the average change in potential outcomes for units that are treated for the first time in period :math:`\mathrm{g}` with the difference in outcome being evaluated for time period :math:`t`. 28 | The corresponding control groups, defined by an indicator :math:`C`, can typically be set as either the *never treated* or *not yet treated* units. 29 | Let 30 | 31 | .. math:: 32 | \begin{align} 33 | C_{i,t}^{(\text{nev})} \equiv C_{i}^{(\text{nev})} &:= 1\{G_i=\infty\} \quad \text{(never treated)}, \\ 34 | C_{i,t}^{(\text{nyt})} &:= 1\{G_i > t\} \quad \text{(not yet treated)}. 35 | \end{align} 36 | 37 | The corresponding identifying assumptions are: 38 | 39 | 1. **Irreversibility of Treatment:** 40 | :math:`D_{i,1} = 0 \quad a.s.` 41 | For all :math:`t=2,\dots,\mathcal{T}`, :math:`D_{i,t-1} = 1` implies :math:`D_{i,t} = 1 \quad a.s.` 42 | 43 | 2. **Data:** 44 | The observed data are generated according to the following mechanisms: 45 | 46 | a. **Panel Data (Random Sampling):** 47 | The sample :math:`(Y_{i,1},\dots, Y_{i,\mathcal{T}}, X_i, D_{i,1}, \dots, D_{i,\mathcal{T}})_{i=1}^n` is independent and identically distributed. 48 | 49 | b. **Repeated Cross Sections:** 50 | The sample consists of :math:`(Y_{i,t},G^{2}_i,\dots,G^{\mathcal{T}}_i, C_i,T_i, X_i)_{i=1}^n`, where :math:`T_i\in \{1,\dots,\mathcal{T}\}` denotes the time period of unit :math:`i` being observed. 51 | Conditional on :math:`T=t`, the data are independent and identically distributed from the distribution of :math:`(Y_{t},G^{2},\dots,G^{\mathcal{T}}, C, X)`, with :math:`(G^{2},\dots,G^{\mathcal{T}}, C, X)` being invariant to :math:`T`. 52 | 53 | 3. 
**Limited Treatment Anticipation:** 54 | There is a known :math:`\delta\ge 0` such that 55 | :math:`\mathbb{E}[Y_{i,t}(\mathrm{g})|X_i, G_i^{\mathrm{g}}=1] = \mathbb{E}[Y_{i,t}(0)|X_i, G_i^{\mathrm{g}}=1]\quad a.s.` for all :math:`\mathrm{g}\in\mathcal{G}, t\in\{1,\dots,\mathcal{T}\}` such that :math:`t< \mathrm{g}-\delta`. 56 | 57 | 4. **Conditional Parallel Trends:** 58 | Let :math:`\delta` be defined as in Assumption 3. 59 | For each :math:`\mathrm{g}\in\mathcal{G}` and :math:`t\in\{2,\dots,\mathcal{T}\}` such that :math:`t\ge \mathrm{g}-\delta`: 60 | 61 | a. **Never Treated:** 62 | :math:`\mathbb{E}[Y_{i,t}(0) - Y_{i,t-1}(0)|X_i, G_i^{\mathrm{g}}=1] = \mathbb{E}[Y_{i,t}(0) - Y_{i,t-1}(0)|X_i,C_{i}^{(\text{nev})}=1] \quad a.s.` 63 | 64 | b. **Not Yet Treated:** 65 | :math:`\mathbb{E}[Y_{i,t}(0) - Y_{i,t-1}(0)|X_i, G_i^{\mathrm{g}}=1] = \mathbb{E}[Y_{i,t}(0) - Y_{i,t-1}(0)|X_i,C_{i,t+\delta}^{(\text{nyt})}=1] \quad a.s.` 66 | 67 | 5. **Overlap:** 68 | For each time period :math:`t=2,\dots,\mathcal{T}` and :math:`\mathrm{g}\in\mathcal{G}` there exists an :math:`\epsilon > 0` such that 69 | :math:`P(G_i^{\mathrm{g}}=1) > \epsilon` and :math:`P(G_i^{\mathrm{g}}=1|X_i, G_i^{\mathrm{g}} + C_{i,t}^{(\text{nyt})}=1) < 1-\epsilon\quad a.s.` 70 | 71 | .. note:: 72 | For a detailed discussion of the assumptions see `Callaway and Sant'Anna (2021) `_. 73 | 74 | Under the assumptions above (either Assumption a. or b.), the target parameter :math:`ATT(\mathrm{g},t)` is identified, see Theorem 1 in `Callaway and Sant'Anna (2021) `_. 75 | -------------------------------------------------------------------------------- /doc/guide/sensitivity/theory.rst: -------------------------------------------------------------------------------- 1 | Assume that we can write the model in the following representation 2 | 3 | .. math:: 4 | 5 | \theta_0 = \mathbb{E}[m(W,g_0)], 6 | 7 | where usually :math:`g_0(W) = \mathbb{E}[Y|X, D]` (currently, the sensitivity analysis is only available for linear models). 8 | As long as :math:`\mathbb{E}[m(W,f)]` is a continuous linear functional of :math:`f`, there exists a unique square 9 | integrable random variable :math:`\alpha_0(W)`, called Riesz representer 10 | (see `Riesz-Fréchet representation theorem `_), such that 11 | 12 | .. math:: 13 | 14 | \theta_0 = \mathbb{E}[g_0(W)\alpha_0(W)]. 15 | 16 | The target parameter :math:`\theta_0` has the following representation 17 | 18 | .. math:: 19 | 20 | \theta_0 = \mathbb{E}[m(W,g_0) + (Y-g_0(W))\alpha_0(W)], 21 | 22 | which corresponds to a Neyman orthogonal score function (orthogonal with respect to nuisance elements :math:`(g, \alpha)`). 23 | To bound the omitted variable bias, the following further elements are needed. 24 | The variance of the outcome regression 25 | 26 | .. math:: 27 | 28 | \sigma_0^2 := \mathbb{E}[(Y-g_0(W))^2] 29 | 30 | and the second moment of the Riesz representer 31 | 32 | .. math:: 33 | 34 | \nu_0^2 := \mathbb{E}[\alpha_0(W)^2] =2\mathbb{E}[m(W,\alpha_0)] - \mathbb{E}[\alpha_0(W)^2]. 35 | 36 | Both representations are Neyman orthogonal with respect to :math:`g` and :math:`\alpha`, respectively. 37 | Further, define the corresponding score functions 38 | 39 | .. math:: 40 | 41 | \psi_{\sigma^2}(W, \sigma^2, g) &:= (Y-g(W))^2 - \sigma^2\\ 42 | \psi_{\nu^2}(W, \nu^2, \alpha) &:= 2m(W,\alpha) - \alpha(W)^2 - \nu^2. 43 | 44 | Recall that the parameter :math:`\theta_0` is identified via the moment condition 45 | 46 | .. math:: 47 | 48 | \theta_0 = \mathbb{E}[m(W,g_0)]. 
49 | 50 | If :math:`W=(Y, D, X)` does not include all confounding variables, the "true" target parameter :math:`\tilde{\theta}_0` 51 | would only be identified via the extended (or "long") form 52 | 53 | .. math:: 54 | 55 | \tilde{\theta}_0 = \mathbb{E}[m(\tilde{W},\tilde{g}_0)], 56 | 57 | where :math:`\tilde{W}=(Y, D, X, A)` includes the unobserved confounders :math:`A`. 58 | In Theorem 2 of their paper, `Chernozhukov et al. (2022) `_ bound the omitted variable bias 59 | 60 | .. math:: 61 | 62 | |\tilde{\theta}_0 -\theta_0|^2 = \rho^2 B^2, 63 | 64 | where 65 | 66 | .. math:: 67 | 68 | B^2 := \mathbb{E}\Big[\big(g(W) - \tilde{g}(\tilde{W})\big)^2\Big]\mathbb{E}\Big[\big(\alpha(W) - \tilde{\alpha}(\tilde{W})\big)^2\Big], 69 | 70 | denotes the product of additional variations in the outcome regression and Riesz representer generated by omitted confounders and 71 | 72 | .. math:: 73 | 74 | \rho^2 := \textrm{Cor}^2\Big(g(W) - \tilde{g}(\tilde{W}),\alpha(W) - \tilde{\alpha}(\tilde{W})\Big), 75 | 76 | denotes the squared correlation between the deviations generated by omitted confounders. The choice :math:`\rho=1` is conservative and 77 | accounts for adversarial confounding. Further, the bound can be expressed as 78 | 79 | .. math:: 80 | 81 | B^2 := \sigma_0^2 \nu_0^2 C_Y^2 C_D^2, 82 | 83 | where 84 | 85 | .. math:: 86 | 87 | C_Y^2 &:= \frac{\mathbb{E}[(\tilde{g}(\tilde{W}) - g(W))^2]}{\mathbb{E}[(Y - g(W))^2]} 88 | 89 | C_D^2 &:=\frac{1 - \frac{\mathbb{E}\big[\alpha(W)^2\big]}{\mathbb{E}\big[\tilde{\alpha}(\tilde{W})^2\big]}}{\frac{\mathbb{E}\big[\alpha(W)^2\big]}{\mathbb{E}\big[\tilde{\alpha}(\tilde{W})^2\big]}}. 90 | 91 | As :math:`\sigma_0^2` and :math:`\nu_0^2` do not depend on the unobserved confounders :math:`A`, they are identified. Further, the other parts have the following interpretations 92 | 93 | - ``cf_y``:math:`:=\frac{\mathbb{E}[(\tilde{g}(\tilde{W}) - g(W))^2]}{\mathbb{E}[(Y - g(W))^2]}` measures the proportion of residual variance in the outcome :math:`Y` explained by the latent confounders :math:`A` 94 | 95 | - ``cf_d``:math:`:=1 - \frac{\mathbb{E}\big[\alpha(W)^2\big]}{\mathbb{E}\big[\tilde{\alpha}(\tilde{W})^2\big]}` measures the proportion of residual variance in the Riesz representer :math:`\tilde{\alpha}(\tilde{W})` generated by the latent confounders :math:`A` 96 | 97 | .. note:: 98 | - ``cf_y`` has the interpretation as the *nonparametric partial* :math:`R^2` *of* :math:`A` *with* :math:`Y` *given* :math:`(D,X)` 99 | 100 | .. math:: 101 | 102 | \frac{\textrm{Var}(\mathbb{E}[Y|D,X,A]) - \textrm{Var}(\mathbb{E}[Y|D,X])}{\textrm{Var}(Y)-\textrm{Var}(\mathbb{E}[Y|D,X])} 103 | 104 | - For model-specific interpretations of ``cf_d`` or :math:`C_D^2`, see the corresponding chapters (e.g. :ref:`sensitivity_plr`). 105 | 106 | Consequently, for given values ``cf_y`` and ``cf_d``, we can create lower and upper bounds for the target parameter :math:`\tilde{\theta}_0` of the form 107 | 108 | .. math:: 109 | 110 | \theta_{\pm}:=\theta_0 \pm |\rho| \sigma_0 \nu_0 C_Y C_D. 111 | 112 | Let :math:`\psi(W,\theta,\eta)` be the (correctly scaled) score function for the target parameter :math:`\theta_0`. Then 113 | 114 | .. math:: 115 | 116 | \psi_{\pm}(W,\theta,\eta_\pm):= \psi(W,\theta,\eta) \pm \frac{|\rho| C_Y C_D}{2 \sigma \nu} \Big(\sigma^2 \psi_{\nu^2}(W, \nu^2, \alpha) + \nu^2 \psi_{\sigma^2}(W, \sigma^2, g)\Big) 117 | 118 | determines an orthogonal score function for :math:`\theta_{\pm}`, with nuisance elements :math:`\eta_\pm:=(g, \alpha, \sigma, \nu)`. 
119 | The score can be used to calculate the standard deviations of :math:`\theta_{\pm}` via 120 | 121 | .. math:: 122 | 123 | \sigma^2_{\pm}= \mathbb{E}[\psi_{\pm}(W,\theta,\eta_\pm)^2] 124 | 125 | For more detail and interpretations see `Chernozhukov et al. (2022) `_. -------------------------------------------------------------------------------- /doc/guide/sensitivity/implementation.rst: -------------------------------------------------------------------------------- 1 | The :ref:`plr-model` will be used as an example 2 | 3 | .. tab-set:: 4 | 5 | .. tab-item:: Python 6 | :sync: py 7 | 8 | .. ipython:: python 9 | 10 | import numpy as np 11 | import doubleml as dml 12 | from doubleml.plm.datasets import make_plr_CCDDHNR2018 13 | from sklearn.ensemble import RandomForestRegressor 14 | from sklearn.base import clone 15 | 16 | learner = RandomForestRegressor(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2) 17 | ml_l = clone(learner) 18 | ml_m = clone(learner) 19 | np.random.seed(1111) 20 | data = make_plr_CCDDHNR2018(alpha=0.5, n_obs=500, dim_x=20, return_type='DataFrame') 21 | obj_dml_data = dml.DoubleMLData(data, 'y', 'd') 22 | dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, ml_l, ml_m) 23 | 24 | If the sensitivity analysis is implemented (see :ref:`sensitivity_models`), the corresponding sensitivity elements are estimated 25 | automatically by calling the ``fit()`` method. In most cases these elements are based on the following plug-in estimators 26 | 27 | .. math:: 28 | 29 | \hat{\sigma}^2 &:= \mathbb{E}_n[(Y-\hat{g}(W))^2] 30 | 31 | \hat{\nu}^2 &:= \mathbb{E}_n[2m(W,\hat{\alpha}) - \hat{\alpha}(W)^2] 32 | 33 | where :math:`\hat{g}(W)` and :math:`\hat{\alpha}(W)` denote the cross-fitted predictions of the outcome regression and the Riesz 34 | representer (both are model specific, see :ref:`sensitivity_models`). Further, the corresponding scores are defined as 35 | 36 | .. math:: 37 | 38 | \psi_{\sigma^2}(W, \hat{\sigma}^2, g) &:= (Y-\hat{g}(W))^2 - \hat{\sigma}^2\\ 39 | \psi_{\nu^2}(W, \hat{\nu}^2, \alpha) &:= 2m(W,\hat{\alpha}) - \hat{\alpha}(W)^2 - \hat{\nu}^2. 40 | 41 | After the ``fit()`` call, the sensitivity elements are stored in a dictionary and can be accessed via the ``sensitivity_elements`` property. 42 | 43 | .. tab-set:: 44 | 45 | .. tab-item:: Python 46 | :sync: py 47 | 48 | .. ipython:: python 49 | 50 | dml_plr_obj.fit() 51 | dml_plr_obj.sensitivity_elements.keys() 52 | 53 | Each value is a :math:`3`-dimensional array, with the variances being of form ``(1, n_rep, n_coefs)`` and the scores of form ``(n_obs, n_rep, n_coefs)``. 54 | The ``sensitivity_analysis()`` method then computes the upper and lower bounds for the estimate, based on the sensitivity parameters 55 | ``cf_y``, ``cf_d`` and ``rho`` (default is ``rho=1.0`` to account for adversarial confounding). Additionally, one-sided confidence bounds are computed 56 | based on a supplied significance level (default ``level=0.95``). 57 | The results are summarized as a formatted string in the ``sensitivity_summary`` 58 | 59 | .. tab-set:: 60 | 61 | .. tab-item:: Python 62 | :sync: py 63 | 64 | .. ipython:: python 65 | 66 | dml_plr_obj.sensitivity_analysis(cf_y=0.03, cf_d=0.03, rho=1.0, level=0.95) 67 | print(dml_plr_obj.sensitivity_summary) 68 | 69 | or can be directly accessed via the ``sensitivity_params`` property. 70 | 71 | .. tab-set:: 72 | 73 | .. tab-item:: Python 74 | :sync: py 75 | 76 | .. 
ipython:: python 77 | 78 | dml_plr_obj.sensitivity_params 79 | 80 | The bounds are saved as a nested dictionary, where the keys ``'theta'`` 81 | denote the bounds on the parameter :math:`\hat{\theta}_{\pm}`, ``'se'`` denotes the corresponding standard error and ``'ci'`` denotes the lower and upper 82 | confidence bounds for :math:`\hat{\theta}_{\pm}`. Each of the keys refers to a dictionary with keys ``'lower'`` and ``'upper'`` 83 | which refer to the lower or upper bound, e.g. ``sensitivity_params['theta']['lower']`` refers to the lower bound :math:`\hat{\theta}_{-}` of the estimated coefficient. 84 | 85 | Further, the sensitivity analysis has an input parameter ``theta`` (with default ``theta=0.0``), which refers to the null hypothesis used for each coefficient. 86 | This null hypothesis is used to calculate the robustness values as displayed in the ``sensitivity_params``. 87 | 88 | The robustness value :math:`RV` is defined as the required confounding strength (``cf_y=rv`` and ``cf_d=rv``), such that the lower or upper bound of the causal parameter includes the null hypothesis. 89 | If the estimated parameter :math:`\hat{\theta}` is larger than the null hypothesis, the lower bound is used and vice versa. 90 | The robustness value :math:`RVa` is defined analogously, but additionally incorporates statistical uncertainty (as it is based on the confidence intervals of the bounds). 91 | 92 | To obtain a more complete overview of the sensitivity, one can call the ``sensitivity_plot()`` method. The method creates a contour plot, which calculates estimates of the upper or lower bound for :math:`\theta` 93 | (based on the null hypothesis) for each combination of ``cf_y`` and ``cf_d`` in a grid of values. 94 | 95 | .. figure:: /_static/sensitivity_example_nb.png 96 | :alt: Contour plot 97 | :figclass: captioned-image 98 | 99 | Contour plot example (see :ref:`examplegallery`) 100 | 101 | By adjusting the parameter ``value='ci'`` in the ``sensitivity_plot()`` method the bounds are displayed for the corresponding confidence level. 102 | 103 | .. note:: 104 | 105 | - The ``sensitivity_plot()`` requires calling ``sensitivity_analysis()`` first, since the choice of the bound (upper or lower) is based on 106 | the corresponding null hypothesis. Further, the parameters ``rho`` and ``level`` are used. Both are contained in the ``sensitivity_params`` property. 107 | - The ``sensitivity_plot()`` is created for the first treatment variable. This can be changed via the ``idx_treatment`` parameter. 108 | - The robustness values are given via the intersection contour of the null hypothesis and the identity line ``cf_y = cf_d``. -------------------------------------------------------------------------------- /doc/guide/learners/r/set_hyperparams.rst: -------------------------------------------------------------------------------- 1 | The learners are set during initialization of the :ref:`DoubleML ` model classes 2 | `DoubleML::DoubleMLPLR `_, 3 | `DoubleML::DoubleMLPLIV `_, 4 | `DoubleML::DoubleMLIRM `_ 5 | and `DoubleML::DoubleMLIIVM `_. 6 | Let's simulate some data and consider the partially linear regression model. 7 | We need to specify learners for the nuisance functions :math:`g_0(X) = E[Y|X]` and :math:`m_0(X) = E[D|X]`, 8 | for example `LearnerRegrRanger `_ 9 | (``lrn("regr.ranger")``) for regression with random forests based on the `ranger `_ 10 | package for R. 11 | 12 | .. tab-set:: 13 | 14 | .. tab-item:: R 15 | :sync: r 16 | 17 | ..
jupyter-execute:: 18 | 19 | library(DoubleML) 20 | library(mlr3) 21 | library(mlr3learners) 22 | library(data.table) 23 | lgr::get_logger("mlr3")$set_threshold("warn") 24 | 25 | # set up a mlr3 learner 26 | learner = lrn("regr.ranger") 27 | ml_l = learner$clone() 28 | ml_m = learner$clone() 29 | set.seed(3141) 30 | data = make_plr_CCDDHNR2018(alpha=0.5, return_type='data.table') 31 | obj_dml_data = DoubleMLData$new(data, y_col="y", d_cols="d") 32 | dml_plr_obj = DoubleMLPLR$new(obj_dml_data, ml_l, ml_m) 33 | dml_plr_obj$fit() 34 | dml_plr_obj$summary() 35 | 36 | Without further specification of the hyperparameters, default values are used. To set hyperparameters: 37 | 38 | * We can also use pre-parametrized learners ``lrn("regr.ranger", num.trees=10)``. 39 | * Alternatively, hyperparameters can be set after initialization via the method 40 | ``set_ml_nuisance_params(learner, treat_var, params, set_fold_specific)``. 41 | 42 | .. tab-set:: 43 | 44 | .. tab-item:: R 45 | :sync: r 46 | 47 | .. jupyter-execute:: 48 | 49 | set.seed(3141) 50 | ml_l = lrn("regr.ranger", num.trees=10) 51 | ml_m = lrn("regr.ranger") 52 | obj_dml_data = DoubleMLData$new(data, y_col="y", d_cols="d") 53 | dml_plr_obj = DoubleMLPLR$new(obj_dml_data, ml_l, ml_m) 54 | dml_plr_obj$fit() 55 | dml_plr_obj$summary() 56 | 57 | set.seed(3141) 58 | ml_l = lrn("regr.ranger") 59 | dml_plr_obj = DoubleMLPLR$new(obj_dml_data, ml_l , ml_m) 60 | dml_plr_obj$set_ml_nuisance_params("ml_l", "d", list("num.trees"=10)) 61 | dml_plr_obj$fit() 62 | dml_plr_obj$summary() 63 | 64 | Setting treatment-variable-specific or fold-specific hyperparameters: 65 | 66 | * In the multiple-treatment case, the method ``set_ml_nuisance_params(learner, treat_var, params, set_fold_specific)`` 67 | can be used to set different hyperparameters for different treatment variables. 68 | * The method ``set_ml_nuisance_params(learner, treat_var, params, set_fold_specific)`` accepts lists for ``params``. 69 | The structure of the list depends on whether the same parameters should be provided for all folds or separate values 70 | are passed for specific folds. 71 | * Global parameter passing: The values in ``params`` are used for estimation on all folds. 72 | The named list in the argument ``params`` should have entries with names corresponding to 73 | the parameters of the learners. It is required that option ``set_fold_specific`` is set to ``FALSE`` (default). 74 | * Fold-specific parameter passing: ``params`` is a nested list. The outer list needs to be of length ``n_rep`` and the inner 75 | list of length ``n_folds``. The innermost list must have named entries that correspond to the parameters of the learner. 76 | It is required that option ``set_fold_specific`` is set to ``TRUE``. Moreover, fold-specific 77 | parameter passing is only supported, if all parameters are set fold-specific. 78 | * External setting of parameters will override previously set parameters. To assert the choice of parameters, access the 79 | fields ``$learner`` and ``$params``. 80 | 81 | .. tab-set:: 82 | 83 | .. tab-item:: R 84 | :sync: r 85 | 86 | .. 
jupyter-execute:: 87 | 88 | set.seed(3141) 89 | ml_l = lrn("regr.ranger") 90 | ml_m = lrn("regr.ranger") 91 | obj_dml_data = DoubleMLData$new(data, y_col="y", d_cols="d") 92 | 93 | n_rep = 2 94 | n_folds = 3 95 | dml_plr_obj = DoubleMLPLR$new(obj_dml_data, ml_l, ml_m, n_rep=n_rep, n_folds=n_folds) 96 | 97 | # Set globally 98 | params = list("num.trees"=10) 99 | dml_plr_obj$set_ml_nuisance_params("ml_l", "d", params=params) 100 | dml_plr_obj$set_ml_nuisance_params("ml_m", "d", params=params) 101 | dml_plr_obj$learner 102 | dml_plr_obj$params 103 | dml_plr_obj$fit() 104 | dml_plr_obj$summary() 105 | 106 | 107 | The following example illustrates how to set parameters for each fold. 108 | 109 | .. tab-set:: 110 | 111 | .. tab-item:: R 112 | :sync: r 113 | 114 | .. jupyter-execute:: 115 | 116 | learner = lrn("regr.ranger") 117 | ml_l = learner$clone() 118 | ml_m = learner$clone() 119 | dml_plr_obj = DoubleMLPLR$new(obj_dml_data, ml_l, ml_m, n_rep=n_rep, n_folds=n_folds) 120 | 121 | # Set values for each fold 122 | params_exact = rep(list(rep(list(params), n_folds)), n_rep) 123 | dml_plr_obj$set_ml_nuisance_params("ml_l", "d", params=params_exact, 124 | set_fold_specific=TRUE) 125 | dml_plr_obj$set_ml_nuisance_params("ml_m", "d", params=params_exact, 126 | set_fold_specific=TRUE) 127 | dml_plr_obj$learner 128 | dml_plr_obj$params 129 | dml_plr_obj$fit() 130 | dml_plr_obj$summary() 131 | -------------------------------------------------------------------------------- /doc/guide/models/plm/plm_models.inc: -------------------------------------------------------------------------------- 1 | The partially linear models (PLM) take the form 2 | 3 | .. math:: 4 | 5 | Y = D \theta_0 + g_0(X) + \zeta, 6 | 7 | where treatment effects are additive with some sort of linear form. 8 | 9 | .. _plr-model: 10 | 11 | Partially linear regression model (PLR) 12 | *************************************** 13 | 14 | .. include:: /guide/models/plm/plr.rst 15 | 16 | .. include:: /shared/causal_graphs/plr_irm_causal_graph.rst 17 | 18 | ``DoubleMLPLR`` implements PLR models. Estimation is conducted via its ``fit()`` method. 19 | 20 | .. note:: 21 | Remark that the standard approach with ``score='partialling out'`` does not rely on a direct estimate of :math:`g_0(X)`, 22 | but :math:`\ell_0(X) := \mathbb{E}[Y \mid X] = \theta_0 \mathbb{E}[D \mid X] + g(X)`. 23 | 24 | .. tab-set:: 25 | 26 | .. tab-item:: Python 27 | :sync: py 28 | 29 | .. ipython:: python 30 | 31 | import numpy as np 32 | import doubleml as dml 33 | from doubleml.plm.datasets import make_plr_CCDDHNR2018 34 | from sklearn.ensemble import RandomForestRegressor 35 | from sklearn.base import clone 36 | 37 | learner = RandomForestRegressor(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2) 38 | ml_l = clone(learner) 39 | ml_m = clone(learner) 40 | np.random.seed(1111) 41 | data = make_plr_CCDDHNR2018(alpha=0.5, n_obs=500, dim_x=20, return_type='DataFrame') 42 | obj_dml_data = dml.DoubleMLData(data, 'y', 'd') 43 | dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, ml_l, ml_m) 44 | print(dml_plr_obj.fit()) 45 | 46 | .. tab-item:: R 47 | :sync: r 48 | 49 | .. 
jupyter-execute:: 50 | 51 | library(DoubleML) 52 | library(mlr3) 53 | library(mlr3learners) 54 | library(data.table) 55 | lgr::get_logger("mlr3")$set_threshold("warn") 56 | 57 | learner = lrn("regr.ranger", num.trees = 100, mtry = 20, min.node.size = 2, max.depth = 5) 58 | ml_l = learner$clone() 59 | ml_m = learner$clone() 60 | set.seed(1111) 61 | data = make_plr_CCDDHNR2018(alpha=0.5, n_obs=500, dim_x=20, return_type='data.table') 62 | obj_dml_data = DoubleMLData$new(data, y_col="y", d_cols="d") 63 | dml_plr_obj = DoubleMLPLR$new(obj_dml_data, ml_l, ml_m) 64 | dml_plr_obj$fit() 65 | print(dml_plr_obj) 66 | 67 | .. _lplr-model: 68 | 69 | Logistic partially linear regression model (LPLR) 70 | ************************************************* 71 | 72 | .. include:: /guide/models/plm/lplr.rst 73 | 74 | .. include:: /shared/causal_graphs/plr_irm_causal_graph.rst 75 | 76 | ``DoubleMLLPLR`` implements LPLR models. Estimation is conducted via its ``fit()`` method. 77 | 78 | .. note:: 79 | Remark that the treatment effects are not additive in this model. The partial linear term enters the model through a logistic link function. 80 | 81 | .. tab-set:: 82 | 83 | .. tab-item:: Python 84 | :sync: py 85 | 86 | .. ipython:: python 87 | 88 | import numpy as np 89 | import doubleml as dml 90 | from doubleml.plm.datasets import make_lplr_LZZ2020 91 | from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier 92 | from sklearn.base import clone 93 | np.random.seed(3141) 94 | ml_t = RandomForestRegressor(n_estimators=100, max_features=15, max_depth=15, min_samples_leaf=5) 95 | ml_m = RandomForestRegressor(n_estimators=100, max_features=15, max_depth=15, min_samples_leaf=5) 96 | ml_M = RandomForestClassifier(n_estimators=100, max_features=15, max_depth=15, min_samples_leaf=5) 97 | obj_dml_data = make_lplr_LZZ2020(alpha=0.5, n_obs=1000, dim_x=15) 98 | dml_lplr_obj = dml.DoubleMLLPLR(obj_dml_data, ml_M, ml_t, ml_m) 99 | dml_lplr_obj.fit().summary 100 | 101 | 102 | .. _pliv-model: 103 | 104 | Partially linear IV regression model (PLIV) 105 | ******************************************* 106 | 107 | .. include:: /guide/models/plm/pliv.rst 108 | 109 | .. include:: /shared/causal_graphs/pliv_iivm_causal_graph.rst 110 | 111 | ``DoubleMLPLIV`` implements PLIV models. 112 | Estimation is conducted via its ``fit()`` method: 113 | 114 | .. tab-set:: 115 | 116 | .. tab-item:: Python 117 | :sync: py 118 | 119 | .. ipython:: python 120 | :okwarning: 121 | 122 | import numpy as np 123 | import doubleml as dml 124 | from doubleml.plm.datasets import make_pliv_CHS2015 125 | from sklearn.ensemble import RandomForestRegressor 126 | from sklearn.base import clone 127 | 128 | learner = RandomForestRegressor(n_estimators=100, max_features=5, max_depth=5, min_samples_leaf=5) 129 | ml_l = clone(learner) 130 | ml_m = clone(learner) 131 | ml_r = clone(learner) 132 | np.random.seed(2222) 133 | data = make_pliv_CHS2015(alpha=0.5, n_obs=500, dim_x=5, dim_z=1, return_type='DataFrame') 134 | obj_dml_data = dml.DoubleMLData(data, 'y', 'd', z_cols='Z1') 135 | dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_data, ml_l, ml_m, ml_r) 136 | print(dml_pliv_obj.fit()) 137 | 138 | .. tab-item:: R 139 | :sync: r 140 | 141 | .. 
jupyter-execute:: 142 | 143 | library(DoubleML) 144 | library(mlr3) 145 | library(mlr3learners) 146 | library(data.table) 147 | 148 | learner = lrn("regr.ranger", num.trees = 100, mtry = 20, min.node.size = 2, max.depth = 5) 149 | ml_l = learner$clone() 150 | ml_m = learner$clone() 151 | ml_r = learner$clone() 152 | set.seed(2222) 153 | data = make_pliv_CHS2015(alpha=0.5, n_obs=500, dim_x=20, dim_z=1, return_type="data.table") 154 | obj_dml_data = DoubleMLData$new(data, y_col="y", d_cols="d", z_cols="Z1") 155 | dml_pliv_obj = DoubleMLPLIV$new(obj_dml_data, ml_l, ml_m, ml_r) 156 | dml_pliv_obj$fit() 157 | print(dml_pliv_obj) 158 | -------------------------------------------------------------------------------- /doc/guide/scores/did/did_cs_score.rst: -------------------------------------------------------------------------------- 1 | As in the description of the :ref:`DiD model `, the required nuisance elements are 2 | 3 | .. math:: 4 | \begin{align} 5 | g^{\text{treat}}_{0,\mathrm{g}, t, \text{eval} + \delta}(X_i) &:= \mathbb{E}[Y_{i,t} |X_i, G_i^{\mathrm{g}}=1, T_i=t], \\ 6 | g^{\text{control}}_{0,\mathrm{g}, t, \text{eval} + \delta}(X_i) &:= \mathbb{E}[Y_{i,t} |X_i, C_{i,t_\text{eval} + \delta}^{(\cdot)}=1, T_i=t], \\ 7 | m_{0, \mathrm{g}, t_\text{eval} + \delta}(X_i) &:= P(G_i^{\mathrm{g}}=1|X_i, G_i^{\mathrm{g}} + C_{i,t_\text{eval} + \delta}^{(\cdot)}=1). 8 | \end{align} 9 | 10 | for :math:`t\in\{t_\text{pre}, t_\text{eval}\}` and a certain choice of :math:`(\mathrm{g}, t_\text{pre}, t_\text{eval})` and :math:`\delta` and control group :math:`C_{i,t_\text{eval} + \delta}^{(\cdot)}`. 11 | 12 | For notational purposes, we will omit the subscripts :math:`\mathrm{g}, t_\text{pre}, t_\text{eval}, \delta` in the following and use the notation 13 | 14 | * :math:`g_0(1, 0, X_i) \equiv g^{\text{treat}}_{0,\mathrm{g}, t_\text{pre}, \text{eval} + \delta}(X_i)` (pop. outcome regr. function for treatment group in :math:`t_\text{pre}`) 15 | * :math:`g_0(1, 1, X_i) \equiv g^{\text{treat}}_{0,\mathrm{g}, t_\text{eval}, \text{eval} + \delta}(X_i)` (pop. outcome regr. function for treatment group in :math:`t_\text{eval}`) 16 | * :math:`g_0(0, 0, X_i) \equiv g^{\text{control}}_{0,\mathrm{g}, t_\text{pre}, \text{eval} + \delta}(X_i)` (pop. outcome regr. function for control group in :math:`t_\text{pre}`) 17 | * :math:`g_0(0, 1, X_i) \equiv g^{\text{control}}_{0,\mathrm{g}, t_\text{eval}, \text{eval} + \delta}(X_i)` (pop. outcome regr. function for control group in :math:`t_\text{eval}`) 18 | * :math:`m_0(X_i)\equiv m_{0, \mathrm{g}, t_\text{eval} + \delta}(X_i)` (generalized propensity score). 19 | 20 | All scores in the multi-period setting have the form 21 | 22 | .. math:: 23 | 24 | \psi(W_i,\theta, \eta) := 25 | \begin{cases} 26 | \tilde{\psi}(W_i,\theta, \eta) & \text{for } G_i^{\mathrm{g}} \vee C_{i,t_\text{eval} + \delta}^{(\cdot)}=1 \\ 27 | 0 & \text{otherwise} 28 | \end{cases} 29 | 30 | i.e. the score is only non-zero for units in the corresponding treatment group :math:`\mathrm{g}` and control group :math:`C_{i,t_\text{eval} + \delta}^{(\cdot)}`. 31 | 32 | For the difference-in-differences model implemented in ``DoubleMLDIDMulti`` one can choose between 33 | ``score='observational'`` and ``score='experimental'``. 34 | 35 | ``score='observational'`` implements the score function (dropping the unit index :math:`i`): 36 | 37 | ..
math:: 38 | 39 | \tilde{\psi}(W,\theta,\eta) :=\; & - \frac{G^{\mathrm{g}}}{\mathbb{E}_n[G^{\mathrm{g}}]}\theta + \frac{G^{\mathrm{g}}}{\mathbb{E}_n[G^{\mathrm{g}}]}\Big(g(1,1,X) - g(1,0,X) - (g(0,1,X) - g(0,0,X))\Big) 40 | 41 | & + \frac{G^{\mathrm{g}}T}{\mathbb{E}_n[G^{\mathrm{g}}T]} (Y - g(1,1,X)) 42 | 43 | & - \frac{G^{\mathrm{g}}(1-T)}{\mathbb{E}_n[G^{\mathrm{g}}(1-T)]}(Y - g(1,0,X)) 44 | 45 | & - \frac{m(X) (1-G^{\mathrm{g}})T}{1-m(X)} \mathbb{E}_n\left[\frac{m(X) (1-G^{\mathrm{g}})T}{1-m(X)}\right]^{-1} (Y-g(0,1,X)) 46 | 47 | & + \frac{m(X) (1-G^{\mathrm{g}})(1-T)}{1-m(X)} \mathbb{E}_n\left[\frac{m(X) (1-G^{\mathrm{g}})(1-T)}{1-m(X)}\right]^{-1} (Y-g(0,0,X)) 48 | 49 | =\; &\tilde{\psi}_a(W; \eta) \theta + \tilde{\psi}_b(W; \eta) 50 | 51 | where the components of the final linear score :math:`\psi` are 52 | 53 | .. math:: 54 | \psi_a(W; \eta) &= \tilde{\psi}_a(W; \eta) \cdot \max(G^{\mathrm{g}}, C^{(\cdot)}), 55 | 56 | \psi_b(W; \eta) &= \tilde{\psi}_b(W; \eta) \cdot \max(G^{\mathrm{g}}, C^{(\cdot)}) 57 | 58 | and the nuisance elements :math:`\eta=(g, m)`. 59 | 60 | .. note:: 61 | Remark that :math:`1-G^{\mathrm{g}}=C^{(\cdot)}` if :math:`G^{\mathrm{g}} \vee C_{t_\text{eval} + \delta}^{(\cdot)}=1`. 62 | 63 | If ``in_sample_normalization='False'``, the score is set to 64 | 65 | .. math:: 66 | 67 | \tilde{\psi}(W,\theta,\eta) :=\; & - \frac{G^{\mathrm{g}}}{p}\theta + \frac{G^{\mathrm{g}}}{p}\Big(g(1,1,X) - g(1,0,X) - (g(0,1,X) - g(0,0,X))\Big) 68 | 69 | & + \frac{G^{\mathrm{g}}T}{p\lambda} (Y - g(1,1,X)) 70 | 71 | & - \frac{G^{\mathrm{g}}(1-T)}{p(1-\lambda)}(Y - g(1,0,X)) 72 | 73 | & - \frac{m(X) (1-G^{\mathrm{g}})T}{p(1-m(X))\lambda} (Y-g(0,1,X)) 74 | 75 | & + \frac{m(X) (1-G^{\mathrm{g}})(1-T)}{p(1-m(X))(1-\lambda)} (Y-g(0,0,X)) 76 | 77 | =\; &\tilde{\psi}_a(W; \eta) \theta + \tilde{\psi}_b(W; \eta) 78 | 79 | with :math:`\eta=(g, m, p, \lambda)`, where :math:`p_0 = \mathbb{E}[G^{\mathrm{g}}]` and :math:`\lambda_0 = \mathbb{E}[T]` are estimated on the subsample. 80 | Remark that this will result in a similar score, but just uses slightly different normalization. 81 | 82 | ``score='experimental'`` assumes that the treatment probability is independent of the covariates :math:`X` and 83 | implements the score function: 84 | 85 | .. math:: 86 | 87 | \tilde{\psi}(W,\theta,\eta) :=\; & - \theta + \Big(g(1,1,X) - g(1,0,X) - (g(0,1,X) - g(0,0,X))\Big) 88 | 89 | & + \frac{G^{\mathrm{g}}T}{\mathbb{E}_n[G^{\mathrm{g}}T]} (Y - g(1,1,X)) 90 | 91 | & - \frac{G^{\mathrm{g}}(1-T)}{\mathbb{E}_n[G^{\mathrm{g}}(1-T)]}(Y - g(1,0,X)) 92 | 93 | & - \frac{(1-G^{\mathrm{g}})T}{\mathbb{E}_n[(1-G^{\mathrm{g}})T]} (Y-g(0,1,X)) 94 | 95 | & + \frac{(1-G^{\mathrm{g}})(1-T)}{\mathbb{E}_n[(1-G^{\mathrm{g}})(1-T)]} (Y-g(0,0,X)) 96 | 97 | =\; &\tilde{\psi}_a(W; \eta) \theta + \tilde{\psi}_b(W; \eta) 98 | 99 | where the components of the final linear score :math:`\psi` are 100 | 101 | .. math:: 102 | \psi_a(W; \eta) &= \tilde{\psi}_a(W; \eta) \cdot \max(G^{\mathrm{g}}, C^{(\cdot)}), 103 | 104 | \psi_b(W; \eta) &= \tilde{\psi}_b(W; \eta) \cdot \max(G^{\mathrm{g}}, C^{(\cdot)}) 105 | 106 | and the nuisance elements :math:`\eta=(g, m)`. 107 | 108 | Analogously, if ``in_sample_normalization='False'``, the score is set to 109 | 110 | ..
math:: 111 | 112 | \tilde{\psi}(W,\theta,\eta) :=\; & - \theta + \Big(g(1,1,X) - g(1,0,X) - (g(0,1,X) - g(0,0,X))\Big) 113 | 114 | & + \frac{G^{\mathrm{g}}T}{p\lambda} (Y - g(1,1,X)) 115 | 116 | & - \frac{G^{\mathrm{g}}(1-T)}{p(1-\lambda)}(Y - g(1,0,X)) 117 | 118 | & - \frac{(1-G^{\mathrm{g}})T}{(1-p)\lambda} (Y-g(0,1,X)) 119 | 120 | & + \frac{(1-G^{\mathrm{g}})(1-T)}{(1-p)(1-\lambda)} (Y-g(0,0,X)) 121 | 122 | =\; &\tilde{\psi}_a(W; \eta) \theta + \tilde{\psi}_b(W; \eta) 123 | 124 | with :math:`\eta=(g, m, p, \lambda)`, where :math:`p_0 = \mathbb{E}[G^{\mathrm{g}}]` and :math:`\lambda_0 = \mathbb{E}[T]` are estimated on the subsample. 125 | Remark that this will result in a similar score, but just uses slightly different normalization. -------------------------------------------------------------------------------- /doc/guide/models/ssm/ssm_models.inc: -------------------------------------------------------------------------------- 1 | .. include:: /guide/models/ssm/ssm.rst 2 | 3 | .. _ssm-mar-model: 4 | 5 | Missingness at Random 6 | ********************* 7 | 8 | Consider the following two additional assumptions for the sample selection model: 9 | 10 | - **Cond. Independence of Selection:** :math:`Y_i(d) \perp S_i|D_i=d, X_i\quad a.s.` for :math:`d=0,1` 11 | - **Common Support:** :math:`P(D_i=1|X_i)>0` and :math:`P(S_i=1|D_i=d, X_i)>0` for :math:`d=0,1` 12 | 13 | such that outcomes are missing at random (for the score see :ref:`Scores `). 14 | 15 | ``DoubleMLSSM`` implements sample selection models. The score ``score='missing-at-random'`` refers to the corresponding score 16 | relying on the assumptions above. The ``DoubleMLData`` object has to be defined with the additional argument ``s_col`` for the selection indicator. 17 | Estimation is conducted via its ``fit()`` method: 18 | 19 | .. tab-set:: 20 | 21 | .. tab-item:: Python 22 | :sync: py 23 | 24 | .. ipython:: python 25 | :okwarning: 26 | 27 | import numpy as np 28 | from sklearn.linear_model import LassoCV, LogisticRegressionCV 29 | from doubleml.irm.datasets import make_ssm_data 30 | import doubleml as dml 31 | 32 | np.random.seed(42) 33 | n_obs = 2000 34 | df = make_ssm_data(n_obs=n_obs, mar=True, return_type='DataFrame') 35 | dml_data = dml.DoubleMLSSMData(df, 'y', 'd', s_col='s') 36 | 37 | ml_g = LassoCV() 38 | ml_m = LogisticRegressionCV(penalty='l1', solver='liblinear') 39 | ml_pi = LogisticRegressionCV(penalty='l1', solver='liblinear') 40 | 41 | dml_ssm = dml.DoubleMLSSM(dml_data, ml_g, ml_m, ml_pi, score='missing-at-random') 42 | dml_ssm.fit() 43 | print(dml_ssm) 44 | 45 | .. tab-item:: R 46 | :sync: r 47 | 48 | .. jupyter-execute:: 49 | 50 | library(DoubleML) 51 | library(mlr3) 52 | library(data.table) 53 | 54 | set.seed(3141) 55 | n_obs = 2000 56 | df = make_ssm_data(n_obs=n_obs, mar=TRUE, return_type="data.table") 57 | dml_data = DoubleMLData$new(df, y_col="y", d_cols="d", s_col="s") 58 | 59 | ml_g = lrn("regr.cv_glmnet", nfolds = 5, s = "lambda.min") 60 | ml_m = lrn("classif.cv_glmnet", nfolds = 5, s = "lambda.min") 61 | ml_pi = lrn("classif.cv_glmnet", nfolds = 5, s = "lambda.min") 62 | 63 | dml_ssm = DoubleMLSSM$new(dml_data, ml_g, ml_m, ml_pi, score="missing-at-random") 64 | dml_ssm$fit() 65 | print(dml_ssm) 66 | 67 | 68 | .. _ssm-nr-model: 69 | 70 | Nonignorable Nonresponse 71 | ************************ 72 | 73 | When sample selection or outcome attrition is related to unobservables, identification generally requires an instrument for the selection indicator :math:`S_i`. 
74 | Consider the following additional assumptions for the instrumental variable: 75 | 76 | - **Cond. Correlation:** :math:`\exists Z: \mathbb{E}[Z\cdot S|D,X] \neq 0` 77 | - **Cond. Independence:** :math:`Y_i(d,z)=Y_i(d)` and :math:`Y_i \perp Z_i|D_i=d, X_i\quad a.s.` for :math:`d=0,1` 78 | 79 | This requires the instrumental variable :math:`Z_i`, which must not affect :math:`Y_i` or be associated 80 | with unobservables affecting :math:`Y_i` conditional on :math:`D_i` and :math:`X_i`. Further, the selection is determined via 81 | an (unknown) threshold model: 82 | 83 | - **Threshold:** :math:`S_i = 1\{V_i \le \xi(D,X,Z)\}` where :math:`\xi` is a general function and :math:`V_i` is a scalar with strictly monotonic cumulative distribution function conditional on :math:`X_i`. 84 | - **Cond. Independence:** :math:`V_i \perp (Z_i, D_i)|X_i`. 85 | 86 | Let :math:`\Pi_i := P(S_i=1|D_i, X_i, Z_i)` denote the selection probability. 87 | Additionally, the following assumptions are required: 88 | 89 | - **Common Support for Treatment:** :math:`P(D_i=1|X_i, \Pi)>0` 90 | - **Cond. Effect Homogeneity:** :math:`\mathbb{E}[Y_i(1)-Y_i(0)|S_i=1, X_i=x, V_i=v] = \mathbb{E}[Y_i(1)-Y_i(0)|X_i=x, V_i=v]` 91 | - **Common Support for Selection:** :math:`P(S_i=1|D_i=d, X_i=x, Z_i=z)>0\quad a.s.` for :math:`d=0,1` 92 | 93 | For further details, see `Bia, Huber and Lafférs (2023) `_. 94 | 95 | .. figure:: /guide/figures/py_ssm.svg 96 | :width: 400 97 | :alt: DAG 98 | :align: center 99 | 100 | Causal paths under nonignorable nonresponse 101 | 102 | 103 | ``DoubleMLSSM`` implements sample selection models. The score ``score='nonignorable'`` refers to the corresponding score 104 | relying on the assumptions above. The ``DoubleMLData`` object has to be defined with the additional argument ``s_col`` for the selection indicator 105 | and ``z_cols`` for the instrument. 106 | Estimation is conducted via its ``fit()`` method: 107 | 108 | .. tab-set:: 109 | 110 | .. tab-item:: Python 111 | :sync: py 112 | 113 | .. ipython:: python 114 | :okwarning: 115 | 116 | import numpy as np 117 | from sklearn.linear_model import LassoCV, LogisticRegressionCV 118 | from doubleml.irm.datasets import make_ssm_data 119 | import doubleml as dml 120 | 121 | np.random.seed(42) 122 | n_obs = 2000 123 | df = make_ssm_data(n_obs=n_obs, mar=False, return_type='DataFrame') 124 | dml_data = dml.DoubleMLSSMData(df, 'y', 'd', z_cols='z', s_col='s') 125 | 126 | ml_g = LassoCV() 127 | ml_m = LogisticRegressionCV(penalty='l1', solver='liblinear') 128 | ml_pi = LogisticRegressionCV(penalty='l1', solver='liblinear') 129 | 130 | dml_ssm = dml.DoubleMLSSM(dml_data, ml_g, ml_m, ml_pi, score='nonignorable') 131 | dml_ssm.fit() 132 | print(dml_ssm) 133 | 134 | .. tab-item:: R 135 | :sync: r 136 | 137 | .. jupyter-execute:: 138 | 139 | library(DoubleML) 140 | library(mlr3) 141 | library(data.table) 142 | 143 | set.seed(3141) 144 | n_obs = 2000 145 | df = make_ssm_data(n_obs=n_obs, mar=FALSE, return_type="data.table") 146 | dml_data = DoubleMLData$new(df, y_col="y", d_cols="d", z_cols = "z", s_col="s") 147 | 148 | ml_g = lrn("regr.cv_glmnet", nfolds = 5, s = "lambda.min") 149 | ml_m = lrn("classif.cv_glmnet", nfolds = 5, s = "lambda.min") 150 | ml_pi = lrn("classif.cv_glmnet", nfolds = 5, s = "lambda.min") 151 | 152 | dml_ssm = DoubleMLSSM$new(dml_data, ml_g, ml_m, ml_pi, score="nonignorable") 153 | dml_ssm$fit() 154 | print(dml_ssm)
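155 | 156 | As a hypothetical follow-up (a minimal sketch, assuming the fitted Python object ``dml_ssm`` from the example above), the estimated coefficient and its confidence interval can be inspected after ``fit()``: 157 | 158 | .. code-block:: python 159 | 160 | # assumes the fitted ``dml_ssm`` object from the Python example above 161 | print(dml_ssm.summary) # coefficient, standard error and p-value 162 | print(dml_ssm.confint(level=0.95)) # 95% confidence interval --------------------------------------------------------------------------------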