├── .gitignore
├── ATE_Estimation_with_Machine_Learning.ipynb
├── IV_Strategies.ipynb
├── LICENSE
├── README.md
├── Sensitivity_Analysis.ipynb
├── data
│   ├── ditella-crime-2004
│   │   ├── CrimebyBlock.dta
│   │   ├── DiTella_crime.csv
│   │   ├── MonthlyPanel.dta
│   │   ├── README
│   │   ├── README~
│   │   ├── WeeklyPanel.dta
│   │   └── data_cleaning.ipynb
│   ├── hbp_dbp.csv
│   └── outvote_2020_data.csv
└── difference_in_differences.ipynb
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
--------------------------------------------------------------------------------
/ATE_Estimation_with_Machine_Learning.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "ATE-Estimation-with-Machine-Learning.ipynb",
7 | "provenance": [],
8 | "collapsed_sections": [],
9 | "toc_visible": true,
10 | "include_colab_link": true
11 | },
12 | "kernelspec": {
13 | "name": "python3",
14 | "display_name": "Python 3"
15 | },
16 | "language_info": {
17 | "name": "python"
18 | }
19 | },
20 | "cells": [
21 | {
22 | "cell_type": "markdown",
23 | "metadata": {
24 | "id": "view-in-github",
25 | "colab_type": "text"
26 | },
27 | "source": [
 28 |         "(Open in Colab badge link)"
29 | ]
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "metadata": {
34 | "id": "QfZkNLUb4B-p"
35 | },
36 | "source": [
37 | "# ATT Estimation Tutorial\n",
38 | "\n",
 39 |         "This tutorial gives a short example of how to estimate the average treatment effect on the treated (ATT) using machine learning methods."
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "metadata": {
45 | "id": "dS2X3Bq1-fxE"
46 | },
47 | "source": [
48 | "import numpy as np\n",
49 | "import pandas as pd\n",
50 | "import scipy as sp\n",
51 | "from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor\n",
52 | "from sklearn.model_selection import KFold, StratifiedKFold, train_test_split\n",
53 | "from sklearn.metrics import mean_squared_error, log_loss\n",
54 | "import sklearn\n",
55 | "import os"
56 | ],
57 | "execution_count": 6,
58 | "outputs": []
59 | },
60 | {
61 | "cell_type": "code",
62 | "metadata": {
63 | "id": "nxJ46X9cFJ9X"
64 | },
65 | "source": [
66 | "RANDOM_SEED=42\n",
67 | "np.random.seed(RANDOM_SEED)"
68 | ],
69 | "execution_count": 7,
70 | "outputs": []
71 | },
72 | {
73 | "cell_type": "markdown",
74 | "metadata": {
75 | "id": "yPbJeayiEs3u"
76 | },
77 | "source": [
 78 |         "## Load and Format LaLonde Observational Data"
79 | ]
80 | },
81 | {
82 | "cell_type": "code",
83 | "metadata": {
84 | "id": "2AC9TPko-hbt"
85 | },
86 | "source": [
87 | "def make_data_lalonde(df):\n",
88 | " df_new = df.drop(['nodegree'], axis=1)\n",
89 | " df_new['pos74'] = (df_new['RE74'] > 0).astype(int)\n",
90 | " df_new['pos75'] = (df_new['RE75'] > 0).astype(int)\n",
91 | " df_new['treatment'] = df_new['treatment'].astype(int)\n",
92 | " return df_new\n",
93 | "\n",
94 | "\n",
95 | "col_names = ['treatment', 'age', 'education', 'black',\n",
96 | " 'hispanic', 'married', 'nodegree', 'RE74', 'RE75', 'RE78']\n",
97 | "control = pd.read_csv('https://raw.githubusercontent.com/anishazaveri/austen_plots/master/data/imbens-raw/psid_controls.txt', header=None, sep=r\"\\s\\s\", names=col_names, engine='python')\n",
98 | "treatment = pd.read_csv('https://raw.githubusercontent.com/anishazaveri/austen_plots/master/data/imbens-raw/nswre74_treated.txt', header=None, sep=r\"\\s\\s\", names=col_names, engine='python')\n",
99 | "\n",
100 | "lalonde1 = pd.concat([control, treatment]).reset_index(drop=True)\n",
101 | "lalonde1 = make_data_lalonde(lalonde1)"
102 | ],
103 | "execution_count": 8,
104 | "outputs": []
105 | },
106 | {
107 | "cell_type": "code",
108 | "metadata": {
109 | "colab": {
110 | "base_uri": "https://localhost:8080/",
111 | "height": 203
112 | },
113 | "id": "-A1LX6-t-hZD",
114 | "outputId": "b0e276e2-dce3-424d-ffc7-e2b992ad62ec"
115 | },
116 | "source": [
117 | "lalonde1.head()"
118 | ],
119 | "execution_count": 9,
120 | "outputs": [
121 | {
122 | "output_type": "execute_result",
123 | "data": {
124 | "text/html": [
 125 |               "<HTML table rendering stripped during extraction; see the text/plain output below>"
230 | ],
231 | "text/plain": [
232 | " treatment age education black hispanic ... RE74 RE75 RE78 pos74 pos75\n",
233 | "0 0 47.0 12.0 0.0 0.0 ... 0.0 0.0 0.0 0 0\n",
234 | "1 0 50.0 12.0 1.0 0.0 ... 0.0 0.0 0.0 0 0\n",
235 | "2 0 44.0 12.0 0.0 0.0 ... 0.0 0.0 0.0 0 0\n",
236 | "3 0 28.0 12.0 1.0 0.0 ... 0.0 0.0 0.0 0 0\n",
237 | "4 0 54.0 12.0 0.0 0.0 ... 0.0 0.0 0.0 0 0\n",
238 | "\n",
239 | "[5 rows x 11 columns]"
240 | ]
241 | },
242 | "metadata": {},
243 | "execution_count": 9
244 | }
245 | ]
246 | },
247 | {
248 | "cell_type": "code",
249 | "metadata": {
250 | "id": "APOqpHmrOGzo"
251 | },
252 | "source": [
253 | "confounders = lalonde1.drop(columns=['RE78', 'treatment'])\n",
254 | "outcome = lalonde1['RE78']\n",
255 | "treatment = lalonde1['treatment']"
256 | ],
257 | "execution_count": 10,
258 | "outputs": []
259 | },
260 | {
261 | "cell_type": "markdown",
262 | "metadata": {
263 | "id": "C576dWRsa3ad"
264 | },
265 | "source": [
266 | "## Specify Nuisance Function Models\n",
267 | "\n",
268 | "The next step is to specify models for the conditional expected outcome and propensity score"
269 | ]
270 | },
271 | {
272 | "cell_type": "code",
273 | "metadata": {
274 | "colab": {
275 | "base_uri": "https://localhost:8080/"
276 | },
277 | "id": "qyOhSZRQRb8W",
278 | "outputId": "7df1d854-c13f-4f93-ec7c-4977df6ad283"
279 | },
280 | "source": [
281 | "# specify a model for the conditional expected outcome\n",
282 | "\n",
283 | "# make a function that returns a sklearn model for later use in k-folding\n",
284 | "def make_Q_model():\n",
285 | " return RandomForestRegressor(random_state=RANDOM_SEED, n_estimators=500, max_depth=None)\n",
286 | "Q_model = make_Q_model()\n",
287 | "\n",
288 | "# Sanity check that chosen model actually improves test error\n",
289 | "# A real analysis should give substantial attention to model selection and validation \n",
290 | "\n",
291 | "X_w_treatment = confounders.copy()\n",
292 | "X_w_treatment[\"treatment\"] = treatment\n",
293 | "\n",
294 | "X_train, X_test, y_train, y_test = train_test_split(X_w_treatment, outcome, test_size=0.2)\n",
295 | "Q_model.fit(X_train, y_train)\n",
296 | "y_pred = Q_model.predict(X_test)\n",
297 | "\n",
298 | "test_mse=mean_squared_error(y_pred, y_test)\n",
299 | "print(f\"Test MSE of fit model {test_mse}\") \n",
300 | "baseline_mse=mean_squared_error(y_train.mean()*np.ones_like(y_test), y_test)\n",
301 | "print(f\"Test MSE of no-covariate model {baseline_mse}\")"
302 | ],
303 | "execution_count": 11,
304 | "outputs": [
305 | {
306 | "output_type": "stream",
307 | "name": "stdout",
308 | "text": [
309 | "Test MSE of fit model 105637760.68507269\n",
310 | "Test MSE of no-covariate model 246319790.55062827\n"
311 | ]
312 | }
313 | ]
314 | },
315 | {
316 | "cell_type": "code",
317 | "metadata": {
318 | "colab": {
319 | "base_uri": "https://localhost:8080/"
320 | },
321 | "id": "uq6eZEBXbsaI",
322 | "outputId": "974c356c-07f3-4573-f8c3-b83400c82169"
323 | },
324 | "source": [
325 | "# specify a model for the propensity score\n",
326 | "\n",
327 | "def make_g_model():\n",
328 | "# return LogisticRegression(max_iter=1000)\n",
329 | " return RandomForestClassifier(n_estimators=100, max_depth=5)\n",
330 | "\n",
331 | "g_model = make_g_model()\n",
332 | "# Sanity check that chosen model actually improves test error\n",
333 | "# A real analysis should give substantial attention to model selection and validation \n",
334 | "\n",
335 | "X_train, X_test, a_train, a_test = train_test_split(confounders, treatment, test_size=0.2)\n",
336 | "g_model.fit(X_train, a_train)\n",
337 | "a_pred = g_model.predict_proba(X_test)[:,1]\n",
338 | "\n",
339 | "test_ce=log_loss(a_test, a_pred)\n",
340 | "print(f\"Test CE of fit model {test_ce}\") \n",
341 | "baseline_ce=log_loss(a_test, a_train.mean()*np.ones_like(a_test))\n",
342 | "print(f\"Test CE of no-covariate model {baseline_ce}\")"
343 | ],
344 | "execution_count": 12,
345 | "outputs": [
346 | {
347 | "output_type": "stream",
348 | "name": "stdout",
349 | "text": [
350 | "Test CE of fit model 0.07789407933364972\n",
351 | "Test CE of no-covariate model 0.21817471356014154\n"
352 | ]
353 | }
354 | ]
355 | },
356 | {
357 | "cell_type": "markdown",
358 | "metadata": {
359 | "id": "2RkvV_4_dFWo"
360 | },
361 | "source": [
 362 |         "## Use cross fitting to get predicted outcomes and propensity scores for each unit"
363 | ]
364 | },
365 | {
366 | "cell_type": "code",
367 | "metadata": {
368 | "id": "KA0AsEGJ_X3b"
369 | },
370 | "source": [
371 | "# helper functions to implement the cross fitting\n",
372 | "\n",
373 | "def treatment_k_fold_fit_and_predict(make_model, X:pd.DataFrame, A:np.array, n_splits:int):\n",
374 | " \"\"\"\n",
375 | " Implements K fold cross-fitting for the model predicting the treatment A. \n",
376 | " That is, \n",
377 | " 1. Split data into K folds\n",
378 | " 2. For each fold j, the model is fit on the other K-1 folds\n",
379 | " 3. The fitted model is used to make predictions for each data point in fold j\n",
380 | " Returns an array containing the predictions \n",
381 | "\n",
382 | " Args:\n",
 383 |         "      make_model: function that returns an sklearn model (which implements fit and predict_proba)\n",
384 | " X: dataframe of variables to adjust for\n",
385 | " A: array of treatments\n",
386 | " n_splits: number of splits to use\n",
387 | " \"\"\"\n",
388 | " predictions = np.full_like(A, np.nan, dtype=float)\n",
389 | " kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)\n",
390 | " \n",
391 | " for train_index, test_index in kf.split(X, A):\n",
392 | " X_train = X.loc[train_index]\n",
393 | " A_train = A.loc[train_index]\n",
394 | " g = make_model()\n",
395 | " g.fit(X_train, A_train)\n",
396 | "\n",
397 | " # get predictions for split\n",
398 | " predictions[test_index] = g.predict_proba(X.loc[test_index])[:, 1]\n",
399 | "\n",
400 | " assert np.isnan(predictions).sum() == 0\n",
401 | " return predictions\n",
402 | "\n",
403 | "\n",
404 | "def outcome_k_fold_fit_and_predict(make_model, X:pd.DataFrame, y:np.array, A:np.array, n_splits:int, output_type:str):\n",
405 | " \"\"\"\n",
406 | " Implements K fold cross-fitting for the model predicting the outcome Y. \n",
407 | " That is, \n",
408 | " 1. Split data into K folds\n",
409 | " 2. For each fold j, the model is fit on the other K-1 folds\n",
410 | " 3. The fitted model is used to make predictions for each data point in fold j\n",
411 | " Returns two arrays containing the predictions for all units untreated, all units treated \n",
412 | "\n",
413 | " Args:\n",
 414 |         "      make_model: function that returns an sklearn model (that implements fit and either predict_proba or predict)\n",
415 | " X: dataframe of variables to adjust for\n",
416 | " y: array of outcomes\n",
417 | " A: array of treatments\n",
418 | " n_splits: number of splits to use\n",
419 | " output_type: type of outcome, \"binary\" or \"continuous\"\n",
420 | "\n",
421 | " \"\"\"\n",
422 | " predictions0 = np.full_like(A, np.nan, dtype=float)\n",
423 | " predictions1 = np.full_like(y, np.nan, dtype=float)\n",
424 | " if output_type == 'binary':\n",
425 | " kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)\n",
426 | " elif output_type == 'continuous':\n",
427 | " kf = KFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)\n",
428 | "\n",
429 | " # include the treatment as input feature\n",
430 | " X_w_treatment = X.copy()\n",
431 | " X_w_treatment[\"A\"] = A\n",
432 | "\n",
433 | " # for predicting effect under treatment / control status for each data point \n",
434 | " X0 = X_w_treatment.copy()\n",
435 | " X0[\"A\"] = 0\n",
436 | " X1 = X_w_treatment.copy()\n",
437 | " X1[\"A\"] = 1\n",
438 | "\n",
439 | " \n",
440 | " for train_index, test_index in kf.split(X_w_treatment, y):\n",
441 | " X_train = X_w_treatment.loc[train_index]\n",
442 | " y_train = y.loc[train_index]\n",
443 | " q = make_model()\n",
444 | " q.fit(X_train, y_train)\n",
445 | "\n",
446 | " if output_type =='binary':\n",
447 | " predictions0[test_index] = q.predict_proba(X0.loc[test_index])[:, 1]\n",
448 | " predictions1[test_index] = q.predict_proba(X1.loc[test_index])[:, 1]\n",
449 | " elif output_type == 'continuous':\n",
450 | " predictions0[test_index] = q.predict(X0.loc[test_index])\n",
451 | " predictions1[test_index] = q.predict(X1.loc[test_index])\n",
452 | "\n",
453 | " assert np.isnan(predictions0).sum() == 0\n",
454 | " assert np.isnan(predictions1).sum() == 0\n",
455 | " return predictions0, predictions1"
456 | ],
457 | "execution_count": 13,
458 | "outputs": []
459 | },
460 | {
461 | "cell_type": "code",
462 | "metadata": {
463 | "id": "wVcE6pRQeMNf"
464 | },
465 | "source": [
466 | "g = treatment_k_fold_fit_and_predict(make_g_model, X=confounders, A=treatment, n_splits=10)"
467 | ],
468 | "execution_count": 14,
469 | "outputs": []
470 | },
471 | {
472 | "cell_type": "code",
473 | "metadata": {
474 | "id": "GLEHlLLdWSh9"
475 | },
476 | "source": [
477 | "Q0,Q1=outcome_k_fold_fit_and_predict(make_Q_model, X=confounders, y=outcome, A=treatment, n_splits=10, output_type=\"continuous\")"
478 | ],
479 | "execution_count": 15,
480 | "outputs": []
481 | },
482 | {
483 | "cell_type": "code",
484 | "metadata": {
485 | "colab": {
486 | "base_uri": "https://localhost:8080/",
487 | "height": 203
488 | },
489 | "id": "_NVCV0q0g8wQ",
490 | "outputId": "b638a74f-1c3f-4860-bd3d-af9eb397832a"
491 | },
492 | "source": [
493 | "data_and_nuisance_estimates = pd.DataFrame({'g': g, 'Q0': Q0, 'Q1': Q1, 'A': treatment, 'Y': outcome})\n",
494 | "data_and_nuisance_estimates.head()"
495 | ],
496 | "execution_count": 16,
497 | "outputs": [
498 | {
499 | "output_type": "execute_result",
500 | "data": {
501 | "text/html": [
 502 |               "<HTML table rendering stripped during extraction; see the text/plain output below>"
571 | ],
572 | "text/plain": [
573 | " g Q0 Q1 A Y\n",
574 | "0 0.313350 95.549536 1571.922518 0 0.0\n",
575 | "1 0.191958 2032.024647 3895.070486 0 0.0\n",
576 | "2 0.470788 29.940432 1731.498259 0 0.0\n",
577 | "3 0.517957 11037.487272 9030.776610 0 0.0\n",
578 | "4 0.014246 0.000000 2139.630960 0 0.0"
579 | ]
580 | },
581 | "metadata": {},
582 | "execution_count": 16
583 | }
584 | ]
585 | },
586 | {
587 | "cell_type": "markdown",
588 | "metadata": {
589 | "id": "VNhM7URdgzQB"
590 | },
591 | "source": [
592 | "## Combine predicted values and data into estimate of ATT"
593 | ]
594 | },
595 | {
596 | "cell_type": "code",
597 | "metadata": {
598 | "id": "J-vONC5ejwh2"
599 | },
600 | "source": [
601 | "def att_aiptw(Q0, Q1, g, A, Y, prob_t=None):\n",
602 | " \"\"\"\n",
603 | " # Double ML estimator for the ATT\n",
 604 |         "    This uses the ATT-specific scores; see equation 3.9 of https://www.econstor.eu/bitstream/10419/149795/1/869216953.pdf\n",
605 | " \"\"\"\n",
606 | "\n",
607 | " if prob_t is None:\n",
608 | " prob_t = A.mean() # estimate marginal probability of treatment\n",
609 | "\n",
610 | " tau_hat = (A*(Y-Q0) - (1-A)*(g/(1-g))*(Y-Q0)).mean()/ prob_t\n",
611 | " \n",
612 | " scores = (A*(Y-Q0) - (1-A)*(g/(1-g))*(Y-Q0) - tau_hat*A) / prob_t\n",
613 | " n = Y.shape[0] # number of observations\n",
614 | " std_hat = np.std(scores) / np.sqrt(n)\n",
615 | "\n",
616 | " return tau_hat, std_hat\n"
617 | ],
618 | "execution_count": 17,
619 | "outputs": []
620 | },
621 | {
622 | "cell_type": "code",
623 | "metadata": {
624 | "id": "O_F5r0SSkzzK"
625 | },
626 | "source": [
627 | "def ate_aiptw(Q0, Q1, g, A, Y, prob_t=None):\n",
628 | " \"\"\"\n",
629 | " # Double ML estimator for the ATE\n",
630 | " \"\"\"\n",
631 | "\n",
632 | " tau_hat = (Q1 - Q0 + A*(Y-Q1)/g - (1-A)*(Y-Q0)/(1-g)).mean()\n",
633 | " \n",
634 | " scores = Q1 - Q0 + A*(Y-Q1)/g - (1-A)*(Y-Q0)/(1-g) - tau_hat\n",
635 | " n = Y.shape[0] # number of observations\n",
636 | " std_hat = np.std(scores) / np.sqrt(n)\n",
637 | "\n",
638 | " return tau_hat, std_hat\n"
639 | ],
640 | "execution_count": 18,
641 | "outputs": []
642 | },
643 | {
644 | "cell_type": "code",
645 | "metadata": {
646 | "colab": {
647 | "base_uri": "https://localhost:8080/"
648 | },
649 | "id": "SjDj0F9Bm9uq",
650 | "outputId": "fdef5c08-3829-400b-ea0e-1cb5dd01bc25"
651 | },
652 | "source": [
653 | "tau_hat, std_hat = att_aiptw(**data_and_nuisance_estimates)\n",
654 | "print(f\"The estimate is {tau_hat} pm {1.96*std_hat}\")"
655 | ],
656 | "execution_count": 19,
657 | "outputs": [
658 | {
659 | "output_type": "stream",
660 | "name": "stdout",
661 | "text": [
662 | "The estimate is 1300.9807431649592 pm 1622.6924287596182\n"
663 | ]
664 | }
665 | ]
666 | },
667 | {
668 | "cell_type": "code",
669 | "metadata": {
670 | "colab": {
671 | "base_uri": "https://localhost:8080/"
672 | },
673 | "id": "vSaOp1HwlQ4i",
674 | "outputId": "874e2ea0-dfc1-4594-9d45-b6663f69f163"
675 | },
676 | "source": [
677 | "in_treated = data_and_nuisance_estimates['A']==1\n",
678 | "treated_estimates = data_and_nuisance_estimates[in_treated]\n",
679 | "tau_hat, std_hat = ate_aiptw(**treated_estimates)\n",
680 | "print(f\"The estimate is {tau_hat} pm {1.96*std_hat}\")"
681 | ],
682 | "execution_count": 20,
683 | "outputs": [
684 | {
685 | "output_type": "stream",
686 | "name": "stdout",
687 | "text": [
688 | "The estimate is -33439.05484914103 pm 50637.28008886066\n"
689 | ]
690 | }
691 | ]
692 | },
693 | {
694 | "cell_type": "code",
695 | "metadata": {
696 | "colab": {
697 | "base_uri": "https://localhost:8080/"
698 | },
699 | "id": "IOuJnlbEo8j_",
700 | "outputId": "74678791-7163-41e6-f7a9-a04a7b669e81"
701 | },
702 | "source": [
 703 |         "# The LaLonde data has severe overlap issues. Let's try computing the estimate restricted to a population with only reasonable propensity scores\n",
 704 |         "g = data_and_nuisance_estimates['g']\n",
 705 |         "in_overlap_population = (g < 0.90)\n",
 706 |         "overlap_data_and_nuisance = data_and_nuisance_estimates[in_overlap_population]\n",
707 | "tau_hat, std_hat = att_aiptw(**overlap_data_and_nuisance)\n",
708 | "print(f\"The estimate is {tau_hat} pm {1.96*std_hat}\")"
709 | ],
710 | "execution_count": 21,
711 | "outputs": [
712 | {
713 | "output_type": "stream",
714 | "name": "stdout",
715 | "text": [
716 | "The estimate is 572.1572812652179 pm 1501.516696945994\n"
717 | ]
718 | }
719 | ]
720 | },
721 | {
722 | "cell_type": "code",
723 | "metadata": {
724 | "id": "LnJppbQdjwVI"
725 | },
726 | "source": [
727 | ""
728 | ],
729 | "execution_count": 22,
730 | "outputs": []
731 | }
732 | ]
733 | }
--------------------------------------------------------------------------------
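
Note on /ATE_Estimation_with_Machine_Learning.ipynb: the `att_aiptw` step above implements the ATT-specific AIPTW / double-ML score (the "equation 3.9" its docstring points to). The formulas below are transcribed directly from that code, with $\hat Q_0$ and $\hat g$ standing for the cross-fit predictions `Q0` and `g`, and $\hat\pi$ for the empirical treatment probability; read them as a sketch of what the code computes, not an authoritative restatement of the cited reference.

$$\hat\pi = \frac{1}{n}\sum_{i=1}^n A_i, \qquad
\hat\tau_{\mathrm{ATT}} = \frac{1}{\hat\pi}\cdot\frac{1}{n}\sum_{i=1}^n
\left[\, A_i\bigl(Y_i-\hat Q_0(X_i)\bigr) - (1-A_i)\,\frac{\hat g(X_i)}{1-\hat g(X_i)}\bigl(Y_i-\hat Q_0(X_i)\bigr) \right]$$

$$\hat\varphi_i = \frac{A_i\bigl(Y_i-\hat Q_0(X_i)\bigr) - (1-A_i)\,\frac{\hat g(X_i)}{1-\hat g(X_i)}\bigl(Y_i-\hat Q_0(X_i)\bigr) - \hat\tau_{\mathrm{ATT}}\,A_i}{\hat\pi}, \qquad
\widehat{\mathrm{se}} = \frac{\mathrm{sd}(\hat\varphi)}{\sqrt{n}}$$

The notebook reports the interval $\hat\tau_{\mathrm{ATT}} \pm 1.96\,\widehat{\mathrm{se}}$.
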
/IV_Strategies.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "id": "view-in-github",
7 | "colab_type": "text"
8 | },
9 | "source": [
 10 |     "(Open in Colab badge link)"
11 | ]
12 | },
13 | {
14 | "cell_type": "markdown",
15 | "metadata": {
16 | "id": "3Pr7ijIYeO--"
17 | },
18 | "source": [
19 | "# LATE Estimation Tutorial"
20 | ]
21 | },
22 | {
23 | "cell_type": "markdown",
24 | "metadata": {
25 | "id": "kVIsqn30gqCx"
26 | },
27 | "source": [
 28 |     "This tutorial gives a short example of how to use an instrumental variable to estimate the local average treatment effect (LATE) using machine learning methods."
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": 1,
34 | "metadata": {
35 | "id": "Nv0YbKbGea6U"
36 | },
37 | "outputs": [],
38 | "source": [
39 | "import numpy as np\n",
40 | "import pandas as pd\n",
41 | "import sklearn\n",
42 | "from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor\n",
43 | "from sklearn.linear_model import LogisticRegression\n",
44 | "from sklearn.model_selection import KFold, StratifiedKFold, train_test_split\n",
45 | "from sklearn.metrics import mean_squared_error, log_loss\n",
46 | "import math"
47 | ]
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": 2,
52 | "metadata": {
53 | "id": "ONvBs_yvia3a"
54 | },
55 | "outputs": [],
56 | "source": [
57 | "RANDOM_SEED=0\n",
58 | "np.random.seed(RANDOM_SEED)"
59 | ]
60 | },
61 | {
62 | "cell_type": "markdown",
63 | "metadata": {
64 | "id": "yPbJeayiEs3u"
65 | },
66 | "source": [
 67 |     "## Load Outvote 2020 Observational Data"
68 | ]
69 | },
70 | {
71 | "cell_type": "markdown",
72 | "metadata": {
73 | "id": "2t60_xU_qORv"
74 | },
75 | "source": [
76 | "First, load the observational data."
77 | ]
78 | },
79 | {
80 | "cell_type": "code",
81 | "execution_count": 3,
82 | "metadata": {
83 | "colab": {
84 | "base_uri": "https://localhost:8080/",
85 | "height": 488
86 | },
87 | "id": "iCuOMjvsoWpm",
88 | "outputId": "5eac723f-847b-41f0-a01b-377ec7ec1261"
89 | },
90 | "outputs": [
91 | {
92 | "output_type": "execute_result",
93 | "data": {
94 | "text/plain": [
95 | " voted_2020 messaged queue_position queue_length \\\n",
96 | "0 1 1 1.0 19.0 \n",
97 | "1 1 1 6.0 19.0 \n",
98 | "2 0 1 8.0 19.0 \n",
99 | "3 1 1 10.0 19.0 \n",
100 | "4 1 1 18.0 19.0 \n",
101 | "... ... ... ... ... \n",
102 | "81199 1 0 5.0 5.0 \n",
103 | "81200 1 0 2.0 3.0 \n",
104 | "81201 0 0 1.0 2.0 \n",
105 | "81202 1 1 2.0 9.0 \n",
106 | "81203 0 1 3.0 3.0 \n",
107 | "\n",
108 | " queue_id voted_2018 voted_2016 is_Democrat \\\n",
109 | "0 xgcwm279xcwkxjq4zxabo 0 0 0 \n",
110 | "1 xgcwm279xcwkxjq4zxabo 1 1 1 \n",
111 | "2 xgcwm279xcwkxjq4zxabo 1 0 1 \n",
112 | "3 xgcwm279xcwkxjq4zxabo 1 1 0 \n",
113 | "4 xgcwm279xcwkxjq4zxabo 0 1 1 \n",
114 | "... ... ... ... ... \n",
115 | "81199 0j3f3gyc3qspaukludfcpy4 1 1 0 \n",
116 | "81200 ixyu0l548p9dextzgjspdr 0 1 0 \n",
117 | "81201 ghclqszfr6qvz2i5yesrwo 0 1 0 \n",
118 | "81202 sscqhe1ttlh7fmkmuk231f 1 1 0 \n",
119 | "81203 41tse6pfdjlw0kwcm7rbo 0 0 0 \n",
120 | "\n",
121 | " is_Republican is_Male is_Female is_Married is_Urban is_Rural \\\n",
122 | "0 0 0 1 0 0 0 \n",
123 | "1 0 1 0 0 0 0 \n",
124 | "2 0 0 1 0 0 0 \n",
125 | "3 0 1 0 0 0 0 \n",
126 | "4 0 0 1 0 0 0 \n",
127 | "... ... ... ... ... ... ... \n",
128 | "81199 0 0 1 1 0 0 \n",
129 | "81200 0 0 1 0 0 0 \n",
130 | "81201 0 0 1 0 0 0 \n",
131 | "81202 0 1 0 1 0 0 \n",
132 | "81203 0 1 0 0 0 0 \n",
133 | "\n",
134 | " is_Battleground age \n",
135 | "0 1 68.0 \n",
136 | "1 0 52.0 \n",
137 | "2 0 26.0 \n",
138 | "3 1 39.0 \n",
139 | "4 1 23.0 \n",
140 | "... ... ... \n",
141 | "81199 1 54.0 \n",
142 | "81200 0 36.0 \n",
143 | "81201 1 90.0 \n",
144 | "81202 1 72.0 \n",
145 | "81203 1 72.0 \n",
146 | "\n",
147 | "[81204 rows x 16 columns]"
148 | ],
149 | "text/html": [
 150 |         "<HTML / interactive data-table rendering stripped during extraction; see the text/plain output above>"
479 | ]
480 | },
481 | "metadata": {},
482 | "execution_count": 3
483 | }
484 | ],
485 | "source": [
486 | "outvote = pd.read_csv('https://raw.githubusercontent.com/vveitch/causality-tutorials/main/data/outvote_2020_data.csv')\n",
487 | "outvote"
488 | ]
489 | },
490 | {
491 | "cell_type": "markdown",
492 | "metadata": {
493 | "id": "wksy28PnqSGU"
494 | },
495 | "source": [
496 | "**Data description:**\n",
497 | "* Core variables:\n",
498 | " * `voted_2020`: (binary) the outcome; whether subject voted in 2020 election\n",
499 | " * `messaged`: (binary) the treatment; whether subject was messaged by an Outvote user\n",
500 | " * `queue_position`: (ordinal / pos int / 1+) the instrument; the subject's position/ranking in the user's queue; this is randomized\n",
501 | " * `queue_length`: (ordinal / pos int / 2+) the block; how long the user's queue was; this is endogenous / non-randomized and must be blocked on\n",
502 | "\n",
503 | "* Optional / additional variables: \n",
504 | " * `queue_id`: (int) a unique identifier for the queue the subject was in\n",
505 | " * `voted_2018`: (binary) whether subject voted in 2018 election\n",
506 | " * `voted_2016`: (binary) whether subject voted in 2016 election\n",
507 | " * `is_Democrat`: (binary) whether subject is registered Democrat\n",
508 | " * `is_Republican`: (binary) whether subject is registered Republican\n",
509 | " * `is_Male`: (binary) whether subject identifies as Male\n",
510 | " * `is_Female`: (binary) whether subject identifies as Female\n",
511 | " * `is_Married`: (binary) whether subject is married\n",
512 | " * `is_Urban`: (binary) whether subject lives in a city\n",
513 | " * `is_Rural`: (binary) whether subject lives in a rural area\n",
514 | " * `is_Battleground`: (binary) whether subject is registered in a battleground state\n",
515 | " * `age`: (pos int, 18+), subject's age"
516 | ]
517 | },
518 | {
519 | "cell_type": "code",
520 | "execution_count": 4,
521 | "metadata": {
522 | "colab": {
523 | "base_uri": "https://localhost:8080/"
524 | },
525 | "id": "6CgdtoZr7uvL",
526 | "outputId": "39c90551-18ee-4dcd-b9d1-a8970900cf62"
527 | },
528 | "outputs": [
529 | {
530 | "output_type": "execute_result",
531 | "data": {
532 | "text/plain": [
533 | "43357"
534 | ]
535 | },
536 | "metadata": {},
537 | "execution_count": 4
538 | }
539 | ],
540 | "source": [
 541 |     "# dichotomize the instrument at cutoff K = 37: Z = 1 if queue_position <= K\n",
 542 |     "# keep only those queues with length > K\n",
543 | "outvote = outvote.loc[outvote.queue_length>37].reset_index()\n",
544 | "outvote['queue_position'] = (outvote['queue_position'] <= 37.0).astype(int)\n",
545 | "len(outvote)"
546 | ]
547 | },
548 | {
549 | "cell_type": "code",
550 | "execution_count": 5,
551 | "metadata": {
552 | "id": "2AC9TPko-hbt"
553 | },
554 | "outputs": [],
555 | "source": [
556 | "outcome = outvote['voted_2020']\n",
557 | "treatment = outvote['messaged']\n",
558 | "instrument = outvote['queue_position']\n",
559 | "block = outvote['queue_length']"
560 | ]
561 | },
562 | {
563 | "cell_type": "markdown",
564 | "metadata": {
565 | "id": "C576dWRsa3ad"
566 | },
567 | "source": [
568 | "## Specify Nuisance Function Models\n",
569 | "\n",
570 | "The next step is to specify models for \n",
571 | "\n",
572 | "* $\\mu(z,x)=\\mathbb{E}(Y|z,x)$\n",
573 | "* $m(z,x) = P(A=1|z,x)$\n",
574 | "* $p(x) = P(Z=1|x)$\n",
575 | "\n"
576 | ]
577 | },
578 | {
579 | "cell_type": "code",
580 | "execution_count": 6,
581 | "metadata": {
582 | "colab": {
583 | "base_uri": "https://localhost:8080/"
584 | },
585 | "id": "qyOhSZRQRb8W",
586 | "outputId": "4819a4f0-8268-45e2-c667-7b0c6055d4d5"
587 | },
588 | "outputs": [
589 | {
590 | "output_type": "stream",
591 | "name": "stdout",
592 | "text": [
593 | "Test Cross Entropy of fit model 0.5250216548208155\n",
594 | "Test Cross Entropy of no-covariate model 0.5270933091701285\n"
595 | ]
596 | }
597 | ],
598 | "source": [
599 | "from sklearn.neighbors import KNeighborsClassifier\n",
600 | "# specify a model for mu(z,x)\n",
601 | "\n",
602 | "# make a function that returns a sklearn model for later use in k-folding\n",
603 | "def make_mu_model():\n",
604 | " return KNeighborsClassifier(n_neighbors=300)\n",
605 | "mu_model = make_mu_model()\n",
606 | "\n",
607 | "# Sanity check that chosen model actually improves test error\n",
608 | "# A real analysis should give substantial attention to model selection and validation \n",
609 | "\n",
610 | "X_zx = outvote[['queue_position','queue_length']].copy()\n",
611 | "\n",
612 | "X_train, X_test, y_train, y_test = train_test_split(X_zx, outcome, test_size=0.2)\n",
613 | "mu_model.fit(X_train, y_train)\n",
614 | "y_pred = mu_model.predict_proba(X_test)[:,1]\n",
615 | "\n",
616 | "test_ce=log_loss(y_test, y_pred)\n",
617 | "print(f\"Test Cross Entropy of fit model {test_ce}\") \n",
618 | "baseline_ce=log_loss(y_test, y_train.mean()*np.ones_like(y_test))\n",
619 | "print(f\"Test Cross Entropy of no-covariate model {baseline_ce}\")"
620 | ]
621 | },
622 | {
623 | "cell_type": "code",
624 | "execution_count": 7,
625 | "metadata": {
626 | "colab": {
627 | "base_uri": "https://localhost:8080/"
628 | },
629 | "id": "uq6eZEBXbsaI",
630 | "outputId": "c87bc82f-22be-49f6-af8b-058eec588403"
631 | },
632 | "outputs": [
633 | {
634 | "output_type": "stream",
635 | "name": "stdout",
636 | "text": [
637 | "Test CE of fit model 0.6398686068177049\n",
638 | "Test CE of no-covariate model 0.6714289850271291\n"
639 | ]
640 | }
641 | ],
642 | "source": [
643 | "# specify a model for m(z,x)\n",
644 | "\n",
645 | "def make_m_model():\n",
646 | " return LogisticRegression(max_iter=1000, warm_start=True, random_state=RANDOM_SEED)\n",
647 | "\n",
648 | "m_model = make_m_model()\n",
649 | "# Sanity check that chosen model actually improves test error\n",
650 | "# A real analysis should give substantial attention to model selection and validation \n",
651 | "\n",
652 | "X_train, X_test, a_train, a_test = train_test_split(X_zx, treatment, test_size=0.2)\n",
653 | "m_model.fit(X_train, a_train)\n",
654 | "a_pred = m_model.predict_proba(X_test)[:,1]\n",
655 | "\n",
656 | "test_ce=log_loss(a_test, a_pred)\n",
657 | "print(f\"Test CE of fit model {test_ce}\") \n",
658 | "baseline_ce=log_loss(a_test, a_train.mean()*np.ones_like(a_test))\n",
659 | "print(f\"Test CE of no-covariate model {baseline_ce}\")"
660 | ]
661 | },
662 | {
663 | "cell_type": "code",
664 | "execution_count": 8,
665 | "metadata": {
666 | "colab": {
667 | "base_uri": "https://localhost:8080/"
668 | },
669 | "id": "pg-7pFAYxRQ5",
670 | "outputId": "cf0a1fb1-31dd-4c9f-c5df-b8987d66a7c4"
671 | },
672 | "outputs": [
673 | {
674 | "output_type": "stream",
675 | "name": "stdout",
676 | "text": [
677 | "Test CE of fit model 0.5303714147556886\n",
678 | "Test CE of no-covariate model 0.6810169611354872\n"
679 | ]
680 | }
681 | ],
682 | "source": [
683 | "def make_p_model():\n",
684 | " return RandomForestClassifier(n_estimators=200, max_depth=5)\n",
685 | "\n",
686 | "p_model = make_p_model()\n",
687 | "# Sanity check that chosen model actually improves test error\n",
688 | "# A real analysis should give substantial attention to model selection and validation \n",
689 | "\n",
690 | "X_train, X_test, Z_train, Z_test = train_test_split(block.to_frame(), instrument, test_size=0.2)\n",
691 | "p_model.fit(X_train, Z_train)\n",
692 | "Z_pred = p_model.predict_proba(X_test)[:,1]\n",
693 | "\n",
694 | "test_ce=log_loss(Z_test, Z_pred)\n",
695 | "print(f\"Test CE of fit model {test_ce}\") \n",
696 | "baseline_ce=log_loss(Z_test, Z_train.mean()*np.ones_like(Z_test))\n",
697 | "print(f\"Test CE of no-covariate model {baseline_ce}\")"
698 | ]
699 | },
700 | {
701 | "cell_type": "markdown",
702 | "metadata": {
703 | "id": "2RkvV_4_dFWo"
704 | },
705 | "source": [
706 | "## Use cross fitting to get predicted $\\hat{\\mu}$, $\\hat{m}$, $\\hat{p}$ for each unit"
707 | ]
708 | },
709 | {
710 | "cell_type": "code",
711 | "execution_count": 9,
712 | "metadata": {
713 | "id": "KA0AsEGJ_X3b"
714 | },
715 | "outputs": [],
716 | "source": [
717 | "# helper functions to implement the cross fitting\n",
718 | "\n",
719 | "def p_k_fold_fit_and_predict(make_model, X:pd.DataFrame, Z:np.array, n_splits:int):\n",
720 | " \"\"\"\n",
721 | " Implements K fold cross-fitting for the model predicting the instrument Z. \n",
722 | " That is, \n",
723 | " 1. Split data into K folds\n",
724 | " 2. For each fold j, the model is fit on the other K-1 folds\n",
725 | " 3. The fitted model is used to make predictions for each data point in fold j\n",
726 | " Returns an array containing the predictions \n",
727 | "\n",
728 | " Args:\n",
 729 |     "      make_model: function that returns an sklearn model (which implements fit and predict_proba)\n",
730 | " X: dataframe of variables to adjust for\n",
731 | " Z: array of instruments\n",
732 | " n_splits: number of splits to use\n",
733 | " \"\"\"\n",
734 | " predictions = np.full_like(Z, np.nan, dtype=float)\n",
735 | " kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)\n",
736 | " \n",
737 | " for train_index, test_index in kf.split(X, Z):\n",
738 | " X_train = X.loc[train_index]\n",
739 | " Z_train = Z.loc[train_index]\n",
740 | " g = make_model()\n",
741 | " g.fit(X_train, Z_train)\n",
742 | "\n",
743 | " # get predictions for split\n",
744 | " predictions[test_index] = g.predict_proba(X.loc[test_index])[:, 1]\n",
745 | "\n",
746 | " assert np.isnan(predictions).sum() == 0\n",
747 | " return predictions\n",
748 | "\n",
749 | "\n",
750 | "def m_k_fold_fit_and_predict(make_model, X:pd.DataFrame, Z:np.array, A:np.array, n_splits:int):\n",
751 | " \"\"\"\n",
 752 |     "    Implements K fold cross-fitting for the model predicting the treatment A. \n",
753 | " That is, \n",
754 | " 1. Split data into K folds\n",
755 | " 2. For each fold j, the model is fit on the other K-1 folds\n",
756 | " 3. The fitted model is used to make predictions for each data point in fold j\n",
 757 |     "    Returns two arrays containing the predicted treatment probabilities with the instrument set to Z=0 and to Z=1 for every unit \n",
758 | "\n",
759 | " Args:\n",
 760 |     "      make_model: function that returns an sklearn model (which implements fit and predict_proba)\n",
761 | " X: dataframe of variables to adjust for\n",
762 | " Z: array of instruments\n",
763 | " A: array of treatments\n",
764 | " n_splits: number of splits to use\n",
765 | " \"\"\"\n",
766 | " predictions0 = np.full_like(A, np.nan, dtype=float)\n",
767 | " predictions1 = np.full_like(A, np.nan, dtype=float)\n",
768 | " kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)\n",
769 | "\n",
 770 |     "    # include the instrument as input feature\n",
771 | " X_zx = X.copy()\n",
772 | " X_zx[\"Z\"] = Z\n",
773 | "\n",
774 | " # for predicting A under Z=1 / Z=0 status for each data point \n",
775 | " X0 = X_zx.copy()\n",
776 | " X0[\"Z\"] = 0\n",
777 | " X1 = X_zx.copy()\n",
778 | " X1[\"Z\"] = 1\n",
779 | " \n",
780 | " for train_index, test_index in kf.split(X_zx, A):\n",
781 | " X_train = X_zx.loc[train_index]\n",
782 | " A_train = A.loc[train_index]\n",
783 | " m = make_model()\n",
784 | " m.fit(X_train, A_train)\n",
785 | " predictions0[test_index] = m.predict_proba(X0.loc[test_index])[:,1]\n",
786 | " predictions1[test_index] = m.predict_proba(X1.loc[test_index])[:,1]\n",
787 | "\n",
788 | " assert np.isnan(predictions0).sum() == 0\n",
789 | " assert np.isnan(predictions1).sum() == 0\n",
790 | " return predictions0, predictions1\n",
791 | "\n",
792 | "def mu_k_fold_fit_and_predict(make_model, X:pd.DataFrame, Z:np.array, y:np.array, n_splits:int, output_type:str):\n",
793 | " \"\"\"\n",
794 | " Implements K fold cross-fitting for the model predicting the outcome Y. \n",
795 | " That is, \n",
796 | " 1. Split data into K folds\n",
797 | " 2. For each fold j, the model is fit on the other K-1 folds\n",
798 | " 3. The fitted model is used to make predictions for each data point in fold j\n",
 799 |     "    Returns two arrays containing the predicted outcomes with the instrument set to Z=0 and to Z=1 for every unit \n",
800 | "\n",
801 | " Args:\n",
 802 |     "      make_model: function that returns an sklearn model (that implements fit and either predict_proba or predict)\n",
803 | " X: dataframe of variables to adjust for\n",
804 | " Z: array of instruments\n",
805 | " y: array of outcomes\n",
806 | " n_splits: number of splits to use\n",
807 | " output_type: type of outcome, \"binary\" or \"continuous\"\n",
808 | "\n",
809 | " \"\"\"\n",
810 | " predictions0 = np.full_like(y, np.nan, dtype=float)\n",
811 | " predictions1 = np.full_like(y, np.nan, dtype=float)\n",
812 | " if output_type == 'binary':\n",
813 | " kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)\n",
814 | " elif output_type == 'continuous':\n",
815 | " kf = KFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)\n",
816 | "\n",
 817 |     "    # include the instrument as input feature\n",
818 | " X_zx = X.copy()\n",
819 | " X_zx[\"Z\"] = Z\n",
820 | "\n",
 821 |     "    # for predicting the outcome under Z=1 / Z=0 for each data point \n",
822 | " X0 = X_zx.copy()\n",
823 | " X0[\"Z\"] = 0\n",
824 | " X1 = X_zx.copy()\n",
825 | " X1[\"Z\"] = 1\n",
826 | "\n",
827 | " \n",
828 | " for train_index, test_index in kf.split(X_zx, y):\n",
829 | " X_train = X_zx.loc[train_index]\n",
830 | " y_train = y.loc[train_index]\n",
831 | " mu = make_model()\n",
832 | " mu.fit(X_train, y_train)\n",
833 | "\n",
834 | " if output_type =='binary':\n",
835 | " predictions0[test_index] = mu.predict_proba(X0.loc[test_index])[:, 1]\n",
836 | " predictions1[test_index] = mu.predict_proba(X1.loc[test_index])[:, 1]\n",
837 | " elif output_type == 'continuous':\n",
838 | " predictions0[test_index] = mu.predict(X0.loc[test_index])\n",
839 | " predictions1[test_index] = mu.predict(X1.loc[test_index])\n",
840 | "\n",
841 | " assert np.isnan(predictions0).sum() == 0\n",
842 | " assert np.isnan(predictions1).sum() == 0\n",
843 | " return predictions0, predictions1"
844 | ]
845 | },
846 | {
847 | "cell_type": "code",
848 | "execution_count": 10,
849 | "metadata": {
850 | "id": "wVcE6pRQeMNf"
851 | },
852 | "outputs": [],
853 | "source": [
854 | "p = p_k_fold_fit_and_predict(make_p_model, X=block.to_frame(), Z=instrument, n_splits=10)"
855 | ]
856 | },
857 | {
858 | "cell_type": "code",
859 | "execution_count": 11,
860 | "metadata": {
861 | "id": "GLEHlLLdWSh9"
862 | },
863 | "outputs": [],
864 | "source": [
865 | "m0,m1= m_k_fold_fit_and_predict(make_m_model, X=block.to_frame(), Z=instrument, A=treatment, n_splits=10)"
866 | ]
867 | },
868 | {
869 | "cell_type": "code",
870 | "source": [
871 | "# check relevance\n",
872 | "from matplotlib.pyplot import hist\n",
873 | "hist(m1-m0, density=True)"
874 | ],
875 | "metadata": {
876 | "id": "jpXU9DK26d6c",
877 | "outputId": "72e0d30c-46e6-48b0-fd8a-318b8b88c26d",
878 | "colab": {
879 | "base_uri": "https://localhost:8080/",
880 | "height": 390
881 | }
882 | },
883 | "execution_count": 18,
884 | "outputs": [
885 | {
886 | "output_type": "execute_result",
887 | "data": {
888 | "text/plain": [
889 | "(array([3.97620728e-02, 3.77739692e-02, 9.34408711e-02, 2.04774675e-01,\n",
890 | " 3.37977619e-02, 0.00000000e+00, 0.00000000e+00, 3.61834863e-01,\n",
891 | " 5.95297873e+01, 2.58970380e+01]),\n",
892 | " array([0.04620174, 0.05780291, 0.06940408, 0.08100525, 0.09260642,\n",
893 | " 0.10420759, 0.11580876, 0.12740993, 0.13901109, 0.15061226,\n",
894 | " 0.16221343]),\n",
895 | " )"
896 | ]
897 | },
898 | "metadata": {},
899 | "execution_count": 18
900 | },
901 | {
902 | "output_type": "display_data",
903 | "data": {
904 | "text/plain": [
905 | ""
906 | ],
907 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXAAAAD4CAYAAAD1jb0+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAOiUlEQVR4nO3dfYxldX3H8fdHBopglUWmmy1oByJoSCpgRwrR2pQtlJZGaEIoStvVkuwf1samtnatf7XpH9DaWhKbNhuxDo0WcCuFiAE2K8S0scggzyzKQhZdurDDUxWaVBe//eOelcns7M7duQ/Db3y/kpt7Hn7nnu839/LZM+fec0hVIUlqz2tWugBJ0vIY4JLUKANckhplgEtSowxwSWrUxDh3dtxxx9XU1NQ4dylJzbv77rufqarJhcvHGuBTU1PMzs6Oc5eS1LwkTyy23FMoktQoA1ySGmWAS1KjDHBJalRfAZ7kmCRbkjySZHuSs5Mcm2Rrkke75zWjLlaS9Ip+j8CvAm6pqrcBpwHbgU3Atqo6GdjWzUuSxmTJAE/yBuA9wNUAVfWDqnoBuBCY6YbNABeNqkhJ0v76OQI/EZgD/jnJPUk+k+RoYG1V7e7GPAWsXWzjJBuTzCaZnZubG07VkqS+AnwCeAfwj1V1BvASC06XVO+m4oveWLyqNlfVdFVNT07udyGRJGmZ+rkScxewq6ru7Oa30Avwp5Osq6rdSdYBe0ZVpKTVZWrTzSuy351XXLAi+x2VJY/Aq+op4LtJ3totWg88DNwEbOiWbQBuHEmFkqRF9XsvlD8EPp/kCOBx4IP0wv/6JJcDTwCXjKZESdJi+grwqroXmF5k1frhliNJ6pdXYkpSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWrURD+DkuwEvg+8DOytqukkxwLXAVPATuCSqnp+NGVKkhY6lCPwX6mq06tqupvfBGyrqpOBbd28JGlMBjmFciEw003PABcNXo4kqV/9BngBtyW5O8nGbtnaqtrdTT8FrF1swyQbk8wmmZ2bmxuwXEnSPn2dAwfeXVVPJvkZYGuSR+avrKpKUottWFWbgc0A09PTi46RJB26vo7Aq+rJ7nkPcANwJvB0knUA3fOeURUpSdrfkgGe5OgkP71vGjgPeBC4CdjQDdsA3DiqIiVJ++vnFMpa4IYk+8Z/oapuSXIXcH2Sy4EngEtGV6YkaaElA7yqHgdOW2T5s8D6URQlSVqaV2JKUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqP6DvAkhyW5J8mXu/kTk9yZZEeS65IcMboyJUkLHcoR+EeA7fPmrwQ+VVVvAZ4HLh9mYZKkg+srwJOcAFwAfKabD3AOsKUbMgNcNIoCJUmL6/cI/O+BjwE/6ubfCLxQVXu7+V3A8UOuTZJ0EEsGeJLfBPZU1d3L2UGSjUlmk8zOzc0t5yUkSYvo5wj8XcB7k+wErqV36uQq4JgkE92YE4AnF9u4qjZX1XRVTU9OTg6hZEkS9BHgVfXxqjqhqqaAS4GvVtVlwO3Axd2wDcCNI6tSkrSfQX4H/mfAHyfZQe+c+NXDKUmS1I+JpYe8oqruAO7oph8Hzhx+SZKkfnglpiQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYtGeBJjkzyjST3JXkoyV90y09McmeSHUmuS3LE6MuVJO3TzxH4/wHnVNVpwOnA+UnOAq4EPlVVbwGeBy4fXZmSpIWWDPDqebGbPbx7FHAOsKVbPgNcNJIKJUmL6usceJLDktwL7AG2Ao8BL1TV3m7ILuD4A2y7Mclsktm5ublh1CxJos8Ar6qXq+p04ATgTOBt/e6gqjZX1XRVTU9OTi6zTEnSQof0K5SqegG4HTgbOCbJRLfqBODJIdcmSTqIfn6FMpnkmG76tcC5wHZ6QX5xN2wDcOOoipQk7W9i6SGsA2aSHEYv8K+vqi8neRi4NslfAfcAV4+wTknSAksGeFXdD5yxyPLH6Z0PlyStAK/ElKRGGeCS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRvVzKb0krQpTm25ekf3uvOKCkbyuR+CS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYtGeBJ3pTk9iQPJ3koyUe65ccm2Zrk0e55zejLlSTt088R+F7go1V1KnAW8AdJTgU2Aduq6mRgWzcvSRqTJQO8qnZX1Te76e8D24HjgQuBmW7YDHDRqIqUJO3vkM6BJ5kCzgDuBNZW1e5u1VPA2gNsszHJbJLZubm5AUqVJM3Xd4AneR3wb8AfVdX35q+rqgJqse2qanNVTVfV9OTk5EDFSpJe0VeAJzmcXnh/vqq+1C1+Osm6bv06YM9oSpQkLaafX6EEuBrYXlV/N2/VTcCGbnoDcOPwy5MkHchEH2PeBfwu8ECSe7tlfw5cAVyf5HLgCeCS0ZQoSVrMkgFeVf8B5ACr1w+3HElSv7wSU5IaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqOWDPAkn02yJ8mD85Ydm2Rrkke75zWjLVOStFA/R+CfA85fsGwTsK2qTga2dfOSpDFaMsCr6mvAcwsWXwjMdNMzwEVDrkuStITlngNfW1W7u+mngLUHGphkY5LZJLNzc3PL3J0kaaGBv8SsqgLqIOs3V9V0VU1PTk4OujtJUme5Af50knUA3fOe4ZUkSerHcgP8JmBDN70BuHE45UiS+tXPzwj/Ffg68NYku5JcDlwBnJvkUeBXu3lJ0hhNLDWgqt53gFXrh1yLJOkQeCWmJDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRg0U4EnOT/KtJDuSbBp
WUZKkpS07wJMcBvwD8OvAqcD7kpw6rMIkSQc3McC2ZwI7qupxgCTXAhcCDw+jMMHUpptXuoSx23nFBStdwk+Mn8TP12ozSIAfD3x33vwu4BcXDkqyEdjYzb6Y5FsD7PPV5DjgmZUuYshWvKdcOdSXW/F+RmC19bTa+oFFehrC5/rnFls4SID3pao2A5tHvZ9xSzJbVdMrXccwrbaeVls/sPp6Wm39wHh7GuRLzCeBN82bP6FbJkkag0EC/C7g5CQnJjkCuBS4aThlSZKWsuxTKFW1N8mHgVuBw4DPVtVDQ6vs1W/VnRZi9fW02vqB1dfTausHxthTqmpc+5IkDZFXYkpSowxwSWqUAb6IpW4RkOSnklzXrb8zydS8dW9P8vUkDyV5IMmR46x9McvtJ8nhSWa6PrYn+fi4az+QPnp6T5JvJtmb5OIF6zYkebR7bBhf1Qe23H6SnD7v83Z/kt8eb+UHNsh71K1/fZJdST49nooPbsDP3JuT3Nb9d/Tw/MwYSFX5mPeg94XsY8BJwBHAfcCpC8Z8CPinbvpS4LpuegK4Hzitm38jcFjD/bwfuLabPgrYCUw18h5NAW8HrgEunrf8WODx7nlNN72m4X5OAU7upn8W2A0c0/J7NG/9VcAXgE+33g9wB3BuN/064Khh1OUR+P5+fIuAqvoBsO8WAfNdCMx001uA9UkCnAfcX1X3AVTVs1X18pjqPpBB+ing6CQTwGuBHwDfG0/ZB7VkT1W1s6ruB360YNtfA7ZW1XNV9TywFTh/HEUfxLL7qapvV9Wj3fR/A3uAyfGUfVCDvEck+QVgLXDbOIrtw7L76e4RNVFVW7txL1bV/w6jKAN8f4vdIuD4A42pqr3A/9A72j4FqCS3dn9KfWwM9S5lkH62AC/RO6r7DvDJqnpu1AX3oZ+eRrHtqAylpiRn0js6fGxIdQ1i2T0leQ3wt8CfjKCu5RrkPToFeCHJl5Lck+RvupsBDswAH64J4N3AZd3zbyVZv7IlDeRM4GV6f5qfCHw0yUkrW5IWk2Qd8C/AB6tqvyPaxnwI+EpV7VrpQoZkAvglev8gvZPeaZgPDOOFDfD99XOLgB+P6U4vvAF4lt6/yl+rqme6P5G+Arxj5BUf3CD9vB+4pap+WFV7gP8EXg33rRjkNg6vxltADFRTktcDNwOfqKr/GnJtyzVIT2cDH06yE/gk8HtJrhhueYdskH52Afd2p1/2Av/OkHLBAN9fP7cIuAnY9+uFi4GvVu/biVuBn09yVBeEv8zK3153kH6+A5wDkORo4CzgkbFUfXCD3MbhVuC8JGuSrKH3vcWtI6qzX8vupxt/A3BNVW0ZYY2Hatk9VdVlVfXmqpqid9R6TVWt9P8wZpDP3F3AMUn2fTdxDsPKhZX+dvfV+AB+A/g2vXOJn+iW/SXw3m76SOCLwA7gG8BJ87b9HeAh4EHgr1e6l0H6ofdt+Re7fh4G/nSlezmEnt5J78jnJXp/TTw0b9vf73rdQe+UQ7P9dJ+3HwL3znucvtL9DPoezXuND/Aq+BXKED5z59L7hdoDwOeAI4ZRk5fSS1KjPIUiSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1Kj/h9VXn6+L6tiOAAAAABJRU5ErkJggg==\n"
908 | },
909 | "metadata": {
910 | "needs_background": "light"
911 | }
912 | }
913 | ]
914 | },
915 | {
916 | "cell_type": "code",
917 | "execution_count": 13,
918 | "metadata": {
919 | "id": "kQ7s8zWV7IgR"
920 | },
921 | "outputs": [],
922 | "source": [
923 | "mu0,mu1= mu_k_fold_fit_and_predict(make_mu_model, X=block.to_frame(), Z=instrument, y=outcome, n_splits=10, output_type=\"binary\")"
924 | ]
925 | },
926 | {
927 | "cell_type": "code",
928 | "execution_count": 14,
929 | "metadata": {
930 | "colab": {
931 | "base_uri": "https://localhost:8080/",
932 | "height": 206
933 | },
934 | "id": "_NVCV0q0g8wQ",
935 | "outputId": "d0a90b3d-966c-4830-81c8-f73c71262552"
936 | },
937 | "outputs": [
938 | {
939 | "output_type": "execute_result",
940 | "data": {
941 | "text/plain": [
942 | " p mu0 mu1 m1 m0 Z A Y\n",
943 | "0 0.147581 0.833333 0.843333 0.668028 0.513991 0 0 0\n",
944 | "1 0.143757 0.826667 0.830000 0.667640 0.513672 0 0 1\n",
945 | "2 0.143383 0.823333 0.836667 0.668948 0.512616 0 0 1\n",
946 | "3 0.143757 0.826667 0.836667 0.667598 0.514286 0 0 1\n",
947 | "4 0.143383 0.826667 0.830000 0.667870 0.513021 0 0 0"
948 | ],
949 | "text/html": [
 950 |       "<HTML table rendering stripped during extraction; see the text/plain output above>"
1116 | ]
1117 | },
1118 | "metadata": {},
1119 | "execution_count": 14
1120 | }
1121 | ],
1122 | "source": [
1123 | "data_and_nuisance_estimates = pd.DataFrame({'p': p, 'mu0': mu0, 'mu1': mu1, 'm1': m1, 'm0': m0,\n",
1124 | " 'Z': instrument, 'A': treatment, 'Y': outcome})\n",
1125 | "data_and_nuisance_estimates.head()"
1126 | ]
1127 | },
1128 | {
1129 | "cell_type": "markdown",
1130 | "metadata": {
1131 | "id": "VNhM7URdgzQB"
1132 | },
1133 | "source": [
1134 | "## Combine predicted values and data into estimate of LATE"
1135 | ]
1136 | },
1137 | {
1138 | "cell_type": "code",
1139 | "execution_count": 15,
1140 | "metadata": {
1141 | "id": "Nj0veiaW4RRm"
1142 | },
1143 | "outputs": [],
1144 | "source": [
1145 | "def late_estimator(mu1, mu0, m1, m0, p, Z, A, Y, prob = None):\n",
1146 | " '''\n",
1147 | " Estimator for LATE\n",
1148 | " '''\n",
1149 | " n = len(Y)\n",
1150 | " phi_zy = mu1 - mu0 + Z*(Y-mu1)/p - (1-Z)*(Y-mu0)/(1-p)\n",
1151 | " phi_za = m1 - m0 + Z*(A-m1)/p - (1-Z)*(A-m0)/(1-p)\n",
1152 | "\n",
1153 | " tau_za = phi_za.mean()\n",
1154 | " tau_hat = phi_zy.mean()/tau_za\n",
1155 | " phi = phi_zy - phi_za * tau_hat\n",
1156 | " \n",
1157 | " std_hat = math.sqrt((phi**2).mean()/tau_za**2/n)\n",
1158 | "\n",
1159 | " return tau_hat, std_hat\n"
1160 | ]
1161 | },
1162 | {
1163 | "cell_type": "code",
1164 | "execution_count": 16,
1165 | "metadata": {
1166 | "colab": {
1167 | "base_uri": "https://localhost:8080/"
1168 | },
1169 | "id": "SjDj0F9Bm9uq",
1170 | "outputId": "262886b4-da24-4b7c-eb75-c1a609e70e6c"
1171 | },
1172 | "outputs": [
1173 | {
1174 | "output_type": "stream",
1175 | "name": "stdout",
1176 | "text": [
1177 | "The estimate is 0.047826812983712996 pm 0.07239241460155232\n"
1178 | ]
1179 | }
1180 | ],
1181 | "source": [
1182 | "tau_hat, std_hat = late_estimator(**data_and_nuisance_estimates)\n",
1183 | "print(f\"The estimate is {tau_hat} pm {1.96*std_hat}\")"
1184 | ]
1185 | }
1186 | ],
1187 | "metadata": {
1188 | "colab": {
1189 | "provenance": [],
1190 | "authorship_tag": "ABX9TyMBLYRZL7Nk//toT2OnEWO8",
1191 | "include_colab_link": true
1192 | },
1193 | "kernelspec": {
1194 | "display_name": "Python 3",
1195 | "name": "python3"
1196 | },
1197 | "language_info": {
1198 | "name": "python"
1199 | }
1200 | },
1201 | "nbformat": 4,
1202 | "nbformat_minor": 0
1203 | }
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 Victor Veitch
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # causality-tutorials
2 | Short tutorials on the use of machine learning methods for causal inference
3 |
--------------------------------------------------------------------------------
/Sensitivity_Analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "Sensitivity_Analysis.ipynb",
7 | "provenance": [],
8 | "collapsed_sections": [],
9 | "include_colab_link": true
10 | },
11 | "kernelspec": {
12 | "name": "python3",
13 | "display_name": "Python 3"
14 | },
15 | "language_info": {
16 | "name": "python"
17 | }
18 | },
19 | "cells": [
20 | {
21 | "cell_type": "markdown",
22 | "metadata": {
23 | "id": "view-in-github",
24 | "colab_type": "text"
25 | },
26 | "source": [
27 | "
"
28 | ]
29 | },
30 | {
31 | "cell_type": "markdown",
32 | "metadata": {
33 | "id": "QfZkNLUb4B-p"
34 | },
35 | "source": [
36 | "# Sensitivity Analysis Tutorial\n",
37 | "\n",
38 | "This tutorial gives a short example for how to assess sensitivity to unobserved confounding in causal estimation. We use the Austen plot method (https://arxiv.org/abs/2003.01747). "
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "metadata": {
44 | "colab": {
45 | "base_uri": "https://localhost:8080/"
46 | },
47 | "id": "1RrdIBTLQ9Ac",
48 | "outputId": "62c039df-51d7-4f31-a150-4b6a0e0f0b26"
49 | },
50 | "source": [
51 | "!pip install austen-plots"
52 | ],
53 | "execution_count": 218,
54 | "outputs": [
55 | {
56 | "output_type": "stream",
57 | "name": "stdout",
58 | "text": [
59 | "Requirement already satisfied: austen-plots in /usr/local/lib/python3.7/dist-packages (0.1.0)\n",
60 | "Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from austen-plots) (1.19.5)\n",
61 | "Requirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from austen-plots) (1.1.5)\n",
62 | "Requirement already satisfied: plotnine in /usr/local/lib/python3.7/dist-packages (from austen-plots) (0.6.0)\n",
63 | "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.7/dist-packages (from austen-plots) (1.0.1)\n",
64 | "Requirement already satisfied: scipy in /usr/local/lib/python3.7/dist-packages (from austen-plots) (1.4.1)\n",
65 | "Requirement already satisfied: tqdm in /usr/local/lib/python3.7/dist-packages (from austen-plots) (4.62.3)\n",
66 | "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas->austen-plots) (2.8.2)\n",
67 | "Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas->austen-plots) (2018.9)\n",
68 | "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas->austen-plots) (1.15.0)\n",
69 | "Requirement already satisfied: statsmodels>=0.9.0 in /usr/local/lib/python3.7/dist-packages (from plotnine->austen-plots) (0.10.2)\n",
70 | "Requirement already satisfied: descartes>=1.1.0 in /usr/local/lib/python3.7/dist-packages (from plotnine->austen-plots) (1.1.0)\n",
71 | "Requirement already satisfied: patsy>=0.4.1 in /usr/local/lib/python3.7/dist-packages (from plotnine->austen-plots) (0.5.2)\n",
72 | "Requirement already satisfied: matplotlib>=3.1.1 in /usr/local/lib/python3.7/dist-packages (from plotnine->austen-plots) (3.2.2)\n",
73 | "Requirement already satisfied: mizani>=0.6.0 in /usr/local/lib/python3.7/dist-packages (from plotnine->austen-plots) (0.6.0)\n",
74 | "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=3.1.1->plotnine->austen-plots) (3.0.6)\n",
75 | "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=3.1.1->plotnine->austen-plots) (1.3.2)\n",
76 | "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=3.1.1->plotnine->austen-plots) (0.11.0)\n",
77 | "Requirement already satisfied: palettable in /usr/local/lib/python3.7/dist-packages (from mizani>=0.6.0->plotnine->austen-plots) (3.3.0)\n",
78 | "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from scikit-learn->austen-plots) (3.0.0)\n",
79 | "Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.7/dist-packages (from scikit-learn->austen-plots) (1.1.0)\n"
80 | ]
81 | }
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "metadata": {
87 | "id": "dS2X3Bq1-fxE"
88 | },
89 | "source": [
90 | "import numpy as np\n",
91 | "import pandas as pd\n",
92 | "import scipy as sp\n",
93 | "from sklearn import preprocessing\n",
94 | "from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, GradientBoostingRegressor, GradientBoostingClassifier\n",
95 | "from sklearn.linear_model import LogisticRegression\n",
96 | "from sklearn.model_selection import KFold, StratifiedKFold, train_test_split\n",
97 | "from sklearn.metrics import mean_squared_error, log_loss\n",
98 | "import sklearn\n",
99 | "import os\n",
100 | "import pathlib\n",
101 | "\n",
102 | "from austen_plots.AustenPlot import AustenPlot"
103 | ],
104 | "execution_count": 219,
105 | "outputs": []
106 | },
107 | {
108 | "cell_type": "code",
109 | "metadata": {
110 | "id": "zNsGKVyLSRxn"
111 | },
112 | "source": [
113 | "RANDOM_SEED = 42\n",
114 | "np.random.seed(RANDOM_SEED)"
115 | ],
116 | "execution_count": 220,
117 | "outputs": []
118 | },
119 | {
120 | "cell_type": "markdown",
121 | "metadata": {
122 | "id": "H0YG8hR8RRAP"
123 | },
124 | "source": [
125 | "# Data Loading and Initial Fit\n",
126 | "\n",
127 | "Load the diastolic blood pressure data and fit models for the propensity score and conditional expected outcome model, in the same way we'd do in a standard adjustment-based treatment effect estimation. For this tutorial, we'll use random forests for both models. \n",
128 | "\n",
129 | "This section doesn't contain anything special to sensitivity analysis, and can be safely skipped if you've already read the adjustment estimation tutorial."
130 | ]
131 | },
132 | {
133 | "cell_type": "markdown",
134 | "metadata": {
135 | "id": "yPbJeayiEs3u"
136 | },
137 | "source": [
138 | "##Load and Format Observational Data"
139 | ]
140 | },
141 | {
142 | "cell_type": "code",
143 | "metadata": {
144 | "id": "2AC9TPko-hbt"
145 | },
146 | "source": [
147 | "nhanes = pd.read_csv('https://raw.githubusercontent.com/vveitch/causality-tutorials/main/data/hbp_dbp.csv')"
148 | ],
149 | "execution_count": 221,
150 | "outputs": []
151 | },
152 | {
153 | "cell_type": "code",
154 | "metadata": {
155 | "colab": {
156 | "base_uri": "https://localhost:8080/",
157 | "height": 223
158 | },
159 | "id": "-A1LX6-t-hZD",
160 | "outputId": "2f9def4f-1348-48d2-d518-b5b5235b6e42"
161 | },
162 | "source": [
163 | "nhanes.head()"
164 | ],
165 | "execution_count": 222,
166 | "outputs": [
167 | {
168 | "output_type": "execute_result",
169 | "data": {
170 | "text/html": [
171 | "\n",
172 | "\n",
185 | "
\n",
186 | " \n",
187 | " \n",
188 | " | \n",
189 | " white | \n",
190 | " black | \n",
191 | " hisp | \n",
192 | " female | \n",
193 | " age_mo | \n",
194 | " hhsize | \n",
195 | " edu | \n",
196 | " married | \n",
197 | " widowed | \n",
198 | " divorced | \n",
199 | " separated | \n",
200 | " income | \n",
201 | " packyr | \n",
202 | " bmi | \n",
203 | " pulse | \n",
204 | " sodium | \n",
205 | " potassium | \n",
206 | " r_sodipota | \n",
207 | " alcohol | \n",
208 | " insurance | \n",
209 | " together | \n",
210 | " ave_dbp | \n",
211 | " trt_dbp | \n",
212 | "
\n",
213 | " \n",
214 | " \n",
215 | " \n",
216 | " 0 | \n",
217 | " 0 | \n",
218 | " 0 | \n",
219 | " 1 | \n",
220 | " 0 | \n",
221 | " 261 | \n",
222 | " 4 | \n",
223 | " 12 | \n",
224 | " 0 | \n",
225 | " 0 | \n",
226 | " 0 | \n",
227 | " 0 | \n",
228 | " 2.251292 | \n",
229 | " 0.0 | \n",
230 | " 25.500000 | \n",
231 | " 80 | \n",
232 | " 5216 | \n",
233 | " 4350 | \n",
234 | " 1.199080 | \n",
235 | " 0 | \n",
236 | " 0 | \n",
237 | " 24 | \n",
238 | " 60 | \n",
239 | " 0 | \n",
240 | "
\n",
241 | " \n",
242 | " 1 | \n",
243 | " 1 | \n",
244 | " 0 | \n",
245 | " 0 | \n",
246 | " 0 | \n",
247 | " 428 | \n",
248 | " 2 | \n",
249 | " 17 | \n",
250 | " 1 | \n",
251 | " 0 | \n",
252 | " 0 | \n",
253 | " 0 | \n",
254 | " 3.881564 | \n",
255 | " 0.0 | \n",
256 | " 29.400000 | \n",
257 | " 72 | \n",
258 | " 2668 | \n",
259 | " 2387 | \n",
260 | " 1.117721 | \n",
261 | " 0 | \n",
262 | " 1 | \n",
263 | " 104 | \n",
264 | " 94 | \n",
265 | " 0 | \n",
266 | "
\n",
267 | " \n",
268 | " 2 | \n",
269 | " 1 | \n",
270 | " 0 | \n",
271 | " 0 | \n",
272 | " 1 | \n",
273 | " 995 | \n",
274 | " 1 | \n",
275 | " 12 | \n",
276 | " 0 | \n",
277 | " 1 | \n",
278 | " 0 | \n",
279 | " 0 | \n",
280 | " 1.504077 | \n",
281 | " 0.0 | \n",
282 | " 19.100000 | \n",
283 | " 64 | \n",
284 | " 2849 | \n",
285 | " 3775 | \n",
286 | " 0.754702 | \n",
287 | " 0 | \n",
288 | " 1 | \n",
289 | " 156 | \n",
290 | " 70 | \n",
291 | " 1 | \n",
292 | "
\n",
293 | " \n",
294 | " 3 | \n",
295 | " 0 | \n",
296 | " 0 | \n",
297 | " 1 | \n",
298 | " 0 | \n",
299 | " 531 | \n",
300 | " 4 | \n",
301 | " 7 | \n",
302 | " 1 | \n",
303 | " 0 | \n",
304 | " 0 | \n",
305 | " 0 | \n",
306 | " 2.674149 | \n",
307 | " 3650.0 | \n",
308 | " 44.400002 | \n",
309 | " 92 | \n",
310 | " 3433 | \n",
311 | " 2716 | \n",
312 | " 1.263991 | \n",
313 | " 0 | \n",
314 | " 0 | \n",
315 | " 52 | \n",
316 | " 84 | \n",
317 | " 0 | \n",
318 | "
\n",
319 | " \n",
320 | " 4 | \n",
321 | " 0 | \n",
322 | " 0 | \n",
323 | " 1 | \n",
324 | " 0 | \n",
325 | " 581 | \n",
326 | " 7 | \n",
327 | " 0 | \n",
328 | " 1 | \n",
329 | " 0 | \n",
330 | " 0 | \n",
331 | " 0 | \n",
332 | " 2.602690 | \n",
333 | " 730.0 | \n",
334 | " 37.500000 | \n",
335 | " 68 | \n",
336 | " 1808 | \n",
337 | " 1883 | \n",
338 | " 0.960170 | \n",
339 | " 117 | \n",
340 | " 1 | \n",
341 | " 0 | \n",
342 | " 80 | \n",
343 | " 0 | \n",
344 | "
\n",
345 | " \n",
346 | "
\n",
347 | "
"
348 | ],
349 | "text/plain": [
350 | " white black hisp female ... insurance together ave_dbp trt_dbp\n",
351 | "0 0 0 1 0 ... 0 24 60 0\n",
352 | "1 1 0 0 0 ... 1 104 94 0\n",
353 | "2 1 0 0 1 ... 1 156 70 1\n",
354 | "3 0 0 1 0 ... 0 52 84 0\n",
355 | "4 0 0 1 0 ... 1 0 80 0\n",
356 | "\n",
357 | "[5 rows x 23 columns]"
358 | ]
359 | },
360 | "metadata": {},
361 | "execution_count": 222
362 | }
363 | ]
364 | },
365 | {
366 | "cell_type": "code",
367 | "metadata": {
368 | "id": "fW3FdK8rJNHM"
369 | },
370 | "source": [
371 | "# scale continuous covariates\n",
372 | "cont_vars = ['age_mo', 'hhsize', 'edu', 'income', 'packyr', 'bmi',\n",
373 | " 'pulse', 'sodium', 'potassium', 'r_sodipota', 'alcohol', 'together']\n",
374 | "nhanes[cont_vars] = preprocessing.scale(nhanes[cont_vars])\n"
375 | ],
376 | "execution_count": 223,
377 | "outputs": []
378 | },
379 | {
380 | "cell_type": "code",
381 | "metadata": {
382 | "id": "APOqpHmrOGzo"
383 | },
384 | "source": [
385 | "confounders = nhanes.drop(columns=['trt_dbp', 'ave_dbp'])\n",
386 | "outcome = nhanes['ave_dbp']\n",
387 | "treatment = nhanes['trt_dbp']"
388 | ],
389 | "execution_count": 224,
390 | "outputs": []
391 | },
392 | {
393 | "cell_type": "markdown",
394 | "metadata": {
395 | "id": "C576dWRsa3ad"
396 | },
397 | "source": [
398 | "## Specify Nuisance Function Models\n",
399 | "\n",
400 | "The next step is to specify models for the conditional expected outcome and propensity score"
401 | ]
402 | },
403 | {
404 | "cell_type": "code",
405 | "metadata": {
406 | "colab": {
407 | "base_uri": "https://localhost:8080/"
408 | },
409 | "id": "qyOhSZRQRb8W",
410 | "outputId": "905a03a6-e8c0-4572-e3e0-6f195b6409ad"
411 | },
412 | "source": [
413 | "# specify a model for the conditional expected outcome\n",
414 | "\n",
415 | "# make a function that returns a sklearn model for later use in k-folding\n",
416 | "def make_Q_model():\n",
417 | "# return RandomForestRegressor(random_state=RANDOM_SEED, n_estimators=200, max_depth=None)\n",
418 | " return GradientBoostingRegressor(random_state=RANDOM_SEED, n_estimators=200, max_depth=3)\n",
419 | "Q_model = make_Q_model()\n",
420 | "\n",
421 | "# Sanity check that chosen model actually improves test error\n",
422 | "# A real analysis should give substantial attention to model selection and validation \n",
423 | "\n",
424 | "X_w_treatment = confounders.copy()\n",
425 | "X_w_treatment[\"treatment\"] = treatment\n",
426 | "\n",
427 | "X_train, X_test, y_train, y_test = train_test_split(X_w_treatment, outcome, test_size=0.2)\n",
428 | "Q_model.fit(X_train, y_train)\n",
429 | "y_pred = Q_model.predict(X_test)\n",
430 | "\n",
431 | "test_mse=mean_squared_error(y_pred, y_test)\n",
432 | "print(f\"Test MSE of fit model {test_mse}\") \n",
433 | "baseline_mse=mean_squared_error(y_train.mean()*np.ones_like(y_test), y_test)\n",
434 | "print(f\"Test MSE of no-covariate model {baseline_mse}\")"
435 | ],
436 | "execution_count": 225,
437 | "outputs": [
438 | {
439 | "output_type": "stream",
440 | "name": "stdout",
441 | "text": [
442 | "Test MSE of fit model 188.37465077057507\n",
443 | "Test MSE of no-covariate model 196.715556166321\n"
444 | ]
445 | }
446 | ]
447 | },
448 | {
449 | "cell_type": "code",
450 | "metadata": {
451 | "colab": {
452 | "base_uri": "https://localhost:8080/"
453 | },
454 | "id": "uq6eZEBXbsaI",
455 | "outputId": "2a544732-9c9c-4ec6-be9c-8ef28610365c"
456 | },
457 | "source": [
458 | "# specify a model for the propensity score\n",
459 | "\n",
460 | "def make_g_model():\n",
461 | " return LogisticRegression(max_iter=1000)\n",
462 | " # return RandomForestClassifier(n_estimators=100, max_depth=5)\n",
463 | " # return GradientBoostingClassifier(n_estimators=200, max_depth=3)\n",
464 | "\n",
465 | "g_model = make_g_model()\n",
466 | "# Sanity check that chosen model actually improves test error\n",
467 | "# A real analysis should give substantial attention to model selection and validation \n",
468 | "\n",
469 | "X_train, X_test, a_train, a_test = train_test_split(confounders, treatment, test_size=0.2, stratify=treatment)\n",
470 | "g_model.fit(X_train, a_train)\n",
471 | "a_pred = g_model.predict_proba(X_test)[:,1]\n",
472 | "\n",
473 | "test_ce=log_loss(a_test, a_pred)\n",
474 | "print(f\"Test CE of fit model {test_ce}\") \n",
475 | "baseline_ce=log_loss(a_test, a_train.mean()*np.ones_like(a_test))\n",
476 | "print(f\"Test CE of no-covariate model {baseline_ce}\")"
477 | ],
478 | "execution_count": 226,
479 | "outputs": [
480 | {
481 | "output_type": "stream",
482 | "name": "stdout",
483 | "text": [
484 | "Test CE of fit model 0.4844169173325631\n",
485 | "Test CE of no-covariate model 0.6785695199678788\n"
486 | ]
487 | }
488 | ]
489 | },
490 | {
491 | "cell_type": "markdown",
492 | "metadata": {
493 | "id": "2RkvV_4_dFWo"
494 | },
495 | "source": [
496 | "## Use cross fitting to get get predicted outcomes and propensity scores for each unit"
497 | ]
498 | },
499 | {
500 | "cell_type": "code",
501 | "metadata": {
502 | "id": "KA0AsEGJ_X3b"
503 | },
504 | "source": [
505 | "# helper functions to implement the cross fitting\n",
506 | "\n",
507 | "def treatment_k_fold_fit_and_predict(make_model, X:pd.DataFrame, A:np.array, n_splits:int):\n",
508 | " \"\"\"\n",
509 | " Implements K fold cross-fitting for the model predicting the treatment A. \n",
510 | " That is, \n",
511 | " 1. Split data into K folds\n",
512 | " 2. For each fold j, the model is fit on the other K-1 folds\n",
513 | " 3. The fitted model is used to make predictions for each data point in fold j\n",
514 | " Returns an array containing the predictions \n",
515 | "\n",
516 | " Args:\n",
517 | " model: function that returns sklearn model (which implements fit and predict_prob)\n",
518 | " X: dataframe of variables to adjust for\n",
519 | " A: array of treatments\n",
520 | " n_splits: number of splits to use\n",
521 | " \"\"\"\n",
522 | " predictions = np.full_like(A, np.nan, dtype=float)\n",
523 | " kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)\n",
524 | " \n",
525 | " for train_index, test_index in kf.split(X, A):\n",
526 | " X_train = X.loc[train_index]\n",
527 | " A_train = A.loc[train_index]\n",
528 | " g = make_model()\n",
529 | " g.fit(X_train, A_train)\n",
530 | "\n",
531 | " # get predictions for split\n",
532 | " predictions[test_index] = g.predict_proba(X.loc[test_index])[:, 1]\n",
533 | "\n",
534 | " assert np.isnan(predictions).sum() == 0\n",
535 | " return predictions\n",
536 | "\n",
537 | "\n",
538 | "def outcome_k_fold_fit_and_predict(make_model, X:pd.DataFrame, y:np.array, A:np.array, n_splits:int, output_type:str):\n",
539 | " \"\"\"\n",
540 | " Implements K fold cross-fitting for the model predicting the outcome Y. \n",
541 | " That is, \n",
542 | " 1. Split data into K folds\n",
543 | " 2. For each fold j, the model is fit on the other K-1 folds\n",
544 | " 3. The fitted model is used to make predictions for each data point in fold j\n",
545 | " Returns two arrays containing the predictions for all units untreated, all units treated \n",
546 | "\n",
547 | " Args:\n",
548 | " model: function that returns sklearn model (that implements fit and either predict_prob or predict)\n",
549 | " X: dataframe of variables to adjust for\n",
550 | " y: array of outcomes\n",
551 | " A: array of treatments\n",
552 | " n_splits: number of splits to use\n",
553 | " output_type: type of outcome, \"binary\" or \"continuous\"\n",
554 | "\n",
555 | " \"\"\"\n",
556 | " predictions0 = np.full_like(A, np.nan, dtype=float)\n",
557 | " predictions1 = np.full_like(y, np.nan, dtype=float)\n",
558 | " if output_type == 'binary':\n",
559 | " kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)\n",
560 | " elif output_type == 'continuous':\n",
561 | " kf = KFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)\n",
562 | "\n",
563 | " # include the treatment as input feature\n",
564 | " X_w_treatment = X.copy()\n",
565 | " X_w_treatment[\"A\"] = A\n",
566 | "\n",
567 | " # for predicting effect under treatment / control status for each data point \n",
568 | " X0 = X_w_treatment.copy()\n",
569 | " X0[\"A\"] = 0\n",
570 | " X1 = X_w_treatment.copy()\n",
571 | " X1[\"A\"] = 1\n",
572 | "\n",
573 | " \n",
574 | " for train_index, test_index in kf.split(X_w_treatment, y):\n",
575 | " X_train = X_w_treatment.loc[train_index]\n",
576 | " y_train = y.loc[train_index]\n",
577 | " q = make_model()\n",
578 | " q.fit(X_train, y_train)\n",
579 | "\n",
580 | " if output_type =='binary':\n",
581 | " predictions0[test_index] = q.predict_proba(X0.loc[test_index])[:, 1]\n",
582 | " predictions1[test_index] = q.predict_proba(X1.loc[test_index])[:, 1]\n",
583 | " elif output_type == 'continuous':\n",
584 | " predictions0[test_index] = q.predict(X0.loc[test_index])\n",
585 | " predictions1[test_index] = q.predict(X1.loc[test_index])\n",
586 | "\n",
587 | " assert np.isnan(predictions0).sum() == 0\n",
588 | " assert np.isnan(predictions1).sum() == 0\n",
589 | " return predictions0, predictions1"
590 | ],
591 | "execution_count": 227,
592 | "outputs": []
593 | },
594 | {
595 | "cell_type": "code",
596 | "metadata": {
597 | "id": "wVcE6pRQeMNf"
598 | },
599 | "source": [
600 | "g = treatment_k_fold_fit_and_predict(make_g_model, X=confounders, A=treatment, n_splits=10)"
601 | ],
602 | "execution_count": 228,
603 | "outputs": []
604 | },
605 | {
606 | "cell_type": "code",
607 | "metadata": {
608 | "id": "GLEHlLLdWSh9"
609 | },
610 | "source": [
611 | "Q0,Q1=outcome_k_fold_fit_and_predict(make_Q_model, X=confounders, y=outcome, A=treatment, n_splits=10, output_type=\"continuous\")"
612 | ],
613 | "execution_count": 229,
614 | "outputs": []
615 | },
616 | {
617 | "cell_type": "code",
618 | "metadata": {
619 | "colab": {
620 | "base_uri": "https://localhost:8080/",
621 | "height": 203
622 | },
623 | "id": "_NVCV0q0g8wQ",
624 | "outputId": "9f4fb865-3b5f-4e46-f7a3-c343b1924e9e"
625 | },
626 | "source": [
627 | "data_and_nuisance_estimates = pd.DataFrame({'g': g, 'Q0': Q0, 'Q1': Q1, 'A': treatment, 'Y': outcome})\n",
628 | "data_and_nuisance_estimates.head()"
629 | ],
630 | "execution_count": 230,
631 | "outputs": [
632 | {
633 | "output_type": "execute_result",
634 | "data": {
635 | "text/html": [
636 | "\n",
637 | "\n",
650 | "
\n",
651 | " \n",
652 | " \n",
653 | " | \n",
654 | " g | \n",
655 | " Q0 | \n",
656 | " Q1 | \n",
657 | " A | \n",
658 | " Y | \n",
659 | "
\n",
660 | " \n",
661 | " \n",
662 | " \n",
663 | " 0 | \n",
664 | " 0.009122 | \n",
665 | " 69.928410 | \n",
666 | " 72.999765 | \n",
667 | " 0 | \n",
668 | " 60 | \n",
669 | "
\n",
670 | " \n",
671 | " 1 | \n",
672 | " 0.153910 | \n",
673 | " 84.867654 | \n",
674 | " 90.188475 | \n",
675 | " 0 | \n",
676 | " 94 | \n",
677 | "
\n",
678 | " \n",
679 | " 2 | \n",
680 | " 0.777998 | \n",
681 | " 73.341629 | \n",
682 | " 67.400933 | \n",
683 | " 1 | \n",
684 | " 70 | \n",
685 | "
\n",
686 | " \n",
687 | " 3 | \n",
688 | " 0.113439 | \n",
689 | " 83.498759 | \n",
690 | " 82.388082 | \n",
691 | " 0 | \n",
692 | " 84 | \n",
693 | "
\n",
694 | " \n",
695 | " 4 | \n",
696 | " 0.221874 | \n",
697 | " 87.987430 | \n",
698 | " 86.811025 | \n",
699 | " 0 | \n",
700 | " 80 | \n",
701 | "
\n",
702 | " \n",
703 | "
\n",
704 | "
"
705 | ],
706 | "text/plain": [
707 | " g Q0 Q1 A Y\n",
708 | "0 0.009122 69.928410 72.999765 0 60\n",
709 | "1 0.153910 84.867654 90.188475 0 94\n",
710 | "2 0.777998 73.341629 67.400933 1 70\n",
711 | "3 0.113439 83.498759 82.388082 0 84\n",
712 | "4 0.221874 87.987430 86.811025 0 80"
713 | ]
714 | },
715 | "metadata": {},
716 | "execution_count": 230
717 | }
718 | ]
719 | },
720 | {
721 | "cell_type": "markdown",
722 | "metadata": {
723 | "id": "VNhM7URdgzQB"
724 | },
725 | "source": [
726 | "## Combine predicted values and data into estimate of ATE"
727 | ]
728 | },
729 | {
730 | "cell_type": "code",
731 | "metadata": {
732 | "id": "O_F5r0SSkzzK"
733 | },
734 | "source": [
735 | "def ate_aiptw(Q0, Q1, g, A, Y, prob_t=None):\n",
736 | " \"\"\"\n",
737 | " # Double ML estimator for the ATE\n",
738 | " \"\"\"\n",
739 | "\n",
740 | " tau_hat = (Q1 - Q0 + A*(Y-Q1)/g - (1-A)*(Y-Q0)/(1-g)).mean()\n",
741 | " \n",
742 | " scores = Q1 - Q0 + A*(Y-Q1)/g - (1-A)*(Y-Q0)/(1-g) - tau_hat\n",
743 | " n = Y.shape[0] # number of observations\n",
744 | " std_hat = np.std(scores) / np.sqrt(n)\n",
745 | "\n",
746 | " return tau_hat, std_hat\n"
747 | ],
748 | "execution_count": 231,
749 | "outputs": []
750 | },
751 | {
752 | "cell_type": "code",
753 | "metadata": {
754 | "colab": {
755 | "base_uri": "https://localhost:8080/"
756 | },
757 | "id": "SjDj0F9Bm9uq",
758 | "outputId": "e3a2f168-1bf8-47fd-f046-dd05390bf0d5"
759 | },
760 | "source": [
761 | "tau_hat, std_hat = ate_aiptw(**data_and_nuisance_estimates)\n",
762 | "print(f\"The estimate is {tau_hat} pm {1.96*std_hat}\")"
763 | ],
764 | "execution_count": 232,
765 | "outputs": [
766 | {
767 | "output_type": "stream",
768 | "name": "stdout",
769 | "text": [
770 | "The estimate is -2.6682535341413107 pm 1.52254875417939\n"
771 | ]
772 | }
773 | ]
774 | },
775 | {
776 | "cell_type": "markdown",
777 | "metadata": {
778 | "id": "L_GUa-5vMmL4"
779 | },
780 | "source": [
781 | "#Sensitivity Analysis\n",
782 | "\n",
783 | "We found an average treatment effect of diastolic blood pressure medication of about 2, significant at the 0.95 level. We'd now conduct some analysis to decide how sensitive this conclusions is to possible unobserved confounding."
784 | ]
785 | },
786 | {
787 | "cell_type": "code",
788 | "metadata": {
789 | "id": "zEv_RlkUNQZ9"
790 | },
791 | "source": [
792 | "# the first step is to choose a level of bias that would undermine the qualitative conclusion of the study\n",
793 | "# we'll go with the nominal effect\n",
794 | "target_bias = 2.00 # note: bias is specified as an absolute number"
795 | ],
796 | "execution_count": 233,
797 | "outputs": []
798 | },
799 | {
800 | "cell_type": "markdown",
801 | "metadata": {
802 | "id": "bDeSqyvvN3mg"
803 | },
804 | "source": [
805 | "## Compute influence strength of covariates\n",
806 | "Our task is to assess whether it's plausible that an unobserved confounder could be strong enough to induce a bias of 2 or more. To make that easier, we'd like to know how strong the observed confounders are. Austen plots computes these reference strengths by seeing how much model performance degrades when the covariates are removed. Accordingly, we refit the models with each (group of) reference covariate removed. "
807 | ]
808 | },
809 | {
810 | "cell_type": "code",
811 | "metadata": {
812 | "id": "b_51ei-JOIrn"
813 | },
814 | "source": [
815 | "# First, lets define the groups of covariates we'll measure the strength of.\n",
816 | "# Note: it's important to group the covariates into meaningful groups, because strength is measured conditional on all remaining covariates\n",
817 | "# E.g., if we remove only \"black\" (and not \"hispanic, white\") then we will measure no effect, because we can infer the removed variable from the remaining ones\n",
818 | "\n",
819 | "covariate_groups = {\n",
820 | " 'socioeconomic': ['white', 'black', 'hisp' , 'hhsize', 'edu',\n",
821 | " 'married', 'widowed', 'divorced', 'separated', 'income', 'packyr', 'alcohol',\n",
822 | " 'insurance', 'together'],\n",
823 | " 'sex': ['female'],\n",
824 | " 'age': ['age_mo'],\n",
825 | " 'health': ['bmi', 'pulse', 'sodium', 'potassium', 'r_sodipota']}"
826 | ],
827 | "execution_count": 234,
828 | "outputs": []
829 | },
830 | {
831 | "cell_type": "code",
832 | "metadata": {
833 | "id": "eLskfBqCQlwZ"
834 | },
835 | "source": [
836 | "# For each covariate group, refit the models without using that group\n",
837 | "nuisance_estimates = {}\n",
838 | "for group, covs in covariate_groups.items():\n",
839 | " remaining_confounders = confounders.drop(columns=covs)\n",
840 | "\n",
841 | " g = treatment_k_fold_fit_and_predict(make_g_model, X=remaining_confounders, A=treatment, n_splits=5)\n",
842 | " Q0, Q1 = outcome_k_fold_fit_and_predict(make_Q_model, X=remaining_confounders, y=outcome, A=treatment, n_splits=5, output_type=\"continuous\")\n",
843 | "\n",
844 | " data_and_nuisance_estimates = pd.DataFrame({'g': g, 'Q0': Q0, 'Q1': Q1, 'A': treatment, 'Y': outcome})\n",
845 | " nuisance_estimates[group] = data_and_nuisance_estimates\n"
846 | ],
847 | "execution_count": 235,
848 | "outputs": []
849 | },
850 | {
851 | "cell_type": "markdown",
852 | "metadata": {
853 | "id": "73aTyanbTihM"
854 | },
855 | "source": [
856 | "## Save computed estimates as CSVs\n",
857 | "The Austen plot code expects the nuisance function estimates to be provided as csvs with columns 'g', 'Q', 't', 'y'"
858 | ]
859 | },
860 | {
861 | "cell_type": "code",
862 | "metadata": {
863 | "id": "Am4bdBMGXhqr"
864 | },
865 | "source": [
866 | "data_and_nuisance_path = 'data_and_nuisance_estimates.csv'\n",
867 | "covariate_dir_path = 'covariates/'"
868 | ],
869 | "execution_count": 236,
870 | "outputs": []
871 | },
872 | {
873 | "cell_type": "code",
874 | "metadata": {
875 | "id": "YJ-QU3gXSqiz"
876 | },
877 | "source": [
878 | "def _convert_to_austen_format(nuisance_estimate_df: pd.DataFrame):\n",
879 | " austen_df = pd.DataFrame()\n",
880 | " austen_df['y']=nuisance_estimate_df['Y']\n",
881 | " austen_df['t']=nuisance_estimate_df['A']\n",
882 | " austen_df['g']=nuisance_estimate_df['g']\n",
883 | " A = nuisance_estimate_df['A']\n",
884 | " austen_df['Q']=A*nuisance_estimate_df['Q1'] + (1-A)*nuisance_estimate_df['Q0'] # use Q1 when A=1, and Q0 when A=0\n",
885 | "\n",
886 | " return austen_df"
887 | ],
888 | "execution_count": 237,
889 | "outputs": []
890 | },
891 | {
892 | "cell_type": "code",
893 | "metadata": {
894 | "id": "31BWqKXmVAQr"
895 | },
896 | "source": [
897 | "austen_data_and_nuisance = _convert_to_austen_format(data_and_nuisance_estimates)\n",
898 | "austen_data_and_nuisance.to_csv(data_and_nuisance_path, index=False)\n",
899 | "\n",
900 | "pathlib.Path(covariate_dir_path).mkdir(exist_ok=True)\n",
901 | "for group, nuisance_estimate in nuisance_estimates.items():\n",
902 | " austen_nuisance_estimate = _convert_to_austen_format(nuisance_estimate)\n",
903 | " austen_nuisance_estimate.to_csv(os.path.join(covariate_dir_path,group+\".csv\"), index=False)"
904 | ],
905 | "execution_count": 238,
906 | "outputs": []
907 | },
908 | {
909 | "cell_type": "markdown",
910 | "metadata": {
911 | "id": "C84zSBeIVe0L"
912 | },
913 | "source": [
914 | "## Make plots"
915 | ]
916 | },
917 | {
918 | "cell_type": "code",
919 | "metadata": {
920 | "id": "2C0cixtvVhmD"
921 | },
922 | "source": [
923 | "ap = AustenPlot(data_and_nuisance_path, covariate_dir_path)"
924 | ],
925 | "execution_count": 239,
926 | "outputs": []
927 | },
928 | {
929 | "cell_type": "code",
930 | "metadata": {
931 | "colab": {
932 | "base_uri": "https://localhost:8080/"
933 | },
934 | "id": "JFgN5L6YW0oF",
935 | "outputId": "9bb60caf-daef-4ba3-8ab2-8a4f6d3b43eb"
936 | },
937 | "source": [
938 | "p, plot_coords, variable_coords = ap.fit(bias=target_bias) # recall we set target_bias=2.0"
939 | ],
940 | "execution_count": 240,
941 | "outputs": [
942 | {
943 | "output_type": "stream",
944 | "name": "stdout",
945 | "text": [
946 | "Fitting main dataset\n"
947 | ]
948 | }
949 | ]
950 | },
951 | {
952 | "cell_type": "code",
953 | "metadata": {
954 | "colab": {
955 | "base_uri": "https://localhost:8080/",
956 | "height": 396
957 | },
958 | "id": "lfPaV4IDckdS",
959 | "outputId": "56a1085c-cc7d-45e7-d61b-832125c05857"
960 | },
961 | "source": [
962 | "p"
963 | ],
964 | "execution_count": 241,
965 | "outputs": [
966 | {
967 | "output_type": "display_data",
968 | "data": {
969 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAikAAAFqCAYAAADft8pBAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAPYQAAD2EBqD+naQAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nOzdeVhV1f4/8PcGZZZ5EEgFQUHERCC8OCKpgEMO5Zii+FVMMiPLORUnFKfKuUKcS8vZSjMVblpKCloaRFgMiSMIB5kR9u8Pfpzr6YAC+yAHeb+eh+dy9l57rc9ZN8/5sNbaawuiKIogIiIiUjMaDR0AERERUVWYpBAREZFaYpJCREREaolJChEREaklJilERESklpikEBERkVpikkJERERqiUkKERERqSUmKURERKSWmKTQC0MQBISFhTV0GEREpCJMUkht7dy5E4IgKPxYWlqiT58+OHnyZEOHp1L//PMPlixZAi8vL5iYmMDc3Bw+Pj44c+ZMjesoLy/H6tWrYW9vDx0dHbz88sv48ssv6zFqIqL61ayhAyB6lqVLl8Le3h6iKOLevXvYuXMnBgwYgBMnTmDQoEHycoWFhWjWrHH+J33s2DFERERg6NChmDBhAh4/fozdu3ejX79+iIqKQlBQ0DPrWLBgAVatWoUpU6bglVdewbFjxzB27FgIgoDRo0c/h3dBRKRaAh8wSOpq586dCAoKwuXLl+Hp6Sk/np2dDSsrK4wYMQL79u1rwAhV5/fff4eVlRXMzc3lx4qLi+Hm5oa8vDz8888/T70+IyMD9vb2CA4OxqZNmwAAoiiid+/eSElJQWpqKjQ1Nev1PRARqRqne6jRMTY2hq6urtKoyb/XpKSlpSEkJAROTk7Q1dWFmZkZRowYgdTUVIXrSktLsWTJErRr1w46OjowMzNDjx498MMPPzyHd1OhY8eOCgkKAGhra2PAgAG4desWHj169NTrjx07htLSUoSEhMiPCYKAadOm4datW7h48WK9xE1EVJ8a59g4NSkymQyZmZkQRRH379/Hxo0bkZeXh3Hjxj31usuXL+Pnn3/G6NGj8dJLLyE1NRVbt26Fj48PEhISoKenBwAICwvDypUrMXnyZHh5eSE3NxdXrlxBfHw8+vXrV2395eXlePjwYY3eg5GREZo3b17zN/3/3b17F3p6evJYq3P16lXo6+ujQ4cOCse9vLzk53v06FHr9omIGhKTFFJ7ffv2VXitra2NqKiopyYQADBw4EC88cYbCscGDx4Mb29vHDp0COPHjwcAfPvttxgwYAA+++yzWsWVnp4Oe3v7GpWNjo6Gj49Preq/efMmDh8+jBEjRjxzqubOnTuwsrKCIAgKx62trQEAt2/frlXbRETqgEkKqb3Nmzejffv2AIB79+5h7969mDx5Mlq0aIHhw4dXe52urq7899LSUuTm5sLR0RHGxsaIj4+XJynGxsb4/fffkZycjHbt2tU4rpYtW9Z4Sqhz5841rhcACgoKMGLECOjq6mLVqlXPLF9YWAhtbW2l4zo6OvLzRESNDZMUUnteXl4KC2fHjBmDLl26YPr06Rg0aBC0tLSqvK6wsBArV67Ejh07kJGRgSfXiMtkMvnvS5cuxZAhQ9C+fXu4urrC398f48ePx8svv/zUuHR0dJRGeVShrKwMo0ePRkJCAk6ePAkbG5tnXqOrq4vi4mKl40VFRfLzRESNDRfOUqOjoaGBPn364M6dO0hOTq623DvvvIMVK1Zg5MiR+Oqrr3D69Gn88MMPMDMzQ3l5ubxcr1698NdffyEqKgqurq6IjIyEu7s7IiMjnxpHWVkZ7t69W6OfkpKSGr+/KVOm4JtvvsHOnTvh6+tbo2usra1x9+5d/PtmvTt37gBAjRIdIiJ1w5EUapQeP34MAMjLy6u2zMGDBzFhwgSsW7dOfqyoqAg5OTlKZU1NTREUFISgoCDk5eWhV69eCAsLw+TJk6ut/59//lH5mpRZs2Zhx44d+PjjjzFmzJga1Q0Abm5uiIyMRGJiIlxcXOTHY2Nj5eeJiBobJinU6JSWluL06dPQ0tJSupvlSZqamkojCxs3bkRZWZnCsaysLJiZmclfGxgYwNHR8Zl7k6h6TcqaNWuwdu1azJ8/H++++2615WQyGe7cuQNra2sYGRkBAIYMGYL33nsPW7ZsUdgnZdu2bbC1tUW3bt1qFCcRkTphkkJq7+TJk/jjjz8AAPfv38cXX3yB5ORkzJ07F4aGhtVeN2jQIOzZswdGRkZwcXHBxYsXcebMGYWEBABcXFzg4+MDDw8PmJqa4sqVKzh48CCmT5/+1LhUuSblyJEjmD17Ntq1a4cOHTpg7969Cuf79esHKysredmgoCDs2LEDEydOBAC89NJLCA0NxZo1a1BaWopXXnkFR48exfnz57Fv3z5u5EZEjRKTFFJ7ixYtkv+uo6MDZ2dnbN26FVOnTn3qdZ988gk0NTWxb98+FBUVoXv37jhz5gz8/PwUys2YMQPHjx/H6dOnUVxcjDZt2mD58uWYNWtWvbyfqvz6668AgOTkZPldR0+Kjo6WJynVWbVqFUxMTPDpp59i586daNeuHfbu3YuxY8fWS8xERPWN2+ITERGRWuLdPURERKSWmKQQERGRWmKSQkRERGqJSQoRERGpJSYpREREpJZUmqQUFRVV+fwQdfbjjz9i8ODBsLGxgSAIOHr0aEOHRERERJCYpMTExOC9996Dl5cXDAwMoK+vDz09PbRo0QJeXl4IDQ1FTEyMikKtH/n5+ejcuTM2b97c0KEQERHRE2q9T0ppaSk+/fRTrF+/HqmpqTA1NYW7uzvatm0LExMTiKKI7OxspKSkID4+Hg8fPkSbNm3w/vvvY+rUqWjevHl9vRfJBEHAkSNHMHTo0IYOhYiIqMmr9Y6zjo6OKCkpwYQJEzBy5Ei4u7s/tXxcXBy+/vprhIeHY+3atUhNTa1rrERERNSE1DpJmT9/PiZOnAhtbe0alffw8ICHhweWLl2KHTt21DpAIiIiapq4Lf4TajPdU1hYiJKSkucQFRG9SLS0tKCrq9vQYRA1CnzAYA1lZGTg9u3bAICysjLk5+fDxMSkyrKffPIJSkpK4OXlhd69ez/PMOUqn3pbVlbWIO0DQLNmzfD48eMGa18d+gBgP1RiP1S4d+8efHx8mKgQ1UCtkpSCggIkJSXB0dERLVq0UDj3008/oXv37ioNTp3Y2trC1tYWACCTyXD+/Hk4OTkp9QNQ8WFcUlICY2NjODg4PO9QAQCVA2SCIDRI+wCQnZ1dbSL3PKhDHwDsh0rsB+DRo0e4ffs2SkpKmKQQ1UCNk5RLly5h8ODB0NLSQnZ2NubPn48PP/xQfj4gIAC5ubn1EmR9ysvLw82bN+WvU1JScO3aNZiamqJ169ZPvbZFixYwMjJSOq6hUXFnt5aWVpXnn4fKv1ibNWu4wbK8vLwGe/+AevQBwH6oxH4gotqq8T4pM2fOxKZNm5CRkYFff/0V33zzDQIDA+V/nTT
WpS1XrlxBly5d0KVLFwAV77NLly5YtGhRA0dGRETUtNU4SUlISMCoUaMAAO3atUNMTAwePnyIYcOGNeoFpD4+PhBFUeln586dda6zcji5sSZuRERE6qDGSYqRkREyMjLkr3V0dHD06FHo6urCz88P5eXl9RJgY8QkhYiISLoaJyl9+/ZV2uekWbNm+OKLL+Do6IjCwkKVB0dERERNV41XkG3durXK2wcFQcDnn3+OhQsXqjSwxowjKURERNLVOEnR0tKClpZWteefdSdMU8IkhYiISDpJT0EmIiIiqi9MUuoBR1KIiIikq1OSUlpaigULFsDR0REWFhYYOHAgrl69qlQuKSkJ69evR79+/SQH2pgwSSEiIpKuTlsvhoeHY+XKlWjZsiXatGmD6Oho9O7dGxcuXICRkRG2bNmCQ4cOISUlBaIowtDQUNVxNwpMUoiIiOquTknK3r170a9fP3zzzTdo3rw5bt++jYEDB+K9995DXFwccnNz4enpiTFjxqB///7w9vZWddxqrXJbfCYpREREdVenJCUtLQ1z5sxB8+bNAQA2NjZYtWoVAgIC4Orqiq+++grOzs4qDZSIiIialjqtSXn8+DH09PQUjnXu3BkAMG/evCafoHBNChERkXR1vrsnMzNT4Uu4clTFyspKelSNXEM+Cp6IiOhFUednlr/33nuYN28eOnbsCDc3N9jZ2UEQBJSWlqoyvkaNIylERER1V6ck5dSpU/j111/x22+/4ddff8WuXbvkycmAAQPQpk0bdOrUCa6urvL/dXV1VWng6ozTPURERNLVKUnp378/+vfvL39dWlqKhIQEedLy22+/ITY2FidOnABQ8aVdVlammogbAU73EBERSVfn6Z4nNW/eHJ07d0bnzp0xfvx4+fF79+7h2rVruH79uiqaaXQ4kkJERFR3KklSqmNlZQU/Pz/4+fnVZzNqh9M9RERE0vHZPfWA0z1ERETSMUmpRxxJISIiqjsmKfWA0z1ERETSMUmpB0xSiIiIpGOSUg+YpBAREUlX67t70tPT69RQ69at63QdERERNU21TlIqt7+vraa4mRtHUoiIiOqu1klKVFQUb7F9BiYpRERE0tU6SZk4cWI9hEFERESkqF53nH2R6enpQRRFPH78uNoy5eXlTz1fn9Rhek0QhAZ7/4B69AHAfqjEfuDoKlFtqSRJKSoqwqFDhxAfHw+ZTIby8nKF84IgYPv27apoqsFlZGQgLS3tqR+2nO4hIiKSTnKSkpaWhj59+iA1NRXGxsaQyWQwNTVFTk4OysrKYG5uDgMDA1XEqhZsbW1hYGCA8+fPQxAENGum3IWVSUp155+nhmxfFMUGf/9Aw/YBwH6oxH7gIzOIakvyPimzZs2CTCbDpUuX8Oeff0IURRw4cAB5eXmIiIiArq4uvv/+e1XE2mhwJIWIiEg6yUnKuXPnEBISAi8vL2hoVFQniiK0tbUxa9YsvPrqqwgNDZUcaGPCv5aIiIikk5ykFBQUwM7ODgBgaGgIQRAgk8nk5729vXHhwgWpzTQqlUnKv9fmEBERUc1JTlJat26NW7duAaiY67W1tcWlS5fk5xMSEqCjoyO1mUaFIylERETSSV5B5uvri2PHjmHx4sUAKvZRWblyJbKzs1FeXo49e/YgMDBQcqCNEdekEBER1Z3kJGXu3Lm4fPkyiouLoa2tjfnz5+P27ds4ePAgNDU1MXbsWKxfv14VsTYaXDhLREQkneQkpXXr1goPD9TR0UFkZCQiIyOlVt1ocbqHiIhIOslrUqh6HEkhIiKqu1qPpKSnpwOAfPSk8vWzPDna8qLjdA8REZF0tU5S7OzsIAgCCgsLoaWlJX/9LOrw3AwiIiJqPGqdpERFRUEQBDRv3lzhNf0PR1KIiIikq3WSMnHiRIXXvr6+sLCwgK6ubpXlCwsL8eDBgzoF11gxSSEiIpJO8sJZe3t7HDlypNrzx48fh729vdRmiIiIqImRnKQ8a7SgtLRU/kyfpqLy/XJbfCIiorqr0z4pubm5yMnJkb/Oysqq8i6fnJwc7N+/H9bW1nWPkIiIiJqkOiUpH330EZYuXQqgYv1FaGhotU86FkURy5cvr3uEjRDXpBAREUlXpySlf//+MDAwgCiKmD17NsaMGQN3d3eFMoIgQF9fHx4eHvD09FRJsI0FkxQiIiLp6pSkeHt7w9vbGwCQn5+P119/Ha6urioNjIiIiJo2SStaCwoKcPz4cVy4cEFV8bwQOJJCREQknaQkRU9PDykpKdzM7V+YpBAREUkn+d5gf39/fP/996qIhYiIiEhOcpKycOFC/Pnnnxg/fjwuXLiAjIwMPHz4UOmnKeFIChERkXR1Wjj7pI4dOwIAEhIS8MUXX1Rbrik9YJDTX0RERNJJTlIWLVrEL+VqcCSFiIio7iQnKWFhYSoI48XC6R4iIiLpGtVDdTZv3gw7Ozvo6Oiga9eu+OWXX6ot6+PjA0EQlH4GDhwoLzNx4kSl8/7+/pLjZJJCREQkneSRFAAoKirCoUOHEB8fD5lMpvRgPUEQsH37dkltHDhwADNnzsS2bdvQtWtXfPzxx/Dz80NSUhIsLS2Vyh8+fBglJSXy11lZWejcuTNGjBihUM7f3x87duyQv9bW1pYUJ8AkhYiISBUkJylpaWno06cPUlNTYWxsDJlMBlNTU+Tk5KCsrAzm5uYwMDCQHOj69esxZcoUBAUFAQC2bduGb7/9FlFRUZg7d65SeVNTU4XX+/fvh56enlKSoq2tjZYtW0qO70lco0NERCSd5OmeWbNmQSaT4dKlS/jzzz8hiiIOHDiAvLw8REREQFdXV/I+KiUlJYiLi0Pfvn3/F7iGBvr27YuLFy/WqI7t27dj9OjR0NfXVzgeExMDS0tLODk5Ydq0acjKypIU65M4kkJERFR3kpOUc+fOISQkBF5eXtDQqKhOFEVoa2tj1qxZePXVV6t9QnJNZWZmoqysDFZWVgrHrayscPfu3Wde/8svv+DGjRuYPHmywnF/f3/s3r0bZ8+eRUREBP773/8iICBA8u3SnO4hIiKSTvJ0T0FBAezs7AAAhoaGEAQBMplMft7b2xsffPCB1GYk2b59Ozp16gQvLy+F46NHj5b/3qlTJ7z88stwcHBATEwMXn311ecdJhERET1BcpLSunVr3Lp1q6KyZs1ga2uLS5cuYfjw4QAqNnnT0dGR1Ia5uTk0NTVx7949heP37t175nqS/Px87N+/H0uXLn1mO23btoW5uTlu3ryplKRkZGTg9u3bAIDS0lLo6OggOzsbeXl5SvUUFRUBqJimysjIeGa79aFyFKch18cUFRU12PsH1KMPAPZDJfZDxWcHEdWc5CTF19cXx44dw+LFiwFU3Na7cuVKZGdno7y8HHv27EFgYKCkNrS0tODh4YGzZ89i6NChAIDy8nKcPXsW06dPf+q1X3/9NYqLizFu3LhntnPr1i1kZWXB2tpa6ZytrS1sbW0BADKZDOfPn4eJiQmMjIyUyurp6QH4X9LWEB4/fiyPoaFkZGQ02PsH1KMPAPZDJfYDFEaZie
jZJP9rnTt3Li5fvozi4mJoa2tj/vz5uH37Ng4ePAhNTU2MHTsW69evlxzozJkzMWHCBHh6esLLywsff/wx8vPz5Xf7BAYGwtbWFitXrlS4bvv27Rg6dCjMzMwUjufl5WHJkiV4/fXX0bJlS/z111+YPXs2HB0d4efnJzleIiIikkYl0z2tW7eWv9bR0UFkZCQiIyOlVq1g1KhRePDgARYtWoS7d+/Czc0Np06dki+mTU9Ply/crZSUlIQLFy7g9OnTSvVpamrit99+w65du5CTkwMbGxv0798fy5Ytk7xXChfOEhERSdew47+1NH369Gqnd2JiYpSOOTk5VZsoqOLW6OowSSEiIpJOZUnKjRs38N133yE1NRUAYGdnh4CAAHTq1ElVTTQaTFKIiIikk5ykFBcXY+rUqdizZw9EUZRPuZSXl2PevHl48803ERkZCS0tLcnBNhZMUoiIiKSTvJnbnDlzsHv3bkybNg2JiYkoKipCcXExEhMT8dZbb2Hv3r2YPXu2KmIlIiKiJkTySMrevXsxfvx4bNq0SeG4k5MTNm/ejNzcXOzduxcff/yx1KYaDY6kEBERSSd5JKW0tBT/+c9/qj3frVs3+f4ETUVDb5pFRET0IpCcpPj5+T31LplTp06hf//+UptplDiSQkREVHeSp3uWLVuGkSNHYvjw4Xj77bfh6OgIAEhOTsbmzZuRlpaGAwcO4OHDhwrXmZqaSm1abXG6h4iISDrJSUqHDh0AANevX8exY8cUzlV+Sbu4uChdJ/VJw+qM0z1ERETSSU5SFi1axC/lanAkhYiIqO4kJylhYWEqCOPFwukeIiIi6SQvnCVlHFkiIiKSrtZJiouLC3bv3o2SkpIaX1NcXIwdO3ZUuTblRcSRFCIiIulqPd0zceJEzJw5E++++y5ee+019O3bF+7u7rC3t4eenh4AID8/HykpKbhy5QrOnDmDEydOQEtLC7NmzVL5G1BnTFKIiIjqrtZJyuzZszFt2jRs374dO3fuxJ49e+QjB82aVVRXuXmbKIpwdXXFkiVLMGnSJBgaGqowdPXFkRQiIiLp6rRwtkWLFggNDUVoaChSU1Px888/448//kBWVhYAwMzMDM7OzvD29oa9vb1KA24MmKQQERFJJ/nuHjs7O9jZ2akgFCIiIqL/4d099YAjKURERNIxSakHTFKIiIikY5JCREREaolJSj3gSAoREZF0TFLqAZMUIiIi6ZikEBERkVqSfAtypeLiYsTHx+P+/fvo3r07zM3NVVV1o8ORFCIiIulUMpKyYcMGWFtbo0ePHhg+fDh+++03AEBmZibMzc0RFRWlimYaDSYpRERE0klOUnbs2IHQ0FD4+/tj+/btCl/M5ubm8PX1xf79+6U206hoaFR0K5MUIiKiupOcpKxbtw5DhgzBF198gcGDByud9/DwwO+//y61mUalMkkpLy9v4EiIiIgaL8lJys2bNxEQEFDteVNTU/kzfZqKyukeJilERER1J3nhrLGxMTIzM6s9n5CQgJYtW0ptRu3o6elBFEX5E5+r8qzz9amsrKxB2n2SIAgN9v4B9egDgP1Qif3AKWCi2pKcpAwYMACfffYZQkJClM79/vvv+PzzzzFp0iSpzaiNjIwMpKWlPfXDliMpRNRYiKKIK1euIDo6GrmyHBgaGaNPnz7w9PSUf5YRNRTJScry5cvRtWtXuLq6YvDgwRAEAbt27UJUVBQOHToEa2trLFq0SBWxqgVbW1sYGBjg/PnzEAQBzZopd2HlMVEUqzz/PDVk++rw/oGG7QOA/VCJ/QC1+9JPSEjAh3PeR9qfN+BmLsJEW8SdYgEHtm9Em/auWB6xDi4uLjWuLy0tDRERETh79izS09Ohp6cHX19frFmzBnZ2dgplf/vtN7zzzjv45ZdfYGZmhrfeegu2traYNGkSUlJSFMqfPHkS4eHhiI+Ph4aGBnr16oXVq1ejY8eOKuoJUleS/7Xa2NggLi4O8+fPx4EDByCKIvbs2YMWLVpgzJgxWLVqVZPbM4UjKUSk7hISEjB5/Ch0NcrB/NfMYKr3v6+DhwWPsS/uBiaPH4XIPQdqnKhcvnwZP//8M0aPHo2XXnoJqamp2Lp1K3x8fJCQkAA9PT0AFSPSffr0gSAImDdvHvT19REZGQltbW2lOvfs2YMJEybAz88PERERKCgowNatW9GjRw9cvXpVKfmhF4tK/qSwtLREZGQkIiMj8eDBA5SXl8PCwkJ+l0tT8+QtyKIoqt1fT0TUtImiiA/nvI+uRjmY3sNS6TPKVK8ZpvewBC7cx4dz3seh49/V6HNs4MCBeOONNxSODR48GN7e3jh06BDGjx8PAIiIiEB2djbi4+Ph5uYGAAgKCkK7du0Urs3Ly8OMGTMwefJkfPbZZ/LjEyZMgJOTE8LDwxWO04tH5VmEhYUFrKysmmyCAkDhvXOhHBGpmytXriDtzxt408Os2uRDEASMdTdF2p83EBcXV6N6dXV15b+XlpYiKysLjo6OMDY2Rnx8vPzcqVOn4O3tLU9QgIo7Qd98802F+n744Qfk5ORgzJgxyMzMlP9oamqia9euiI6Ors3bpkZIJSMpFy5cQFRUFP7++29kZ2crfTELgoBff/1VFU01Ck/+o2eSQkTqJjo6Gm7mosIUT1XM9JvDzVzEuXPn4Onp+cx6CwsLsXLlSuzYsQMZGRkKn38ymUz+e1paGry9vZWud3R0VHidnJwMAPD19a2yPUNDw2fGRI2b5CRl/fr1mDVrFnR0dODk5ARTU1NVxNWoPZmklJeXQ1NTswGjISJSlCvLgYl2zf6AMtEWkftEgvE077zzjnwXcm9vbxgZGUEQBIwePbpOa/Qqr9mzZ0+VW1mow0Jsql+S/x9es2YNunfvjhMnTsDIyEgVMTV6nO4hInVmaGSMO8U1WyuXXSzAqYaf7QcPHsSECROwbt06+bGioiLk5OQolGvTpg1u3rypdP2/jzk4OACoWPfYt2/fGsVALxbJC0cKCgrw5ptvMkF5wpNJCu/wISJ106dPH1zLFPCw4Omb62Xll+JaplDtdMu/aWpqKv1htnHjRqWN9Pz8/HDx4kVcu3ZNfuzhw4fYt2+fUjlDQ0OEh4ejtLRUqb0HDx7UKC5qvCSPpPTp0wfXr19XRSwvjH9P9xARqRNPT0+0ae+KfXE3qry7B6gYBf4i/iHsnDrBw8OjRvUOGjQIe/bsgZGREVxcXHDx4kWcOXMGZmZmCuVmz56NvXv3ol+/fnjnnXfktyC3bt0aDx8+lMdjaGiIrVu3Yvz48XB3d8fo0aNhYWGB9PR0fPvtt+jevTs2bdokvUNIbUlOUjZu3Ij+/ftj7dq1mDRpEtekgNM9RKTeBEHA8oh1mDx+FHDhPt70qGqflCzEyowRuWltjbdR+OSTT6CpqYl9+/ahqKgI3bt3x5kzZ+Dn56dQrlWrVoiOjsaMGTMQHh4OCwsLvP3229DX18eMGTOgo6MjLzt27FjY2Nhg1apVWLNmDYqLi2Fra4uePXsiKChINR1CaktyktKqVStMnToVH3zwAebMm
QMdHR2lhaKCICis7H7R8e4eIlJ3Li4uiNxzAB/OeR9Tj/9vx9nsYgHXMgW0ae+KyE2123HW2NgYUVFRSsdTU1OVjrm5ueHHH39UOBYaGgodHR2lDUB9fHzg4+NT4zjoxSE5SVm0aBFWrFgBW1tbeHp6cm0KON1DRI2Di4sLDh3/DnFxcTh37hxyZTI4GRlhmq8vPDw86nUjysLCQoV9VbKysrBnzx706NGDd0SSnOQkZdu2bRg4cCCOHj3apDdwexIXzhJRYyEIAjw9PWu0D4oqeXt7w8fHBx06dMC9e/ewfft25ObmYuHChc81DlJvkpOUkpISDBw4kAnKE7gmhYjo6QYMGICDBw/is88+gyAIcHd3x/bt29GrV6+GDo3UiOTMYtCgQTh//rwqYnlhcLqHiOjpwsPD8eeff6KgoAD5+fk4f/4890IhJZKTlMWLFyMhIQEhISGIi4vDgwcP8PDhQy7rkroAACAASURBVKWfpoQjKURERNJJnu5xcnICAFy7dg2ffvppteX+vZnPi4x39xAREUmnkrt76nMFeGPEhbNERETSSU5SwsLCVBDGi4VJChERkXQqfYRkXl4e/vnnHwAVm7wZGBiosvpGg9M9RERE0qnkvuHLly+jT58+MDExgaurK1xdXWFiYgJfX19cuXJFFU00Kry7h4iISDrJIymxsbHw8fGBlpYWJk+ejA4dOgAAEhMT8eWXX6JXr16IiYmBl5eX5GAbC97dQ0REJJ3kkZQFCxbA1tYWSUlJ2Lp1K2bMmIEZM2Zg69atSEpKgo2NDRYsWKCKWBsNjqQQUVMVFhYGQRCQmZnZIO3WpuzzjpFqT3KSEhsbi6lTp6Jly5ZK56ysrBAcHIxLly5JbaZR4cJZIqKGFx4ejqNHjzZ0GCSB5CRFQ0MDjx8/rvZ8WVlZk9syn9M9REQNj0lK4yc5e+jWrRs2b96MtLQ0pXPp6enYsmULunfvLrUZAMDmzZthZ2cHHR0ddO3aFb/88ku1ZXfu3AlBEBR+dHR0FMqIoohFixbB2toaurq66Nu3L5KTkyXHyekeImosRFHE5cuXsXr1anz44XysXr0aly9f5h9YpBYkJynh4eGQyWRwdnbG2LFjERYWhrCwMIwZMwbOzs6QyWRYuXKl5EAPHDiAmTNnYvHixYiPj0fnzp3h5+eH+/fvV3uNoaEh7ty5I//5dyK1evVqbNiwAdu2bUNsbCz09fXh5+eHoqIiSbHyFmQiagwSEhLwxrABCJk0HH9f3AAxbRf+vrgBIZOG441hA5CQkFDnunNycjBx4kQYGxvDyMgIQUFBKCgoUCizd+9eeHh4QFdXF6amphg9erR8G4tK58+fx4gRI9C6dWtoa2ujVatWeO+991BYWPjU9gVBQH5+Pnbt2iX/Q3XixIm1jpEaluS7e7p06YLY2FgsWLAAx48fl/8frKenB39/fyxfvhwuLi6SA12/fj2mTJmCoKAgAMC2bdvw7bffIioqCnPnzq3yGkEQqlwrA1QkDx9//DE+/PBDDBkyBACwe/duWFlZ4ejRoxg9enSdY+V0DxGpu4SEBEwJGoWe7XIQPtcMZob/+zrIyn2MHSdvYErQKHy+40CdPsNHjhwJe3t7rFy5EvHx8YiMjISlpSUiIiIAACtWrMDChQsxcuRITJ48GQ8ePMDGjRvRq1cvXL16FcbGxgCAr7/+GgUFBZg2bRrMzMzwyy+/YOPGjbh16xa+/vrratvfs2cPJk+eDC8vLwQHBwMAHBwcahUjNTyVbObm4uKCI0eOoLy8HA8ePAAAWFhYqGwtSklJCeLi4jBv3jz5MQ0NDfTt2xcXL16s9rq8vDy0adMG5eXlcHd3R3h4ODp27AgASElJwd27dxWeumlkZISuXbvi4sWLKktSON1DROpGFEUsnP8+erbLwfsjLZXuijEzbIb3R1oCX93Hwvnv4+CR72r9+JMuXbpg+/bt8tdZWVnYvn07IiIikJaWhsWLF2P58uWYP3++vMzw4cPRpUsXbNmyRX48IiICurq68jLBwcFwdHTE/PnzkZ6ejtatW1fZ/rhx4/DWW2+hbdu2GDduXK1jJPUgOYt4/PgxcnNzKyrT0ICVlRWsrKzkX9S5ublPXVhbE5mZmSgrK4OVlZXCcSsrK9y9e7fKa5ycnBAVFYVjx45h7969KC8vR7du3XDr1i0AkF9XmzprikkKEamzK1euIP2vGwgKMKs2+RAEARP9TZH+1w3ExcXVuo233npL4XXPnj2RlZWF3NxcHD58GOXl5Rg5ciQyMzPlPy1btkS7du0QHR0tv+7JBCU/Px+ZmZno1q0bRFHE1atXax1XTWMk9SA5SZkxYwa6detW7fnu3bvj/fffl9pMrXl7eyMwMBBubm7o3bs3Dh8+DAsLi6c+qVlVuCaFiNRZdHQ0PBxFhSmeqpgbNYeHo4hz587Vuo1/j3CYmJgAALKzs5GcnAxRFNGuXTtYWFgo/CQmJiqsNUxPT8fEiRNhamoKAwMDWFhYoHfv3gAAmUxW67hqGiOpB8nTPadOnUJgYGC159944w3s3bsXn3zySZ3bMDc3h6amJu7du6dw/N69e9WuOfm35s2bo0uXLrh58yYAyK+7d+8erK2tFep0c3NTuj4jIwO3b98GAJSWlkJHRwfZ2dnIy8tTKvvkBkH379+Xz60+T5XJUUM+obqoqAgZGRkN1r469AHAfqjEfqj47FAHubk5MDOo2R9QZgYicnNrnwxoampWeVwURZSXl0MQBJw8ebLKcpXPfSsrK0O/fv3w8OFDzJkzB87OztDX10dGRgYmTpwoeaT6aTGSepCcpNy+fRu2trbVnrexsZH8waSlpQUPDw+cPXsWQ4cOBVAxjXL27FlMnz69RnWUlZXh+vXrGDBgAADA3t4eLVu2xNmzZ+VJSW5uLmJjYzFt2jSl621tbeXvUyaT4fz58zAxMYGRkZFS2Sc/iMzMzJ7aP/WlcoqtWTOVPkOyVjIyMhrkvVdShz4A2A+V2A/S//JXFUNDY/ydV7NkLStPQFtD5c85KRwcHCCKIuzt7dG+fftqy12/fh1//vkndu3apfDH8A8//FCjdho6MSfpJE/3mJmZISkpqdrziYmJMDQ0lNoMZs6cic8//xy7du1CYmIipk2bhvz8fPndPoGBgQoLa5cuXYrTp0/j77//Rnx8PMaNG4e0tDRMnjwZQMV/vKGhoVi+fDmOHz+O69evIzAwEDY2NvJEqK64TwoRqbM+ffog7qaArNynrxfMlJUi7qYAX19flbY/fPhwaGpqYsmSJUqjFqIoIisrC8D/RjqeLCOKYo1H5vX19ZGTk6OiqKkhSP6Twt/fH59++inefPNNdOnSReFcfHw8PvvsM4wYMUJqMxg1ahQePHiARYsW4e7du3Bzc8OpU6fkC1/T09MVFqxmZ2djypQpuHv3LkxMTODh4YGff/5Z4Va62bNnIz8/H8HBwcjJyUGPHj1w6tQppU3faosLZ4lInXl6eqK1gyt2nLxR5d09QEUysPPUQ7Rx7AQPDw+Vtu/g4IDly5dj3rx5SE1N
xdChQ9GiRQukpKTgyJEjCA4OxgcffABnZ2c4ODjggw8+QEZGBgwNDXHo0KEarxnx8PDAmTNnsH79etjY2MDe3h5du3ZV6Xuh+iU5SVm2bBlOnToFLy8vvPbaa/JbfG/cuIETJ07A0tISy5YtkxwoAEyfPr3a6Z2YmBiF1x999BE++uijp9YnCAKWLl2KpUuXqiS+StwnhYjUmSAIWBa+DlOCRgFf3UdQQFX7pGThfLIxPt+xtl6mTebOnYv27dvjo48+wpIlSwAArVq1Qv/+/fHaa68BqFhLeOLECcyYMQMrV66Ejo4Ohg0bhunTp6Nz587PbGP9+vUIDg7Ghx9+iMLCQkyYMIFJSiMjiCr4Fr1z5w7mzp2LY8eOyW/dMjQ0xNChQxEeHg4bGxvJgaqTyjUpPXv2rHJNyu3bt+V3NL3//vvw9PR83iGqxfw71yBUYD9UYD88+7PjeUtISMDC+e8j/a8bFXf7GIjIyhMQd1NAawdXLAtfp5LNOInqSiX/Wq2trbFr1y6IoqiwmVtTXbT05EhKWVlZA0ZCRFQ9FxcXHDzyHeLi4nDu3Dnk5srQ1tAIkxf4wsPDo8l+hpP6kJykpKenw8LCArq6uhAEAZaWlgrnCwsL8eDBg2p3BXwRPXlbG9ekEJE6EwQBnp6eDTLiS/Qsku/usbe3x5EjR6o9f/z4cdjb20ttplF5MknhSAoREVHdSE5SnrWkpbS0VGXP8GksON1DREQkXZ2me3JzcxXuPc/KykJ6erpSuZycHOzfv19hR9em4MmFeUxSiIiI6qZOScpHH30kv223clO00NDQKsuKoojly5fXPcJGiCMpRERE0tUpSenfvz8MDAwgiiJmz56NMWPGwN3dXaGMIAjQ19eHh4dHk1uQxTUpRERE0tUpSfH29oa3tzeAikdnDx8+HJ06dVJpYI0Z7+4hIiKSTvItyIsXL1ZFHC+UJ6d7KjeQIiIiotqRnKRoaGjUaMOfpjTtUdknlY8kJyIiotqTnKQsWrRIKUkpKytDamoqjh49CicnJwwaNEhqM42OpqYmHj9+3KSSMyIiIlWSnKSEhYVVe+7OnTv4z3/+g/bt20ttptGpnPJhkkJERFQ39brLmrW1Nd566y2VPQW5MalcPMskhYiIqG7qfStYfX19pKSk1HczaqdyJIVrUoiIiOqmXp9ZfuPGDWzYsKFJT/fw7h4iUmeiKOLKlSuIjo5GjiwXxkaG6NOnDzw9PfkUZGpwKnnAYNu2bZV+TE1N0blzZ9y7dw/r169XRayNSuV0D0dSiEhdJSQkYMCQYRgeOBkbTsVh17V72HAqDsMDJ2PAkGFISEiodZ2PHj1CaGgo7OzsoK2tDUtLS/Tr1w/x8fHyMrGxsfD394eRkRH09PTQu3dv/PTTT/LziYmJ0NXVRWBgoELdFy5cgKamJubMmVP3N02NiuSRlN69eytl24IgwMTEBA4ODhg9ejRMTU2lNtPocOEsEamzhIQEjAqchBzLjjAbOwnNDIzl5x7n5eDGxWMYFTgJB3ZHwcXFpcb1vvXWWzh48CCmT58OFxcXZGVl4cKFC0hMTIS7uzvOnTuHgIAAeHh4YPHixdDQ0MCOHTvg6+uL8+fPw8vLCx06dMCyZcswa9YsvPHGG3jttdeQn5+PiRMnwtnZWf5YFnrxCeKzHmNMSmQyGc6fP4+ePXvCyMioyjLvvPMOMjMz4ePjg6lTpz7nCP83zfTkww6ft4yMDNja2jZY++rQBwD7oRL7oWafHc+DKIoYMGQYbjw2g2XfwCqndURRxP0zu+HaLAvfHTtS46kfY2NjjBs3Dps2baqyTicnJ7Rt2xYnT56U11lYWIiOHTvC0dERp0+fBlAxCt27d28kJyfj999/x+LFi/Hpp5/i4sWLTe5RK01ZvS+cbap4dw8RqasrV67gRnIKzLyHVJt8CIIAU+8huJGcgri4uBrXbWxsjNjYWNy+fVvp3LVr15CcnIyxY8ciKysLmZmZyMzMRH5+Pl599VX8+OOP8ilyDQ0N7Ny5E3l5eQgICMCWLVswb948JihNjEqTlLy8PCQmJiIxMRF5eXmqrLrR4XQPEamr6OhoiNbOClM8VWluYAzR2hnnzp2rcd2rV6/GjRs30KpVK3h5eSEsLAx///03ACA5ORkAMGHCBFhYWCj8REZGori4GDKZTF6Xg4MDwsLCcPnyZXTs2BELFy6sw7ulxkwl456XL1/G7NmzceHCBYUsuGfPnli9enWTzHw5kkJE6ipHlgtR17BGZUVdQ8hkuTWue+TIkejZsyeOHDmC06dPY82aNYiIiMDhw4fl3w9r1qyBm5tbldcbGBgovK6c/rl9+zaysrLQsmXLGsdCjZ/kJCU2NhY+Pj7Q0tLC5MmT0aFDBwAVq7O//PJL9OrVCzExMfDy8pIcbGPCfVKISF0ZGxlCKEyuUVmhMBdGRu1qVb+1tTVCQkIQEhKC+/fvw93dHStWrMBHH30EADA0NETfvn2fWc+2bdvwww8/YMWKFVi5ciWmTp2KY8eO1SoWatwkT/csWLAAtra2SEpKwtatWzFjxgzMmDEDW7duRVJSEmxsbLBgwQJVxNqocLqHiNRVnz59INz5A4/zcp5arjQvB8KdP+Dr61ujesvKyhSmawDA0tISNjY2KC4uhoeHBxwcHLB27doqlwQ8ePBA/ntKSgpmzZqF119/HfPnz8fatWtx/Phx7N69u0ax0ItBJSMpixYtqnIIzsrKCsHBwdwWn4hIjXh6esK1nT1uXDz21Lt7Hl48hk7t28LDw6NG9T569AgvvfQS3njjDXTu3BkGBgY4c+YMLl++jHXr1kFDQwORkZEICAhAx44dERQUBFtbW2RkZCA6OhqGhoY4ceIERFHEpEmToKuri61btwIApk6dikOHDuHdd99F3759YWNjo9I+IfUkOUnR0NB46q6qZWVl8lGFpoQjKUSkrgRBwLpV4RgVOAn3z+yGmfcQpX1Ssi4eg/H937F2d1SNbz/W09NDSEgITp8+LV+D4ujoiC1btmDatGkAAB8fH1y8eBHLli3Dpk2bkJeXh5YtW6Jr167y7Ro2btyImJgYHDp0CBYWFvL6t2/fDldXV0yZMgXffvutCnuE1JXkJKVbt27YvHkzxo4dizZt2iicS09Px5YtW9C9e3epzTQ6TFKISJ25uLjgwO4ovD93Pm58sRCitTNEXUMIhbkQ7vwB13b2WFfLjdy0tLSwevVqrF69+qnl3NzccOjQoWrPVy4b+LdWrVopTSfRi01ykhIeHo5evXrB2dkZw4YNkz+nJykpCceOHUOzZs2wcuVKyYGqGz09PYiiWO0o0pPTPQ3x/B51SI4EQWjQZxepQx8A7IdK7IeKKRR14uLigu+OHUFcXBzOnTsHmaxikayv7yx4eHjw2T3U4CQnKV26dEFsbCwWLFiA48ePo6CgAEDFl7i/vz+WL19eq0xc3WVkZCAtLe2ZH7aVSQofMEhE6kwQBHh6ejbJrSJI/alknxQXFxccOXIE5eXl8tXZFhYWL+RaFFtbWxgYGOD8+fMQBKHaLbY
r/wIRRbFBt+FuyLYb+r1XaugY2A8V2A/gyARRLan0X6uGhgasrKxUWWWjxbt7iIiIpHnxhjrUBBfOEhERScMkpZ5UDilzTQoREVHdMEmpJ5VJSmlpaQNHQkRE1DgxSaknTFKIiIikUenC2by8PGRnZ1e5F0Dr1q1V2ZTaY5JCREQkjeQkpaioCEuWLMH27duRlZVVbbmmtoCUSQoREZE0kpOUkJAQ7Nq1C0OHDkXPnj1hYmKiirgavcokpaysDOXl5S/knjFERET1SXKScvjwYUyePBmffvqpKuJ5YTy5YdTjx4+hpaXVgNEQEb14du7ciaCgIKSkpMDOzq6hw2mUYmJi0KdPH0RHR8PHx6ehw1Ei+c97QRDg7u6uilheKM2bN5f/XlJS0oCREBERNU6Sk5QhQ4bgzJkzqojlhVK54yzAvVKIiOrD+PHjUVhYiDZt2jR0KI1Wr169UFhYiF69ejV0KFWSPN2zcOFCjBw5EsHBwZg6dSpat26t8AVdydTUVGpTjcqTIylcPEtE6koURVy5cgXR0dHIyZXB2NAIffr0gaenp9o/a0hTU7PK7xuqOQ0NDejo6DR0GNWSPJLSrl07XL16FZGRkfDy8kLLli1hYWGh9NPUPLkmhUkKEamjhIQEDBg2BMMnBWLDz6ewK+0aNvx8CsMnBWLAsCFISEiodZ2PHj1CaGgo7OzsoK2tDUtLS/Tr1w/x8fHyMl9//TU8PDygq6sLc3NzjBs3DhkZGUp1/fHHHxg5ciQsLCygq6sLJycnLFiwQH5+586dEAQBqampCtdt2bIFHTt2hLa2NmxsbPD2228jJydHqf7Y2Fj4+/vDyMgIenp66N27N3766SelchkZGfi///s/2NjYQFtbG/b29pg2bZrCVP7ff/+NESNGwNTUFHp6evjPf/6Db7/9VqGemJgYCIKAr776CitWrMBLL70EHR0dvPrqq7h586ZSuzXpp4kTJ8LAwADp6ekYNGgQDAwMYGtri82bNwMArl+/Dl9fX+jr66NNmzb44osvqowpJiZGqW8GDBgAExMT6Ovr4+WXX8Ynn3yiFGN9kzySsmjRIrXPthsCkxQiUmcJCQkYFRSIHEdLmM15E80MDeTnHufm4cbJnzEqKBAHduyGi4tLjet96623cPDgQUyfPh0uLi7IysrChQsXkJiYCHd3d/li11deeQUrV67EvXv38Mknn+Cnn37C1atXYWxsDAD47bff0LNnTzRv3hzBwcGws7PDX3/9hRMnTmDFihXVth8WFoYlS5agb9++mDZtGpKSkrB161ZcvnwZP/30k3yU+9y5cwgICICHhwcWL14MDQ0N7NixA76+vjh//jy8vLwAALdv34aXlxdycnIQHBwMZ2dnZGRk4ODBgygoKICWlhbu3buHbt26oaCgADNmzICZmRl27dqF1157DQcPHsSwYcMUYly1ahU0NDTwwQcfQCaTYfXq1XjzzTcRGxsrL1PTfgIq7iINCAhAr169sHr1auzbtw/Tp0+Hvr4+FixYgDfffBPDhw/Htm3bEBgYCG9vb9jb21fbhz/88AMGDRoEa2trvPvuu2jZsiUSExPxzTff4N13363xfwsqIVKt5eTkiCdOnBBzcnKqLRMTEyOOHj1aHD16tJicnPwco6tQWloqlpaWPvd2n3Tr1q0GbV8d+kAU2Q+V2A81++x4HsrLy0X/IYPFl8YGiO5H14sexz5S+nE/ul58aWyA6D9ksFheXl7juo2MjMS33367ynMlJSWipaWl6OrqKhYWFsqPf/PNNyIAcdGiRfJjvXr1Elu0aCGmpaUpxV5px44dIgAxJSVFFEVRvH//vqilpSX2799fLCsrk5fbtGmTCECMioqS19GuXTvRz89Pob6CggLR3t5e7Nevn/xYYGCgqKGhIV6+fFnp/VReGxoaKgIQz58/Lz/36NEj0d7eXrSzs5PHEh0dLQIQO3ToIBYXF8vLfvLJJyIA8fr167XupwkTJogAxPDwcPmx7OxsUVdXVxQEQdy/f7/8+B9//CECEBcvXiw/VhlTdHS0KIqi+PjxY9He3l5s06aNmJ2dXW3fPy/cvKOecCSFiNTVlStXcOOvZJgFdKt2JFwQBJj6d8ONv5IRFxdX47qNjY0RGxuL27dvV9nu/fv3ERISorAOYuDAgXB2dpZPjzx48AA//vgjJk2apLRb+dNG7s+cOYOSkhKEhoYq7E01ZcoUGBoayuu/du0akpOTMXbsWGRlZSEzMxOZmZnIz8/Hq6++ih9//BHl5eUoLy/H0aNHMXjwYHh6elbZRwDw3XffwcvLCz169JCfMzAwQHBwMFJTU5WmzYKCghS2pejZsyeAiimj2vTTkyZPniz/3djYGE5OTtDX18fIkSPlx52cnGBsbCxvpypXr15FSkoKQkNDFUZrnny/z5NKk5S8vDwkJiYiMTEReXl5qqy60eHCWSJSV9HR0RAdrBWmeKrS3MgAooM1zp07V+O6V69ejRs3bqBVq1bw8vJCWFiY/EsxLS0NQMWX5b85OzvLz1eWd3V1rXG7T6tfS0sLbdu2lZ9PTk4GAEyYMEFp/WRkZCSKi4shk8nw4MED5ObmPjOOtLS0Kt9Thw4dFOKq9O/Eq3IT1Ozs7Ke+D0Cxnyrp6Ogorf00MjLCSy+9pJRYGBkZydupyl9//QWg9n1fX1Ty7J7Lly9j9uzZuHDhAsrLywFUrBju2bMnVq9eXWUG+qJ7csU5kxQiUic5uTKILXRrVFZsoQtZrqzGdY8cORI9e/bEkSNHcPr0aaxZswYRERE4fPhwXcNVucrvqTVr1sDNza3KMgYGBnj48GG9tF/dHUliFc+9k1KfqttpCJKTlNjYWPj4+EBLSwuTJ0+WZ46JiYn48ssv0atXL8TExMgXITUVHEkhInVlbGgE4VFhjcoKjwphZGhUq/qtra0REhKCkJAQ3L9/H+7u7lixYgXWrFkDAEhKSoKvr6/CNUlJSfL9Ttq2bQsAuHHjRq3arbw+KSlJXgdQsaFmSkoK+vbtCwBwcHAAABgaGsqPVcXCwgKGhobPjKNNmzZISkpSOv7HH38oxFWX9/G0fqoPlX1z48aNp/bN8yJ5umfBggWwtbWVr6CeMWMGZsyYga1btyIpKQk2NjYKt4w1FVyTQkTqqk+fPhD+uoPHuU+fli+V5UH4647SF2V1ysrKIJMpjrpYWlrCxsYGxcXF8PT0hKWlJbZt24bi4mJ5mZMnTyIxMREDBw4EUJEc9OrVC1FRUUhPT1eo72mjAH379oWWlhY2bNigUG779u2QyWTy+j08PODg4IC1a9dWuTThwYMHACpmBIYOHYoTJ07gypUrSuUq2xgwYAB++eUXXLx4UX4uPz8fn332Gezs7Gp1dxSAGvdTfXB3d4e9vT0+/vhjpdu2G2IERiUjKYsWLULLli2VzllZWSE4OBjLli2T2kyj8+RIypP/kRERNTRPT0+4OrTDjZM/w3JkvyoXRIqiiIenfkYnx/bw8PCoUb2PHj3CSy+9hDfeeAOdO3eGgYEBzpw5g8uXL2PdunVo3rw5Ii
[... remainder of base64-encoded PNG plot data omitted ...]\n",
970 | "text/plain": [
971 | ""
972 | ]
973 | },
974 | "metadata": {}
975 | },
976 | {
977 | "output_type": "execute_result",
978 | "data": {
979 | "text/plain": [
980 | ""
981 | ]
982 | },
983 | "metadata": {},
984 | "execution_count": 241
985 | }
986 | ]
987 | }
988 | ]
989 | }
--------------------------------------------------------------------------------
/data/ditella-crime-2004/CrimebyBlock.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vveitch/causality-tutorials/e08034512b38f938996acd7f6b60b5be996e99ba/data/ditella-crime-2004/CrimebyBlock.dta
--------------------------------------------------------------------------------
/data/ditella-crime-2004/MonthlyPanel.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vveitch/causality-tutorials/e08034512b38f938996acd7f6b60b5be996e99ba/data/ditella-crime-2004/MonthlyPanel.dta
--------------------------------------------------------------------------------
/data/ditella-crime-2004/README:
--------------------------------------------------------------------------------
1 | Data from
2 |
3 | Do Police Reduce Crime? Estimates Using the Allocation of Police Forces After a Terrorist Attack
4 | Rafael Di Tella
5 | Ernesto Schargrodsky
6 | AMERICAN ECONOMIC REVIEW
7 | VOL. 94, NO. 1, MARCH 2004
8 | https://www.aeaweb.org/articles?id=10.1257/000282804322970733
9 |
10 | Also includes a notebook to pre-process the data for the tutorial.
11 | Note: this pre-processing was chosen for simplicity, and should not be viewed as indicative of good statistical practice.
--------------------------------------------------------------------------------
/data/ditella-crime-2004/WeeklyPanel.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vveitch/causality-tutorials/e08034512b38f938996acd7f6b60b5be996e99ba/data/ditella-crime-2004/WeeklyPanel.dta
--------------------------------------------------------------------------------
/data/ditella-crime-2004/data_cleaning.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 11,
6 | "id": "35cf30ec",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "# Data cleaning for demo of difference-in-differences estimation w/ machine learning methods\n",
11 | "# data from \"Do Police Reduce Crime? Estimates Using the Allocation of Police Forces After a Terrorist Attack\" Rafael Di Tella \n",
12 | "# https://www.aeaweb.org/articles?id=10.1257/000282804322970733"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": 57,
18 | "id": "efebdb5e",
19 | "metadata": {},
20 | "outputs": [],
21 | "source": [
22 | "import pandas as pd\n",
23 | "import numpy as np\n",
24 | "import pyreadstat "
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": 66,
30 | "id": "59563136",
31 | "metadata": {},
32 | "outputs": [
33 | {
34 | "name": "stderr",
35 | "output_type": "stream",
36 | "text": [
37 | ":1: DeprecationWarning: `np.object` is a deprecated alias for the builtin `object`. To silence this warning, use `object` by itself. Doing this will not modify any behavior and is safe. \n",
38 | "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
39 | " cbb, _ = pyreadstat.read_dta('CrimebyBlock.dta')\n",
40 | ":2: DeprecationWarning: `np.object` is a deprecated alias for the builtin `object`. To silence this warning, use `object` by itself. Doing this will not modify any behavior and is safe. \n",
41 | "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
42 | " panel, _ = pyreadstat.read_dta('MonthlyPanel.dta')\n"
43 | ]
44 | }
45 | ],
46 | "source": [
47 | "cbb, _ = pyreadstat.read_dta('CrimebyBlock.dta')\n",
48 | "panel, _ = pyreadstat.read_dta('MonthlyPanel.dta')"
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": 67,
54 | "id": "0fa52bad",
55 | "metadata": {},
56 | "outputs": [],
57 | "source": [
58 |     "panel = panel.merge(cbb[['observ','educjefe','ocupado']], on='observ') # education of head of household, employment rate"
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": 68,
64 | "id": "6c908a03",
65 | "metadata": {},
66 | "outputs": [
67 | {
68 | "data": {
297 | "text/plain": [
298 | " observ barrio calle altura institu1 institu3 distanci \\\n",
299 | "0 870.0 Once Cordoba a2300 0.0 1.0 1.0 \n",
300 | "1 870.0 Once Cordoba a2300 0.0 1.0 1.0 \n",
301 | "2 870.0 Once Cordoba a2300 0.0 1.0 1.0 \n",
302 | "3 870.0 Once Cordoba a2300 0.0 1.0 1.0 \n",
303 | "4 870.0 Once Cordoba a2300 0.0 1.0 1.0 \n",
304 | "... ... ... ... ... ... ... ... \n",
305 | "9631 140.0 Belgrano Virrey del Pino a1600 0.0 0.0 3.0 \n",
306 | "9632 140.0 Belgrano Virrey del Pino a1600 0.0 0.0 3.0 \n",
307 | "9633 140.0 Belgrano Virrey del Pino a1600 0.0 0.0 3.0 \n",
308 | "9634 140.0 Belgrano Virrey del Pino a1600 0.0 0.0 3.0 \n",
309 | "9635 140.0 Belgrano Virrey del Pino a1600 0.0 0.0 3.0 \n",
310 | "\n",
311 | " edpub estserv banco totrob mes educjefe ocupado \n",
312 | "0 1.0 0.0 0.0 0.00 4.0 10.846611 0.949495 \n",
313 | "1 1.0 0.0 0.0 0.00 5.0 10.846611 0.949495 \n",
314 | "2 1.0 0.0 0.0 0.00 6.0 10.846611 0.949495 \n",
315 | "3 1.0 0.0 0.0 0.00 7.0 10.846611 0.949495 \n",
316 | "4 1.0 0.0 0.0 0.00 8.0 10.846611 0.949495 \n",
317 | "... ... ... ... ... ... ... ... \n",
318 | "9631 0.0 0.0 0.0 0.25 10.0 12.771961 0.950423 \n",
319 | "9632 0.0 0.0 0.0 0.00 11.0 12.771961 0.950423 \n",
320 | "9633 0.0 0.0 0.0 0.00 12.0 12.771961 0.950423 \n",
321 | "9634 0.0 0.0 0.0 0.00 72.0 12.771961 0.950423 \n",
322 | "9635 0.0 0.0 0.0 0.00 73.0 12.771961 0.950423 \n",
323 | "\n",
324 | "[9636 rows x 14 columns]"
325 | ]
326 | },
327 | "execution_count": 68,
328 | "metadata": {},
329 | "output_type": "execute_result"
330 | }
331 | ],
332 | "source": [
333 | "panel"
334 | ]
335 | },
336 | {
337 | "cell_type": "code",
338 | "execution_count": 69,
339 | "id": "64777226",
340 | "metadata": {},
341 | "outputs": [],
342 | "source": [
343 | "panel = panel.drop(columns=['altura','institu3','distanci']) # unsure what these are"
344 | ]
345 | },
346 | {
347 | "cell_type": "code",
348 | "execution_count": 70,
349 | "id": "64ecee42",
350 | "metadata": {},
351 | "outputs": [],
352 | "source": [
353 | "english_translation = {'observ': 'block',\n",
354 | " 'barrio': 'neighbourhood',\n",
355 | " 'calle': 'street',\n",
356 | " 'institu1': 'jewish_insitute',\n",
357 | " 'edpub': 'public_institution',\n",
358 | " 'estserv': 'gas_station',\n",
359 | " 'banco': 'bank',\n",
360 | " 'totrob': 'car_thefts',\n",
361 | " 'mes': 'month',\n",
362 | " 'educjefe': 'education',\n",
363 | " 'ocupado': 'employment_rate'\n",
364 | " }\n",
365 | "panel = panel.rename(columns=english_translation)"
366 | ]
367 | },
368 | {
369 | "cell_type": "code",
370 | "execution_count": 71,
371 | "id": "8e49470e",
372 | "metadata": {},
373 | "outputs": [],
374 | "source": [
375 | "panel.to_csv(\"DiTella_crime.csv\", index=False)"
376 | ]
377 | },
378 | {
379 | "cell_type": "code",
380 | "execution_count": null,
381 | "id": "98cd4270",
382 | "metadata": {},
383 | "outputs": [],
384 | "source": []
385 | }
386 | ],
387 | "metadata": {
388 | "kernelspec": {
389 | "display_name": "Python 3",
390 | "language": "python",
391 | "name": "python3"
392 | },
393 | "language_info": {
394 | "codemirror_mode": {
395 | "name": "ipython",
396 | "version": 3
397 | },
398 | "file_extension": ".py",
399 | "mimetype": "text/x-python",
400 | "name": "python",
401 | "nbconvert_exporter": "python",
402 | "pygments_lexer": "ipython3",
403 | "version": "3.8.8"
404 | }
405 | },
406 | "nbformat": 4,
407 | "nbformat_minor": 5
408 | }
409 |
--------------------------------------------------------------------------------
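A quick way to confirm that the export above produced what the tutorial expects is to reload DiTella_crime.csv and check its columns. This is a minimal sketch, assuming the file sits in the working directory; the column names mirror the english_translation map above (including the original "jewish_insitute" spelling):

    import pandas as pd

    df = pd.read_csv("DiTella_crime.csv")

    # columns produced by the rename step in data_cleaning.ipynb
    expected = {"block", "neighbourhood", "street", "jewish_insitute",
                "public_institution", "gas_station", "bank",
                "car_thefts", "month", "education", "employment_rate"}
    assert expected.issubset(df.columns), expected - set(df.columns)

    print(df.shape)                       # one row per block-month
    print(sorted(df["month"].unique()))   # month codes used below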
/difference_in_differences.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "difference-in-differences.ipynb",
7 | "provenance": [],
8 | "collapsed_sections": [],
9 | "toc_visible": true,
10 | "include_colab_link": true
11 | },
12 | "kernelspec": {
13 | "name": "python3",
14 | "display_name": "Python 3"
15 | },
16 | "language_info": {
17 | "name": "python"
18 | }
19 | },
20 | "cells": [
31 | {
32 | "cell_type": "markdown",
33 | "metadata": {
34 | "id": "QfZkNLUb4B-p"
35 | },
36 | "source": [
37 | "# Difference-in-Differences Estimation Tutorial\n",
38 | "\n",
39 |         "A short example of how to estimate the difference-in-differences ATT with two-period panel data using machine learning methods.\n",
40 | "\n",
41 | "Data from this paper: https://www.aeaweb.org/articles?id=10.1257/000282804322970733\n",
42 | "\n",
43 |         "In brief: following a terrorist attack on a Jewish community center in Buenos Aires, additional police officers were stationed on blocks containing Jewish institutions. This provides a natural experiment for the effect of policing on crime. The data record the number of car thefts on many city blocks in the months before and after the increase in policing. Comparing the change in thefts on blocks with Jewish institutions (hence, increased police presence) to the change on other blocks gives a difference-in-differences measurement of the effect; the estimand is written out just below. However, blocks with Jewish institutions may differ in significant ways---e.g., they may tend to be better educated or located in certain neighbourhoods---so we use machine learning methods to adjust for such differences. "
44 | ]
45 | },
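The comparison sketched in the cell above is the standard two-period difference-in-differences estimand for the effect on the treated blocks. In notation introduced here (not used in the notebook itself): let A = 1 indicate a block with a Jewish institution and let Y_pre, Y_post denote its car-theft rate before and after the policing change. Under the parallel-trends assumption,

    \tau_{\mathrm{ATT}}
      = \mathbb{E}\bigl[\,Y_{\mathrm{post}}(1) - Y_{\mathrm{post}}(0) \mid A = 1\,\bigr]
      = \mathbb{E}\bigl[\,Y_{\mathrm{post}} - Y_{\mathrm{pre}} \mid A = 1\,\bigr]
        - \mathbb{E}\bigl[\,Y_{\mathrm{post}} - Y_{\mathrm{pre}} \mid A = 0\,\bigr].

The second equality is what parallel trends buys: untreated blocks reveal the trend the treated blocks would have followed without the extra police. Because that assumption is more plausible among comparable blocks, the notebook conditions on block-level covariates, which is where the machine learning models come in.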
46 | {
47 | "cell_type": "code",
48 | "metadata": {
49 | "id": "dS2X3Bq1-fxE"
50 | },
51 | "source": [
52 | "import numpy as np\n",
53 | "import pandas as pd\n",
54 | "import scipy as sp\n",
55 | "from sklearn import preprocessing\n",
56 | "from sklearn.linear_model import LinearRegression, LogisticRegression\n",
57 | "from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor\n",
58 | "from sklearn.model_selection import KFold, StratifiedKFold, train_test_split\n",
59 | "from sklearn.metrics import mean_squared_error, log_loss\n",
60 | "import sklearn\n",
61 | "import os"
62 | ],
63 | "execution_count": 197,
64 | "outputs": []
65 | },
66 | {
67 | "cell_type": "code",
68 | "metadata": {
69 | "id": "nxJ46X9cFJ9X"
70 | },
71 | "source": [
72 | "RANDOM_SEED=42\n",
73 | "np.random.seed(RANDOM_SEED)"
74 | ],
75 | "execution_count": 198,
76 | "outputs": []
77 | },
78 | {
79 | "cell_type": "markdown",
80 | "metadata": {
81 | "id": "yPbJeayiEs3u"
82 | },
83 | "source": [
84 |         "## Load and Format Data\n",
85 |         "\n",
86 |         "We reformat the data so that there is an \"outcome\" column equal to the difference in car thefts between the after and before periods, a \"treatment\" column indicating the presence of a Jewish institution, and \"confounders\" denoting variables that may differ between Jewish and non-Jewish blocks and that may also affect the change in crime rate. \n",
87 |         "\n",
88 |         "After this formatting, the estimation procedure is identical to computing the ATT with a regression adjustment; the identification is written out just below."
89 | ]
90 | },
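To spell out the claim in the cell above (again in our own notation): write Y = Y_post - Y_pre for the reformatted outcome, A for the treatment indicator, and X for the confounders. Under parallel trends conditional on X, the ATT is identified by the same expression as in an ordinary observational ATT analysis of (Y, A, X):

    \tau_{\mathrm{ATT}}
      = \mathbb{E}\bigl[\,Y \mid A = 1\,\bigr]
        - \mathbb{E}\bigl[\,Q(0, X) \mid A = 1\,\bigr],
    \qquad
    Q(a, x) = \mathbb{E}\bigl[\,Y \mid A = a,\, X = x\,\bigr].

This is why the remainder of the notebook can proceed exactly as if it were estimating an ATT with outcome Y, treatment A, and confounders X.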
91 | {
92 | "cell_type": "code",
93 | "metadata": {
94 | "id": "2AC9TPko-hbt"
95 | },
96 | "source": [
97 | "panel = pd.read_csv('https://raw.githubusercontent.com/vveitch/causality-tutorials/main/data/ditella-crime-2004/DiTella_crime.csv')\n"
98 | ],
99 | "execution_count": 199,
100 | "outputs": []
101 | },
102 | {
103 | "cell_type": "code",
104 | "metadata": {
105 | "colab": {
106 | "base_uri": "https://localhost:8080/",
107 | "height": 203
108 | },
109 | "id": "-A1LX6-t-hZD",
110 | "outputId": "b94e3198-8c72-423d-a01d-7fc26cc26856"
111 | },
112 | "source": [
113 | "panel.head()"
114 | ],
115 | "execution_count": 200,
116 | "outputs": [
117 | {
118 | "output_type": "execute_result",
119 | "data": {
227 | "text/plain": [
228 | " block neighbourhood street ... month education employment_rate\n",
229 | "0 870.0 Once Cordoba ... 4.0 10.846611 0.949495\n",
230 | "1 870.0 Once Cordoba ... 5.0 10.846611 0.949495\n",
231 | "2 870.0 Once Cordoba ... 6.0 10.846611 0.949495\n",
232 | "3 870.0 Once Cordoba ... 7.0 10.846611 0.949495\n",
233 | "4 870.0 Once Cordoba ... 8.0 10.846611 0.949495\n",
234 | "\n",
235 | "[5 rows x 11 columns]"
236 | ]
237 | },
238 | "metadata": {},
239 | "execution_count": 200
240 | }
241 | ]
242 | },
243 | {
244 | "cell_type": "code",
245 | "metadata": {
246 | "id": "zUpUVabW59bS"
247 | },
248 | "source": [
249 |         "# The terrorist attack occurred on July 18 and the increased police presence began on July 25. Months 4, 5, 6, and 71 (the part of July before the change) form the 'before' period; the remaining months form the 'after' period. \n",
250 | "first_period = panel['month'].isin([4., 5., 6., 71.])\n",
251 | "panel['first_period']=first_period"
252 | ],
253 | "execution_count": 201,
254 | "outputs": []
255 | },
256 | {
257 | "cell_type": "code",
258 | "metadata": {
259 | "id": "D-8dY5W8Q-PJ"
260 | },
261 | "source": [
262 | "# code neighbourhood as integer for later convenience\n",
263 | "panel['neighbourhood']=panel['neighbourhood'].astype('category').cat.codes"
264 | ],
265 | "execution_count": 202,
266 | "outputs": []
267 | },
268 | {
269 | "cell_type": "code",
270 | "metadata": {
271 | "colab": {
272 | "base_uri": "https://localhost:8080/",
273 | "height": 447
274 | },
275 | "id": "ZBR4R-90DmRn",
276 | "outputId": "a121111a-1d58-4426-dd0c-354c147453d6"
277 | },
278 | "source": [
279 |         "# We need to collapse the multiple before and after months into a single observation per block and period\n",
280 |         "# There is no clear canonical way to do this, but averaging the monthly values seems reasonable\n",
281 | "panel = panel.groupby(['block', 'first_period']).mean()\n",
282 | "panel = panel.reset_index(level='first_period')\n",
283 | "panel"
284 | ],
285 | "execution_count": 203,
286 | "outputs": [
287 | {
288 | "output_type": "execute_result",
289 | "data": {
483 | "text/plain": [
484 | " first_period neighbourhood ... education employment_rate\n",
485 | "block ... \n",
486 | "1.0 False 0 ... 11.919889 0.926594\n",
487 | "1.0 True 0 ... 11.919889 0.926594\n",
488 | "2.0 False 0 ... 11.919889 0.926594\n",
489 | "2.0 True 0 ... 11.919889 0.926594\n",
490 | "3.0 False 0 ... 11.919889 0.926594\n",
491 | "... ... ... ... ... ...\n",
492 | "874.0 True 1 ... 10.898485 0.939759\n",
493 | "875.0 False 1 ... 10.898485 0.939759\n",
494 | "875.0 True 1 ... 10.898485 0.939759\n",
495 | "876.0 False 1 ... 10.898485 0.939759\n",
496 | "876.0 True 1 ... 10.898485 0.939759\n",
497 | "\n",
498 | "[1752 rows x 10 columns]"
499 | ]
500 | },
501 | "metadata": {},
502 | "execution_count": 203
503 | }
504 | ]
505 | },
506 | {
507 | "cell_type": "code",
508 | "metadata": {
509 | "id": "APOqpHmrOGzo",
510 | "colab": {
511 | "base_uri": "https://localhost:8080/"
512 | },
513 | "outputId": "397aa5fc-b7c1-4617-dc99-765a18ceda5f"
514 | },
515 | "source": [
516 |         "# now create a version of the data with one row per block and \"outcome\" = after-period thefts minus before-period thefts\n",
517 | "compact_df = panel[~panel['first_period']]\n",
518 | "car_thefts = panel['car_thefts'].values\n",
519 | "compact_df['Y1-Y0']=car_thefts[~panel['first_period']] - car_thefts[panel['first_period']]"
520 | ],
521 | "execution_count": 204,
522 | "outputs": [
523 | {
524 | "output_type": "stream",
525 | "name": "stderr",
526 | "text": [
527 | "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:4: SettingWithCopyWarning: \n",
528 | "A value is trying to be set on a copy of a slice from a DataFrame.\n",
529 | "Try using .loc[row_indexer,col_indexer] = value instead\n",
530 | "\n",
531 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
532 | " after removing the cwd from sys.path.\n"
533 | ]
534 | }
535 | ]
536 | },
537 | {
538 | "cell_type": "code",
539 | "metadata": {
540 | "id": "uN-97eQ3FvW5"
541 | },
542 | "source": [
543 |         "# format this in a manner compatible with ATT estimation: outcome, treatment, and confounders\n",
544 | "compact_df = compact_df.reset_index()\n",
545 | "\n",
546 | "outcome = compact_df['Y1-Y0']\n",
547 | "treatment = compact_df['jewish_insitute']\n",
548 | "confounders = compact_df[['neighbourhood','public_institution', 'gas_station', 'bank', 'education', 'employment_rate']]"
549 | ],
550 | "execution_count": 205,
551 | "outputs": []
552 | },
553 | {
554 | "cell_type": "code",
555 | "metadata": {
556 | "colab": {
557 | "base_uri": "https://localhost:8080/"
558 | },
559 | "id": "cYl3bRQ4HliO",
560 | "outputId": "7379f923-f388-400e-8557-3848238cc675"
561 | },
562 | "source": [
563 | "# finally, do some light data cleaning\n",
564 | "treatment=treatment.astype(int)\n",
565 | "\n",
566 | "# scale continuous covariates\n",
567 | "cont_vars = ['education', 'employment_rate']\n",
568 | "confounders[cont_vars] = preprocessing.scale(confounders[cont_vars])\n",
569 | "\n"
570 | ],
571 | "execution_count": 206,
572 | "outputs": [
573 | {
574 | "output_type": "stream",
575 | "name": "stderr",
576 | "text": [
577 | "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:6: SettingWithCopyWarning: \n",
578 | "A value is trying to be set on a copy of a slice from a DataFrame.\n",
579 | "Try using .loc[row_indexer,col_indexer] = value instead\n",
580 | "\n",
581 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
582 | " \n",
583 | "/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py:1734: SettingWithCopyWarning: \n",
584 | "A value is trying to be set on a copy of a slice from a DataFrame.\n",
585 | "Try using .loc[row_indexer,col_indexer] = value instead\n",
586 | "\n",
587 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
588 | " isetter(loc, value[:, i].tolist())\n"
589 | ]
590 | }
591 | ]
592 | },
593 | {
594 | "cell_type": "markdown",
595 | "metadata": {
596 | "id": "C576dWRsa3ad"
597 | },
598 | "source": [
599 | "## Specify Nuisance Function Models\n",
600 | "\n",
601 |         "The next step is to specify models for the two nuisance functions used by the estimator: the conditional expected outcome and the propensity score (both are defined just below)."
602 | ]
603 | },
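Concretely (same notation as above), the two nuisance functions fit in the next cells are

    Q(a, x) = \mathbb{E}[\,Y \mid A = a,\, X = x\,],
    \qquad
    g(x) = \Pr(A = 1 \mid X = x).

Q_model estimates Q and g_model estimates g. The ATT estimator at the end only needs Q(0, x), the expected change in thefts for an untreated block with covariates x, together with g(x); the cross-fitting helper below computes Q(1, x) as well, but it is not used by the ATT score.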
604 | {
605 | "cell_type": "code",
606 | "metadata": {
607 | "colab": {
608 | "base_uri": "https://localhost:8080/"
609 | },
610 | "id": "qyOhSZRQRb8W",
611 | "outputId": "63ed01b0-48af-41b4-d878-909b06470000"
612 | },
613 | "source": [
614 | "# specify a model for the conditional expected outcome\n",
615 | "\n",
616 | "# TODO(victorveitch) the covariates have basically no predictive power, replace this example with something better\n",
617 | "\n",
618 | "# make a function that returns a sklearn model for later use in k-folding\n",
619 | "def make_Q_model():\n",
620 | " # return LinearRegression()\n",
621 | " return RandomForestRegressor(random_state=RANDOM_SEED, n_estimators=100, max_depth=2)\n",
622 | "Q_model = make_Q_model()\n",
623 | "\n",
624 | "# Sanity check that chosen model actually improves test error\n",
625 |         "# A real analysis should give substantial attention to model selection and validation (one option is sketched just after this cell)\n",
626 | "\n",
627 | "X_w_treatment = confounders.copy()\n",
628 | "X_w_treatment[\"treatment\"] = treatment\n",
629 | "\n",
630 | "X_train, X_test, y_train, y_test = train_test_split(X_w_treatment, outcome, test_size=0.2)\n",
631 | "Q_model.fit(X_train, y_train)\n",
632 | "y_pred = Q_model.predict(X_test)\n",
633 | "\n",
634 | "test_mse=mean_squared_error(y_pred, y_test)\n",
635 | "print(f\"Test MSE of fit model {test_mse}\") \n",
636 | "baseline_mse=mean_squared_error(y_train.mean()*np.ones_like(y_test), y_test)\n",
637 | "print(f\"Test MSE of no-covariate model {baseline_mse}\")"
638 | ],
639 | "execution_count": 207,
640 | "outputs": [
641 | {
642 | "output_type": "stream",
643 | "name": "stdout",
644 | "text": [
645 | "Test MSE of fit model 0.027801530904389606\n",
646 | "Test MSE of no-covariate model 0.028564516759592096\n"
647 | ]
648 | }
649 | ]
650 | },
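The comment in the cell above flags that model selection deserves real attention. A minimal sketch of one option, assuming the confounders, treatment, outcome, and RANDOM_SEED objects defined earlier in this notebook (the hyperparameter grid is illustrative, not tuned):

    from sklearn.ensemble import RandomForestRegressor
    from sklearn.model_selection import GridSearchCV

    # candidate outcome models: random forests of varying capacity
    param_grid = {"max_depth": [2, 3, 5, None], "n_estimators": [100, 300]}

    X_w_treatment = confounders.copy()
    X_w_treatment["treatment"] = treatment

    # 5-fold cross-validated search, scored by (negative) mean squared error
    search = GridSearchCV(
        RandomForestRegressor(random_state=RANDOM_SEED),
        param_grid=param_grid,
        scoring="neg_mean_squared_error",
        cv=5,
    )
    search.fit(X_w_treatment, outcome)
    print(search.best_params_, -search.best_score_)

The selected hyperparameters could then be baked into make_Q_model so that the cross-fitting below uses the chosen model.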
651 | {
652 | "cell_type": "code",
653 | "metadata": {
654 | "colab": {
655 | "base_uri": "https://localhost:8080/"
656 | },
657 | "id": "uq6eZEBXbsaI",
658 | "outputId": "5b82bcee-03a4-48db-8a16-8c68168245b4"
659 | },
660 | "source": [
661 | "# specify a model for the propensity score\n",
662 | "\n",
663 | "def make_g_model():\n",
664 | "# return LogisticRegression(max_iter=1000)\n",
665 | " return RandomForestClassifier(n_estimators=100, max_depth=2)\n",
666 | "\n",
667 | "g_model = make_g_model()\n",
668 | "# Sanity check that chosen model actually improves test error\n",
669 | "# A real analysis should give substantial attention to model selection and validation \n",
670 | "\n",
671 | "X_train, X_test, a_train, a_test = train_test_split(confounders, treatment, test_size=0.2)\n",
672 | "g_model.fit(X_train, a_train)\n",
673 | "a_pred = g_model.predict_proba(X_test)[:,1]\n",
674 | "\n",
675 | "test_ce=log_loss(a_test, a_pred)\n",
676 | "print(f\"Test CE of fit model {test_ce}\") \n",
677 | "baseline_ce=log_loss(a_test, a_train.mean()*np.ones_like(a_test))\n",
678 | "print(f\"Test CE of no-covariate model {baseline_ce}\")"
679 | ],
680 | "execution_count": 208,
681 | "outputs": [
682 | {
683 | "output_type": "stream",
684 | "name": "stdout",
685 | "text": [
686 | "Test CE of fit model 0.1597166570168377\n",
687 | "Test CE of no-covariate model 0.16733990853941555\n"
688 | ]
689 | }
690 | ]
691 | },
692 | {
693 | "cell_type": "markdown",
694 | "metadata": {
695 | "id": "2RkvV_4_dFWo"
696 | },
697 | "source": [
698 |         "## Use cross-fitting to get predicted outcomes and propensity scores for each unit"
699 | ]
700 | },
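Cross-fitting means each unit's nuisance predictions come from a model that never saw that unit, so overfitting in the flexible ML models does not leak into the downstream ATT estimate. The helper functions in the next cell implement this by hand. For the propensity score alone, sklearn's cross_val_predict gives essentially the same thing (up to the exact fold assignment); a sketch, reusing the make_g_model, confounders, and treatment objects defined above:

    from sklearn.model_selection import cross_val_predict

    # out-of-fold propensity scores: each prediction comes from a model
    # fit on the other folds (10 folds, matching n_splits below)
    g_alt = cross_val_predict(
        make_g_model(), confounders, treatment, cv=10, method="predict_proba"
    )[:, 1]

The outcome predictions Q(0, x) and Q(1, x) cannot be obtained this way, because they require predictions at counterfactual inputs with the treatment column forced to 0 or 1, which is exactly what the custom helper below handles.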
701 | {
702 | "cell_type": "code",
703 | "metadata": {
704 | "id": "KA0AsEGJ_X3b"
705 | },
706 | "source": [
707 | "# helper functions to implement the cross fitting\n",
708 | "\n",
709 | "def treatment_k_fold_fit_and_predict(make_model, X:pd.DataFrame, A:np.array, n_splits:int):\n",
710 | " \"\"\"\n",
711 | " Implements K fold cross-fitting for the model predicting the treatment A. \n",
712 | " That is, \n",
713 | " 1. Split data into K folds\n",
714 | " 2. For each fold j, the model is fit on the other K-1 folds\n",
715 | " 3. The fitted model is used to make predictions for each data point in fold j\n",
716 | " Returns an array containing the predictions \n",
717 | "\n",
718 | " Args:\n",
719 |     "      make_model: function that returns an sklearn model (which implements fit and predict_proba)\n",
720 | " X: dataframe of variables to adjust for\n",
721 | " A: array of treatments\n",
722 | " n_splits: number of splits to use\n",
723 | " \"\"\"\n",
724 | " predictions = np.full_like(A, np.nan, dtype=float)\n",
725 | " kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)\n",
726 | " \n",
727 | " for train_index, test_index in kf.split(X, A):\n",
728 | " X_train = X.loc[train_index]\n",
729 | " A_train = A.loc[train_index]\n",
730 | " g = make_model()\n",
731 | " g.fit(X_train, A_train)\n",
732 | "\n",
733 | " # get predictions for split\n",
734 | " predictions[test_index] = g.predict_proba(X.loc[test_index])[:, 1]\n",
735 | "\n",
736 | " assert np.isnan(predictions).sum() == 0\n",
737 | " return predictions\n",
738 | "\n",
739 | "\n",
740 | "def outcome_k_fold_fit_and_predict(make_model, X:pd.DataFrame, y:np.array, A:np.array, n_splits:int, output_type:str):\n",
741 | " \"\"\"\n",
742 | " Implements K fold cross-fitting for the model predicting the outcome Y. \n",
743 | " That is, \n",
744 | " 1. Split data into K folds\n",
745 | " 2. For each fold j, the model is fit on the other K-1 folds\n",
746 | " 3. The fitted model is used to make predictions for each data point in fold j\n",
747 |     "    Returns two arrays containing the predictions for each unit with treatment set to 0 and to 1, respectively \n",
748 | "\n",
749 | " Args:\n",
750 |     "      make_model: function that returns an sklearn model (that implements fit and either predict_proba or predict)\n",
751 | " X: dataframe of variables to adjust for\n",
752 | " y: array of outcomes\n",
753 | " A: array of treatments\n",
754 | " n_splits: number of splits to use\n",
755 | " output_type: type of outcome, \"binary\" or \"continuous\"\n",
756 | "\n",
757 | " \"\"\"\n",
758 | " predictions0 = np.full_like(A, np.nan, dtype=float)\n",
759 | " predictions1 = np.full_like(y, np.nan, dtype=float)\n",
760 | " if output_type == 'binary':\n",
761 | " kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)\n",
762 | " elif output_type == 'continuous':\n",
763 | " kf = KFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)\n",
764 | "\n",
765 | " # include the treatment as input feature\n",
766 | " X_w_treatment = X.copy()\n",
767 | " X_w_treatment[\"A\"] = A\n",
768 | "\n",
769 | " # for predicting effect under treatment / control status for each data point \n",
770 | " X0 = X_w_treatment.copy()\n",
771 | " X0[\"A\"] = 0\n",
772 | " X1 = X_w_treatment.copy()\n",
773 | " X1[\"A\"] = 1\n",
774 | "\n",
775 | " \n",
776 | " for train_index, test_index in kf.split(X_w_treatment, y):\n",
777 | " X_train = X_w_treatment.loc[train_index]\n",
778 | " y_train = y.loc[train_index]\n",
779 | " q = make_model()\n",
780 | " q.fit(X_train, y_train)\n",
781 | "\n",
782 | " if output_type =='binary':\n",
783 | " predictions0[test_index] = q.predict_proba(X0.loc[test_index])[:, 1]\n",
784 | " predictions1[test_index] = q.predict_proba(X1.loc[test_index])[:, 1]\n",
785 | " elif output_type == 'continuous':\n",
786 | " predictions0[test_index] = q.predict(X0.loc[test_index])\n",
787 | " predictions1[test_index] = q.predict(X1.loc[test_index])\n",
788 | "\n",
789 | " assert np.isnan(predictions0).sum() == 0\n",
790 | " assert np.isnan(predictions1).sum() == 0\n",
791 | " return predictions0, predictions1"
792 | ],
793 | "execution_count": 209,
794 | "outputs": []
795 | },
796 | {
797 | "cell_type": "code",
798 | "metadata": {
799 | "id": "wVcE6pRQeMNf"
800 | },
801 | "source": [
802 | "g = treatment_k_fold_fit_and_predict(make_g_model, X=confounders, A=treatment, n_splits=10)"
803 | ],
804 | "execution_count": 210,
805 | "outputs": []
806 | },
807 | {
808 | "cell_type": "code",
809 | "metadata": {
810 | "id": "GLEHlLLdWSh9"
811 | },
812 | "source": [
813 | "Q0,Q1=outcome_k_fold_fit_and_predict(make_Q_model, X=confounders, y=outcome, A=treatment, n_splits=10, output_type=\"continuous\")"
814 | ],
815 | "execution_count": 211,
816 | "outputs": []
817 | },
818 | {
819 | "cell_type": "code",
820 | "metadata": {
821 | "colab": {
822 | "base_uri": "https://localhost:8080/",
823 | "height": 203
824 | },
825 | "id": "_NVCV0q0g8wQ",
826 | "outputId": "625e4e9d-8ea4-4e57-c684-6037a4ce3b3f"
827 | },
828 | "source": [
829 | "data_and_nuisance_estimates = pd.DataFrame({'g': g, 'Q0': Q0, 'Q1': Q1, 'A': treatment, 'Y': outcome})\n",
830 | "data_and_nuisance_estimates.head()"
831 | ],
832 | "execution_count": 212,
833 | "outputs": [
834 | {
835 | "output_type": "execute_result",
836 | "data": {
908 | "text/plain": [
909 | " g Q0 Q1 A Y\n",
910 | "0 0.027920 -0.065413 -0.133397 0 0.000000\n",
911 | "1 0.027276 -0.070393 -0.118878 0 0.156250\n",
912 | "2 0.028456 -0.076041 -0.142226 0 -0.302083\n",
913 | "3 0.028456 -0.065413 -0.133397 0 0.062500\n",
914 | "4 0.025655 -0.020747 -0.085449 0 0.062500"
915 | ]
916 | },
917 | "metadata": {},
918 | "execution_count": 212
919 | }
920 | ]
921 | },
922 | {
923 | "cell_type": "markdown",
924 | "metadata": {
925 | "id": "VNhM7URdgzQB"
926 | },
927 | "source": [
928 |         "## Combine predicted values and data into an estimate of the ATT"
929 | ]
930 | },
931 | {
932 | "cell_type": "code",
933 | "metadata": {
934 | "id": "J-vONC5ejwh2"
935 | },
936 | "source": [
937 | "def att_aiptw(Q0, Q1, g, A, Y, prob_t=None):\n",
938 | " \"\"\"\n",
939 | " # Double ML estimator for the ATT\n",
940 | " This uses the ATT specific scores, see equation 3.9 of https://www.econstor.eu/bitstream/10419/149795/1/869216953.pdf\n",
941 | " \"\"\"\n",
942 | "\n",
943 | " if prob_t is None:\n",
944 | " prob_t = A.mean() # estimate marginal probability of treatment\n",
945 | "\n",
946 | " tau_hat = (A*(Y-Q0) - (1-A)*(g/(1-g))*(Y-Q0)).mean()/ prob_t\n",
947 | " \n",
948 | " scores = (A*(Y-Q0) - (1-A)*(g/(1-g))*(Y-Q0) - tau_hat*A) / prob_t\n",
949 | " n = Y.shape[0] # number of observations\n",
950 | " std_hat = np.std(scores) / np.sqrt(n)\n",
951 | "\n",
952 | " return tau_hat, std_hat\n"
953 | ],
954 | "execution_count": 213,
955 | "outputs": []
956 | },
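For reference, in the notation used earlier (hats denote cross-fitted estimates), the point estimate computed by att_aiptw is

    \hat{\tau}_{\mathrm{ATT}}
      = \frac{1}{n\,\hat{p}} \sum_{i=1}^{n}
        \Bigl[\, A_i \bigl(Y_i - \hat{Q}(0, X_i)\bigr)
          - (1 - A_i)\, \frac{\hat{g}(X_i)}{1 - \hat{g}(X_i)}\, \bigl(Y_i - \hat{Q}(0, X_i)\bigr) \Bigr],
    \qquad
    \hat{p} = \frac{1}{n} \sum_{i=1}^{n} A_i ,

and the reported standard error is the sample standard deviation of the per-unit scores

    \varphi_i = \frac{A_i \bigl(Y_i - \hat{Q}(0, X_i)\bigr)
      - (1 - A_i)\, \frac{\hat{g}(X_i)}{1 - \hat{g}(X_i)}\, \bigl(Y_i - \hat{Q}(0, X_i)\bigr)
      - \hat{\tau}_{\mathrm{ATT}}\, A_i}{\hat{p}}

divided by \sqrt{n}, exactly as in the code above.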
957 | {
958 | "cell_type": "code",
959 | "metadata": {
960 | "colab": {
961 | "base_uri": "https://localhost:8080/"
962 | },
963 | "id": "SjDj0F9Bm9uq",
964 | "outputId": "bfbca9bb-c2e0-4171-d65f-bebb71fd0da1"
965 | },
966 | "source": [
967 | "tau_hat, std_hat = att_aiptw(**data_and_nuisance_estimates)\n",
968 | "print(f\"The estimate is {tau_hat} pm {1.96*std_hat}\")"
969 | ],
970 | "execution_count": 214,
971 | "outputs": [
972 | {
973 | "output_type": "stream",
974 | "name": "stdout",
975 | "text": [
976 | "The estimate is -0.0777691984649497 pm 0.05810535308191231\n"
977 | ]
978 | }
979 | ]
980 | },
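The printed output above can be read as a 95% confidence interval: the two numbers are the point estimate and 1.96 times its standard error. A one-line check with the values copied from that output:

    # point estimate and half-width printed by the cell above
    tau_hat, half_width = -0.0777691984649497, 0.05810535308191231
    print(f"95% CI: ({tau_hat - half_width:.3f}, {tau_hat + half_width:.3f})")
    # 95% CI: (-0.136, -0.020)

The interval excludes zero, i.e. blocks that received extra police saw a larger drop in car thefts than comparable blocks that did not.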
981 | {
982 | "cell_type": "code",
983 | "metadata": {
984 | "colab": {
985 | "base_uri": "https://localhost:8080/"
986 | },
987 | "id": "R3YqKD60UElw",
988 | "outputId": "b4dad931-c970-429e-8c83-ece9db655c9f"
989 | },
990 | "source": [
991 |     "# for comparison, the simple difference-in-differences point estimate without any covariate adjustment\n",
992 | "outcome[treatment==1].mean()-outcome[treatment==0].mean()"
993 | ],
994 | "execution_count": 215,
995 | "outputs": [
996 | {
997 | "output_type": "execute_result",
998 | "data": {
999 | "text/plain": [
1000 | "-0.06683773314434818"
1001 | ]
1002 | },
1003 | "metadata": {},
1004 | "execution_count": 215
1005 | }
1006 | ]
1007 | },
1008 | {
1009 | "cell_type": "code",
1010 | "metadata": {
1011 | "id": "37ep7LyGUHH9"
1012 | },
1013 | "source": [
1014 | ""
1015 | ],
1016 | "execution_count": 215,
1017 | "outputs": []
1018 | }
1019 | ]
1020 | }
--------------------------------------------------------------------------------