├── .gitignore
├── ATE_Estimation_with_Machine_Learning.ipynb
├── IV_Strategies.ipynb
├── LICENSE
├── README.md
├── Sensitivity_Analysis.ipynb
├── data
│   ├── ditella-crime-2004
│   │   ├── CrimebyBlock.dta
│   │   ├── DiTella_crime.csv
│   │   ├── MonthlyPanel.dta
│   │   ├── README
│   │   ├── README~
│   │   ├── WeeklyPanel.dta
│   │   └── data_cleaning.ipynb
│   ├── hbp_dbp.csv
│   └── outvote_2020_data.csv
└── difference_in_differences.ipynb
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
--------------------------------------------------------------------------------
/ATE_Estimation_with_Machine_Learning.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "ATE-Estimation-with-Machine-Learning.ipynb",
7 | "provenance": [],
8 | "collapsed_sections": [],
9 | "toc_visible": true,
10 | "include_colab_link": true
11 | },
12 | "kernelspec": {
13 | "name": "python3",
14 | "display_name": "Python 3"
15 | },
16 | "language_info": {
17 | "name": "python"
18 | }
19 | },
20 | "cells": [
21 | {
22 | "cell_type": "markdown",
23 | "metadata": {
24 | "id": "view-in-github",
25 | "colab_type": "text"
26 | },
27 | "source": [
 28 |         "(Open in Colab badge link)"
29 | ]
30 | },
31 | {
32 | "cell_type": "markdown",
33 | "metadata": {
34 | "id": "QfZkNLUb4B-p"
35 | },
36 | "source": [
37 | "# ATT Estimation Tutorial\n",
38 | "\n",
 39 |         "This tutorial gives a short example of how to estimate the average treatment effect on the treated (ATT) using machine learning methods."
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "metadata": {
45 | "id": "dS2X3Bq1-fxE"
46 | },
47 | "source": [
48 | "import numpy as np\n",
49 | "import pandas as pd\n",
50 | "import scipy as sp\n",
51 | "from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor\n",
52 | "from sklearn.model_selection import KFold, StratifiedKFold, train_test_split\n",
53 | "from sklearn.metrics import mean_squared_error, log_loss\n",
54 | "import sklearn\n",
55 | "import os"
56 | ],
57 | "execution_count": 6,
58 | "outputs": []
59 | },
60 | {
61 | "cell_type": "code",
62 | "metadata": {
63 | "id": "nxJ46X9cFJ9X"
64 | },
65 | "source": [
66 | "RANDOM_SEED=42\n",
67 | "np.random.seed(RANDOM_SEED)"
68 | ],
69 | "execution_count": 7,
70 | "outputs": []
71 | },
72 | {
73 | "cell_type": "markdown",
74 | "metadata": {
75 | "id": "yPbJeayiEs3u"
76 | },
77 | "source": [
 78 |         "## Load and Format LaLonde Observational Data"
79 | ]
80 | },
81 | {
82 | "cell_type": "code",
83 | "metadata": {
84 | "id": "2AC9TPko-hbt"
85 | },
86 | "source": [
87 | "def make_data_lalonde(df):\n",
88 | " df_new = df.drop(['nodegree'], axis=1)\n",
89 | " df_new['pos74'] = (df_new['RE74'] > 0).astype(int)\n",
90 | " df_new['pos75'] = (df_new['RE75'] > 0).astype(int)\n",
91 | " df_new['treatment'] = df_new['treatment'].astype(int)\n",
92 | " return df_new\n",
93 | "\n",
94 | "\n",
95 | "col_names = ['treatment', 'age', 'education', 'black',\n",
96 | " 'hispanic', 'married', 'nodegree', 'RE74', 'RE75', 'RE78']\n",
97 | "control = pd.read_csv('https://raw.githubusercontent.com/anishazaveri/austen_plots/master/data/imbens-raw/psid_controls.txt', header=None, sep=r\"\\s\\s\", names=col_names, engine='python')\n",
98 | "treatment = pd.read_csv('https://raw.githubusercontent.com/anishazaveri/austen_plots/master/data/imbens-raw/nswre74_treated.txt', header=None, sep=r\"\\s\\s\", names=col_names, engine='python')\n",
99 | "\n",
100 | "lalonde1 = pd.concat([control, treatment]).reset_index(drop=True)\n",
101 | "lalonde1 = make_data_lalonde(lalonde1)"
102 | ],
103 | "execution_count": 8,
104 | "outputs": []
105 | },
106 | {
107 | "cell_type": "code",
108 | "metadata": {
109 | "colab": {
110 | "base_uri": "https://localhost:8080/",
111 | "height": 203
112 | },
113 | "id": "-A1LX6-t-hZD",
114 | "outputId": "b0e276e2-dce3-424d-ffc7-e2b992ad62ec"
115 | },
116 | "source": [
117 | "lalonde1.head()"
118 | ],
119 | "execution_count": 9,
120 | "outputs": [
121 | {
122 | "output_type": "execute_result",
123 | "data": {
124 | "text/html": [
 125 |               "<HTML table rendering stripped during extraction; see the text/plain output below>"
230 | ],
231 | "text/plain": [
232 | " treatment age education black hispanic ... RE74 RE75 RE78 pos74 pos75\n",
233 | "0 0 47.0 12.0 0.0 0.0 ... 0.0 0.0 0.0 0 0\n",
234 | "1 0 50.0 12.0 1.0 0.0 ... 0.0 0.0 0.0 0 0\n",
235 | "2 0 44.0 12.0 0.0 0.0 ... 0.0 0.0 0.0 0 0\n",
236 | "3 0 28.0 12.0 1.0 0.0 ... 0.0 0.0 0.0 0 0\n",
237 | "4 0 54.0 12.0 0.0 0.0 ... 0.0 0.0 0.0 0 0\n",
238 | "\n",
239 | "[5 rows x 11 columns]"
240 | ]
241 | },
242 | "metadata": {},
243 | "execution_count": 9
244 | }
245 | ]
246 | },
247 | {
248 | "cell_type": "code",
249 | "metadata": {
250 | "id": "APOqpHmrOGzo"
251 | },
252 | "source": [
253 | "confounders = lalonde1.drop(columns=['RE78', 'treatment'])\n",
254 | "outcome = lalonde1['RE78']\n",
255 | "treatment = lalonde1['treatment']"
256 | ],
257 | "execution_count": 10,
258 | "outputs": []
259 | },
260 | {
261 | "cell_type": "markdown",
262 | "metadata": {
263 | "id": "C576dWRsa3ad"
264 | },
265 | "source": [
266 | "## Specify Nuisance Function Models\n",
267 | "\n",
268 | "The next step is to specify models for the conditional expected outcome and propensity score"
269 | ]
270 | },
271 | {
272 | "cell_type": "code",
273 | "metadata": {
274 | "colab": {
275 | "base_uri": "https://localhost:8080/"
276 | },
277 | "id": "qyOhSZRQRb8W",
278 | "outputId": "7df1d854-c13f-4f93-ec7c-4977df6ad283"
279 | },
280 | "source": [
281 | "# specify a model for the conditional expected outcome\n",
282 | "\n",
283 | "# make a function that returns a sklearn model for later use in k-folding\n",
284 | "def make_Q_model():\n",
285 | " return RandomForestRegressor(random_state=RANDOM_SEED, n_estimators=500, max_depth=None)\n",
286 | "Q_model = make_Q_model()\n",
287 | "\n",
288 | "# Sanity check that chosen model actually improves test error\n",
289 | "# A real analysis should give substantial attention to model selection and validation \n",
290 | "\n",
291 | "X_w_treatment = confounders.copy()\n",
292 | "X_w_treatment[\"treatment\"] = treatment\n",
293 | "\n",
294 | "X_train, X_test, y_train, y_test = train_test_split(X_w_treatment, outcome, test_size=0.2)\n",
295 | "Q_model.fit(X_train, y_train)\n",
296 | "y_pred = Q_model.predict(X_test)\n",
297 | "\n",
298 | "test_mse=mean_squared_error(y_pred, y_test)\n",
299 | "print(f\"Test MSE of fit model {test_mse}\") \n",
300 | "baseline_mse=mean_squared_error(y_train.mean()*np.ones_like(y_test), y_test)\n",
301 | "print(f\"Test MSE of no-covariate model {baseline_mse}\")"
302 | ],
303 | "execution_count": 11,
304 | "outputs": [
305 | {
306 | "output_type": "stream",
307 | "name": "stdout",
308 | "text": [
309 | "Test MSE of fit model 105637760.68507269\n",
310 | "Test MSE of no-covariate model 246319790.55062827\n"
311 | ]
312 | }
313 | ]
314 | },
315 | {
316 | "cell_type": "code",
317 | "metadata": {
318 | "colab": {
319 | "base_uri": "https://localhost:8080/"
320 | },
321 | "id": "uq6eZEBXbsaI",
322 | "outputId": "974c356c-07f3-4573-f8c3-b83400c82169"
323 | },
324 | "source": [
325 | "# specify a model for the propensity score\n",
326 | "\n",
327 | "def make_g_model():\n",
328 | "# return LogisticRegression(max_iter=1000)\n",
329 | " return RandomForestClassifier(n_estimators=100, max_depth=5)\n",
330 | "\n",
331 | "g_model = make_g_model()\n",
332 | "# Sanity check that chosen model actually improves test error\n",
333 | "# A real analysis should give substantial attention to model selection and validation \n",
334 | "\n",
335 | "X_train, X_test, a_train, a_test = train_test_split(confounders, treatment, test_size=0.2)\n",
336 | "g_model.fit(X_train, a_train)\n",
337 | "a_pred = g_model.predict_proba(X_test)[:,1]\n",
338 | "\n",
339 | "test_ce=log_loss(a_test, a_pred)\n",
340 | "print(f\"Test CE of fit model {test_ce}\") \n",
341 | "baseline_ce=log_loss(a_test, a_train.mean()*np.ones_like(a_test))\n",
342 | "print(f\"Test CE of no-covariate model {baseline_ce}\")"
343 | ],
344 | "execution_count": 12,
345 | "outputs": [
346 | {
347 | "output_type": "stream",
348 | "name": "stdout",
349 | "text": [
350 | "Test CE of fit model 0.07789407933364972\n",
351 | "Test CE of no-covariate model 0.21817471356014154\n"
352 | ]
353 | }
354 | ]
355 | },
356 | {
357 | "cell_type": "markdown",
358 | "metadata": {
359 | "id": "2RkvV_4_dFWo"
360 | },
361 | "source": [
 362 |         "## Use cross fitting to get predicted outcomes and propensity scores for each unit"
363 | ]
364 | },
365 | {
366 | "cell_type": "code",
367 | "metadata": {
368 | "id": "KA0AsEGJ_X3b"
369 | },
370 | "source": [
371 | "# helper functions to implement the cross fitting\n",
372 | "\n",
373 | "def treatment_k_fold_fit_and_predict(make_model, X:pd.DataFrame, A:np.array, n_splits:int):\n",
374 | " \"\"\"\n",
375 | " Implements K fold cross-fitting for the model predicting the treatment A. \n",
376 | " That is, \n",
377 | " 1. Split data into K folds\n",
378 | " 2. For each fold j, the model is fit on the other K-1 folds\n",
379 | " 3. The fitted model is used to make predictions for each data point in fold j\n",
380 | " Returns an array containing the predictions \n",
381 | "\n",
382 | " Args:\n",
 383 |         "      make_model: function that returns an sklearn model (which implements fit and predict_proba)\n",
384 | " X: dataframe of variables to adjust for\n",
385 | " A: array of treatments\n",
386 | " n_splits: number of splits to use\n",
387 | " \"\"\"\n",
388 | " predictions = np.full_like(A, np.nan, dtype=float)\n",
389 | " kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)\n",
390 | " \n",
391 | " for train_index, test_index in kf.split(X, A):\n",
392 | " X_train = X.loc[train_index]\n",
393 | " A_train = A.loc[train_index]\n",
394 | " g = make_model()\n",
395 | " g.fit(X_train, A_train)\n",
396 | "\n",
397 | " # get predictions for split\n",
398 | " predictions[test_index] = g.predict_proba(X.loc[test_index])[:, 1]\n",
399 | "\n",
400 | " assert np.isnan(predictions).sum() == 0\n",
401 | " return predictions\n",
402 | "\n",
403 | "\n",
404 | "def outcome_k_fold_fit_and_predict(make_model, X:pd.DataFrame, y:np.array, A:np.array, n_splits:int, output_type:str):\n",
405 | " \"\"\"\n",
406 | " Implements K fold cross-fitting for the model predicting the outcome Y. \n",
407 | " That is, \n",
408 | " 1. Split data into K folds\n",
409 | " 2. For each fold j, the model is fit on the other K-1 folds\n",
410 | " 3. The fitted model is used to make predictions for each data point in fold j\n",
411 | " Returns two arrays containing the predictions for all units untreated, all units treated \n",
412 | "\n",
413 | " Args:\n",
 414 |         "      make_model: function that returns an sklearn model (that implements fit and either predict_proba or predict)\n",
415 | " X: dataframe of variables to adjust for\n",
416 | " y: array of outcomes\n",
417 | " A: array of treatments\n",
418 | " n_splits: number of splits to use\n",
419 | " output_type: type of outcome, \"binary\" or \"continuous\"\n",
420 | "\n",
421 | " \"\"\"\n",
422 | " predictions0 = np.full_like(A, np.nan, dtype=float)\n",
423 | " predictions1 = np.full_like(y, np.nan, dtype=float)\n",
424 | " if output_type == 'binary':\n",
425 | " kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)\n",
426 | " elif output_type == 'continuous':\n",
427 | " kf = KFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)\n",
428 | "\n",
429 | " # include the treatment as input feature\n",
430 | " X_w_treatment = X.copy()\n",
431 | " X_w_treatment[\"A\"] = A\n",
432 | "\n",
433 | " # for predicting effect under treatment / control status for each data point \n",
434 | " X0 = X_w_treatment.copy()\n",
435 | " X0[\"A\"] = 0\n",
436 | " X1 = X_w_treatment.copy()\n",
437 | " X1[\"A\"] = 1\n",
438 | "\n",
439 | " \n",
440 | " for train_index, test_index in kf.split(X_w_treatment, y):\n",
441 | " X_train = X_w_treatment.loc[train_index]\n",
442 | " y_train = y.loc[train_index]\n",
443 | " q = make_model()\n",
444 | " q.fit(X_train, y_train)\n",
445 | "\n",
446 | " if output_type =='binary':\n",
447 | " predictions0[test_index] = q.predict_proba(X0.loc[test_index])[:, 1]\n",
448 | " predictions1[test_index] = q.predict_proba(X1.loc[test_index])[:, 1]\n",
449 | " elif output_type == 'continuous':\n",
450 | " predictions0[test_index] = q.predict(X0.loc[test_index])\n",
451 | " predictions1[test_index] = q.predict(X1.loc[test_index])\n",
452 | "\n",
453 | " assert np.isnan(predictions0).sum() == 0\n",
454 | " assert np.isnan(predictions1).sum() == 0\n",
455 | " return predictions0, predictions1"
456 | ],
457 | "execution_count": 13,
458 | "outputs": []
459 | },
460 | {
461 | "cell_type": "code",
462 | "metadata": {
463 | "id": "wVcE6pRQeMNf"
464 | },
465 | "source": [
466 | "g = treatment_k_fold_fit_and_predict(make_g_model, X=confounders, A=treatment, n_splits=10)"
467 | ],
468 | "execution_count": 14,
469 | "outputs": []
470 | },
471 | {
472 | "cell_type": "code",
473 | "metadata": {
474 | "id": "GLEHlLLdWSh9"
475 | },
476 | "source": [
477 | "Q0,Q1=outcome_k_fold_fit_and_predict(make_Q_model, X=confounders, y=outcome, A=treatment, n_splits=10, output_type=\"continuous\")"
478 | ],
479 | "execution_count": 15,
480 | "outputs": []
481 | },
482 | {
483 | "cell_type": "code",
484 | "metadata": {
485 | "colab": {
486 | "base_uri": "https://localhost:8080/",
487 | "height": 203
488 | },
489 | "id": "_NVCV0q0g8wQ",
490 | "outputId": "b638a74f-1c3f-4860-bd3d-af9eb397832a"
491 | },
492 | "source": [
493 | "data_and_nuisance_estimates = pd.DataFrame({'g': g, 'Q0': Q0, 'Q1': Q1, 'A': treatment, 'Y': outcome})\n",
494 | "data_and_nuisance_estimates.head()"
495 | ],
496 | "execution_count": 16,
497 | "outputs": [
498 | {
499 | "output_type": "execute_result",
500 | "data": {
501 | "text/html": [
 502 |               "<HTML table rendering stripped during extraction; see the text/plain output below>"
571 | ],
572 | "text/plain": [
573 | " g Q0 Q1 A Y\n",
574 | "0 0.313350 95.549536 1571.922518 0 0.0\n",
575 | "1 0.191958 2032.024647 3895.070486 0 0.0\n",
576 | "2 0.470788 29.940432 1731.498259 0 0.0\n",
577 | "3 0.517957 11037.487272 9030.776610 0 0.0\n",
578 | "4 0.014246 0.000000 2139.630960 0 0.0"
579 | ]
580 | },
581 | "metadata": {},
582 | "execution_count": 16
583 | }
584 | ]
585 | },
586 | {
587 | "cell_type": "markdown",
588 | "metadata": {
589 | "id": "VNhM7URdgzQB"
590 | },
591 | "source": [
592 | "## Combine predicted values and data into estimate of ATT"
593 | ]
594 | },
595 | {
596 | "cell_type": "code",
597 | "metadata": {
598 | "id": "J-vONC5ejwh2"
599 | },
600 | "source": [
601 | "def att_aiptw(Q0, Q1, g, A, Y, prob_t=None):\n",
602 | " \"\"\"\n",
603 | " # Double ML estimator for the ATT\n",
 604 |         "    This uses the ATT-specific scores; see equation 3.9 of https://www.econstor.eu/bitstream/10419/149795/1/869216953.pdf\n",
605 | " \"\"\"\n",
606 | "\n",
607 | " if prob_t is None:\n",
608 | " prob_t = A.mean() # estimate marginal probability of treatment\n",
609 | "\n",
610 | " tau_hat = (A*(Y-Q0) - (1-A)*(g/(1-g))*(Y-Q0)).mean()/ prob_t\n",
611 | " \n",
612 | " scores = (A*(Y-Q0) - (1-A)*(g/(1-g))*(Y-Q0) - tau_hat*A) / prob_t\n",
613 | " n = Y.shape[0] # number of observations\n",
614 | " std_hat = np.std(scores) / np.sqrt(n)\n",
615 | "\n",
616 | " return tau_hat, std_hat\n"
617 | ],
618 | "execution_count": 17,
619 | "outputs": []
620 | },
621 | {
622 | "cell_type": "code",
623 | "metadata": {
624 | "id": "O_F5r0SSkzzK"
625 | },
626 | "source": [
627 | "def ate_aiptw(Q0, Q1, g, A, Y, prob_t=None):\n",
628 | " \"\"\"\n",
629 | " # Double ML estimator for the ATE\n",
630 | " \"\"\"\n",
631 | "\n",
632 | " tau_hat = (Q1 - Q0 + A*(Y-Q1)/g - (1-A)*(Y-Q0)/(1-g)).mean()\n",
633 | " \n",
634 | " scores = Q1 - Q0 + A*(Y-Q1)/g - (1-A)*(Y-Q0)/(1-g) - tau_hat\n",
635 | " n = Y.shape[0] # number of observations\n",
636 | " std_hat = np.std(scores) / np.sqrt(n)\n",
637 | "\n",
638 | " return tau_hat, std_hat\n"
639 | ],
640 | "execution_count": 18,
641 | "outputs": []
642 | },
643 | {
644 | "cell_type": "code",
645 | "metadata": {
646 | "colab": {
647 | "base_uri": "https://localhost:8080/"
648 | },
649 | "id": "SjDj0F9Bm9uq",
650 | "outputId": "fdef5c08-3829-400b-ea0e-1cb5dd01bc25"
651 | },
652 | "source": [
653 | "tau_hat, std_hat = att_aiptw(**data_and_nuisance_estimates)\n",
654 | "print(f\"The estimate is {tau_hat} pm {1.96*std_hat}\")"
655 | ],
656 | "execution_count": 19,
657 | "outputs": [
658 | {
659 | "output_type": "stream",
660 | "name": "stdout",
661 | "text": [
662 | "The estimate is 1300.9807431649592 pm 1622.6924287596182\n"
663 | ]
664 | }
665 | ]
666 | },
667 | {
668 | "cell_type": "code",
669 | "metadata": {
670 | "colab": {
671 | "base_uri": "https://localhost:8080/"
672 | },
673 | "id": "vSaOp1HwlQ4i",
674 | "outputId": "874e2ea0-dfc1-4594-9d45-b6663f69f163"
675 | },
676 | "source": [
677 | "in_treated = data_and_nuisance_estimates['A']==1\n",
678 | "treated_estimates = data_and_nuisance_estimates[in_treated]\n",
679 | "tau_hat, std_hat = ate_aiptw(**treated_estimates)\n",
680 | "print(f\"The estimate is {tau_hat} pm {1.96*std_hat}\")"
681 | ],
682 | "execution_count": 20,
683 | "outputs": [
684 | {
685 | "output_type": "stream",
686 | "name": "stdout",
687 | "text": [
688 | "The estimate is -33439.05484914103 pm 50637.28008886066\n"
689 | ]
690 | }
691 | ]
692 | },
693 | {
694 | "cell_type": "code",
695 | "metadata": {
696 | "colab": {
697 | "base_uri": "https://localhost:8080/"
698 | },
699 | "id": "IOuJnlbEo8j_",
700 | "outputId": "74678791-7163-41e6-f7a9-a04a7b669e81"
701 | },
702 | "source": [
 703 |         "# The LaLonde data has severe overlap issues. Let's try computing the estimate restricted to a population with only reasonable propensity scores\n",
 704 |         "g = data_and_nuisance_estimates['g']\n",
 705 |         "in_overlap_population = (g < 0.90)\n",
 706 |         "overlap_data_and_nuisance = data_and_nuisance_estimates[in_overlap_population]\n",
707 | "tau_hat, std_hat = att_aiptw(**overlap_data_and_nuisance)\n",
708 | "print(f\"The estimate is {tau_hat} pm {1.96*std_hat}\")"
709 | ],
710 | "execution_count": 21,
711 | "outputs": [
712 | {
713 | "output_type": "stream",
714 | "name": "stdout",
715 | "text": [
716 | "The estimate is 572.1572812652179 pm 1501.516696945994\n"
717 | ]
718 | }
719 | ]
720 | },
721 | {
722 | "cell_type": "code",
723 | "metadata": {
724 | "id": "LnJppbQdjwVI"
725 | },
726 | "source": [
727 | ""
728 | ],
729 | "execution_count": 22,
730 | "outputs": []
731 | }
732 | ]
733 | }
--------------------------------------------------------------------------------
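
Note on /ATE_Estimation_with_Machine_Learning.ipynb: the `att_aiptw` step above implements the ATT-specific AIPTW / double-ML score (the "equation 3.9" its docstring points to). The formulas below are transcribed directly from that code, with $\hat Q_0$ and $\hat g$ standing for the cross-fit predictions `Q0` and `g`, and $\hat\pi$ for the empirical treatment probability; read them as a sketch of what the code computes, not an authoritative restatement of the cited reference.

$$\hat\pi = \frac{1}{n}\sum_{i=1}^n A_i, \qquad
\hat\tau_{\mathrm{ATT}} = \frac{1}{\hat\pi}\cdot\frac{1}{n}\sum_{i=1}^n
\left[\, A_i\bigl(Y_i-\hat Q_0(X_i)\bigr) - (1-A_i)\,\frac{\hat g(X_i)}{1-\hat g(X_i)}\bigl(Y_i-\hat Q_0(X_i)\bigr) \right]$$

$$\hat\varphi_i = \frac{A_i\bigl(Y_i-\hat Q_0(X_i)\bigr) - (1-A_i)\,\frac{\hat g(X_i)}{1-\hat g(X_i)}\bigl(Y_i-\hat Q_0(X_i)\bigr) - \hat\tau_{\mathrm{ATT}}\,A_i}{\hat\pi}, \qquad
\widehat{\mathrm{se}} = \frac{\mathrm{sd}(\hat\varphi)}{\sqrt{n}}$$

The notebook reports the interval $\hat\tau_{\mathrm{ATT}} \pm 1.96\,\widehat{\mathrm{se}}$.
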
/IV_Strategies.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "id": "view-in-github",
7 | "colab_type": "text"
8 | },
9 | "source": [
 10 |     "(Open in Colab badge link)"
11 | ]
12 | },
13 | {
14 | "cell_type": "markdown",
15 | "metadata": {
16 | "id": "3Pr7ijIYeO--"
17 | },
18 | "source": [
19 | "# LATE Estimation Tutorial"
20 | ]
21 | },
22 | {
23 | "cell_type": "markdown",
24 | "metadata": {
25 | "id": "kVIsqn30gqCx"
26 | },
27 | "source": [
 28 |     "This tutorial gives a short example of how to use an instrumental variable to estimate the local average treatment effect (LATE) using machine learning methods."
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": 1,
34 | "metadata": {
35 | "id": "Nv0YbKbGea6U"
36 | },
37 | "outputs": [],
38 | "source": [
39 | "import numpy as np\n",
40 | "import pandas as pd\n",
41 | "import sklearn\n",
42 | "from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor\n",
43 | "from sklearn.linear_model import LogisticRegression\n",
44 | "from sklearn.model_selection import KFold, StratifiedKFold, train_test_split\n",
45 | "from sklearn.metrics import mean_squared_error, log_loss\n",
46 | "import math"
47 | ]
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": 2,
52 | "metadata": {
53 | "id": "ONvBs_yvia3a"
54 | },
55 | "outputs": [],
56 | "source": [
57 | "RANDOM_SEED=0\n",
58 | "np.random.seed(RANDOM_SEED)"
59 | ]
60 | },
61 | {
62 | "cell_type": "markdown",
63 | "metadata": {
64 | "id": "yPbJeayiEs3u"
65 | },
66 | "source": [
 67 |     "## Load Outvote 2020 Observational Data"
68 | ]
69 | },
70 | {
71 | "cell_type": "markdown",
72 | "metadata": {
73 | "id": "2t60_xU_qORv"
74 | },
75 | "source": [
76 | "First, load the observational data."
77 | ]
78 | },
79 | {
80 | "cell_type": "code",
81 | "execution_count": 3,
82 | "metadata": {
83 | "colab": {
84 | "base_uri": "https://localhost:8080/",
85 | "height": 488
86 | },
87 | "id": "iCuOMjvsoWpm",
88 | "outputId": "5eac723f-847b-41f0-a01b-377ec7ec1261"
89 | },
90 | "outputs": [
91 | {
92 | "output_type": "execute_result",
93 | "data": {
94 | "text/plain": [
95 | " voted_2020 messaged queue_position queue_length \\\n",
96 | "0 1 1 1.0 19.0 \n",
97 | "1 1 1 6.0 19.0 \n",
98 | "2 0 1 8.0 19.0 \n",
99 | "3 1 1 10.0 19.0 \n",
100 | "4 1 1 18.0 19.0 \n",
101 | "... ... ... ... ... \n",
102 | "81199 1 0 5.0 5.0 \n",
103 | "81200 1 0 2.0 3.0 \n",
104 | "81201 0 0 1.0 2.0 \n",
105 | "81202 1 1 2.0 9.0 \n",
106 | "81203 0 1 3.0 3.0 \n",
107 | "\n",
108 | " queue_id voted_2018 voted_2016 is_Democrat \\\n",
109 | "0 xgcwm279xcwkxjq4zxabo 0 0 0 \n",
110 | "1 xgcwm279xcwkxjq4zxabo 1 1 1 \n",
111 | "2 xgcwm279xcwkxjq4zxabo 1 0 1 \n",
112 | "3 xgcwm279xcwkxjq4zxabo 1 1 0 \n",
113 | "4 xgcwm279xcwkxjq4zxabo 0 1 1 \n",
114 | "... ... ... ... ... \n",
115 | "81199 0j3f3gyc3qspaukludfcpy4 1 1 0 \n",
116 | "81200 ixyu0l548p9dextzgjspdr 0 1 0 \n",
117 | "81201 ghclqszfr6qvz2i5yesrwo 0 1 0 \n",
118 | "81202 sscqhe1ttlh7fmkmuk231f 1 1 0 \n",
119 | "81203 41tse6pfdjlw0kwcm7rbo 0 0 0 \n",
120 | "\n",
121 | " is_Republican is_Male is_Female is_Married is_Urban is_Rural \\\n",
122 | "0 0 0 1 0 0 0 \n",
123 | "1 0 1 0 0 0 0 \n",
124 | "2 0 0 1 0 0 0 \n",
125 | "3 0 1 0 0 0 0 \n",
126 | "4 0 0 1 0 0 0 \n",
127 | "... ... ... ... ... ... ... \n",
128 | "81199 0 0 1 1 0 0 \n",
129 | "81200 0 0 1 0 0 0 \n",
130 | "81201 0 0 1 0 0 0 \n",
131 | "81202 0 1 0 1 0 0 \n",
132 | "81203 0 1 0 0 0 0 \n",
133 | "\n",
134 | " is_Battleground age \n",
135 | "0 1 68.0 \n",
136 | "1 0 52.0 \n",
137 | "2 0 26.0 \n",
138 | "3 1 39.0 \n",
139 | "4 1 23.0 \n",
140 | "... ... ... \n",
141 | "81199 1 54.0 \n",
142 | "81200 0 36.0 \n",
143 | "81201 1 90.0 \n",
144 | "81202 1 72.0 \n",
145 | "81203 1 72.0 \n",
146 | "\n",
147 | "[81204 rows x 16 columns]"
148 | ],
149 | "text/html": [
 150 |         "<HTML / interactive data-table rendering stripped during extraction; see the text/plain output above>"
479 | ]
480 | },
481 | "metadata": {},
482 | "execution_count": 3
483 | }
484 | ],
485 | "source": [
486 | "outvote = pd.read_csv('https://raw.githubusercontent.com/vveitch/causality-tutorials/main/data/outvote_2020_data.csv')\n",
487 | "outvote"
488 | ]
489 | },
490 | {
491 | "cell_type": "markdown",
492 | "metadata": {
493 | "id": "wksy28PnqSGU"
494 | },
495 | "source": [
496 | "**Data description:**\n",
497 | "* Core variables:\n",
498 | " * `voted_2020`: (binary) the outcome; whether subject voted in 2020 election\n",
499 | " * `messaged`: (binary) the treatment; whether subject was messaged by an Outvote user\n",
500 | " * `queue_position`: (ordinal / pos int / 1+) the instrument; the subject's position/ranking in the user's queue; this is randomized\n",
501 | " * `queue_length`: (ordinal / pos int / 2+) the block; how long the user's queue was; this is endogenous / non-randomized and must be blocked on\n",
502 | "\n",
503 | "* Optional / additional variables: \n",
504 | " * `queue_id`: (int) a unique identifier for the queue the subject was in\n",
505 | " * `voted_2018`: (binary) whether subject voted in 2018 election\n",
506 | " * `voted_2016`: (binary) whether subject voted in 2016 election\n",
507 | " * `is_Democrat`: (binary) whether subject is registered Democrat\n",
508 | " * `is_Republican`: (binary) whether subject is registered Republican\n",
509 | " * `is_Male`: (binary) whether subject identifies as Male\n",
510 | " * `is_Female`: (binary) whether subject identifies as Female\n",
511 | " * `is_Married`: (binary) whether subject is married\n",
512 | " * `is_Urban`: (binary) whether subject lives in a city\n",
513 | " * `is_Rural`: (binary) whether subject lives in a rural area\n",
514 | " * `is_Battleground`: (binary) whether subject is registered in a battleground state\n",
515 | " * `age`: (pos int, 18+), subject's age"
516 | ]
517 | },
518 | {
519 | "cell_type": "code",
520 | "execution_count": 4,
521 | "metadata": {
522 | "colab": {
523 | "base_uri": "https://localhost:8080/"
524 | },
525 | "id": "6CgdtoZr7uvL",
526 | "outputId": "39c90551-18ee-4dcd-b9d1-a8970900cf62"
527 | },
528 | "outputs": [
529 | {
530 | "output_type": "execute_result",
531 | "data": {
532 | "text/plain": [
533 | "43357"
534 | ]
535 | },
536 | "metadata": {},
537 | "execution_count": 4
538 | }
539 | ],
540 | "source": [
 541 |     "# dichotomize the instrument at cutoff K = 37: Z = 1 if queue_position <= K\n",
 542 |     "# keep only those queues with length > K\n",
543 | "outvote = outvote.loc[outvote.queue_length>37].reset_index()\n",
544 | "outvote['queue_position'] = (outvote['queue_position'] <= 37.0).astype(int)\n",
545 | "len(outvote)"
546 | ]
547 | },
548 | {
549 | "cell_type": "code",
550 | "execution_count": 5,
551 | "metadata": {
552 | "id": "2AC9TPko-hbt"
553 | },
554 | "outputs": [],
555 | "source": [
556 | "outcome = outvote['voted_2020']\n",
557 | "treatment = outvote['messaged']\n",
558 | "instrument = outvote['queue_position']\n",
559 | "block = outvote['queue_length']"
560 | ]
561 | },
562 | {
563 | "cell_type": "markdown",
564 | "metadata": {
565 | "id": "C576dWRsa3ad"
566 | },
567 | "source": [
568 | "## Specify Nuisance Function Models\n",
569 | "\n",
570 | "The next step is to specify models for \n",
571 | "\n",
572 | "* $\\mu(z,x)=\\mathbb{E}(Y|z,x)$\n",
573 | "* $m(z,x) = P(A=1|z,x)$\n",
574 | "* $p(x) = P(Z=1|x)$\n",
575 | "\n"
576 | ]
577 | },
578 | {
579 | "cell_type": "code",
580 | "execution_count": 6,
581 | "metadata": {
582 | "colab": {
583 | "base_uri": "https://localhost:8080/"
584 | },
585 | "id": "qyOhSZRQRb8W",
586 | "outputId": "4819a4f0-8268-45e2-c667-7b0c6055d4d5"
587 | },
588 | "outputs": [
589 | {
590 | "output_type": "stream",
591 | "name": "stdout",
592 | "text": [
593 | "Test Cross Entropy of fit model 0.5250216548208155\n",
594 | "Test Cross Entropy of no-covariate model 0.5270933091701285\n"
595 | ]
596 | }
597 | ],
598 | "source": [
599 | "from sklearn.neighbors import KNeighborsClassifier\n",
600 | "# specify a model for mu(z,x)\n",
601 | "\n",
602 | "# make a function that returns a sklearn model for later use in k-folding\n",
603 | "def make_mu_model():\n",
604 | " return KNeighborsClassifier(n_neighbors=300)\n",
605 | "mu_model = make_mu_model()\n",
606 | "\n",
607 | "# Sanity check that chosen model actually improves test error\n",
608 | "# A real analysis should give substantial attention to model selection and validation \n",
609 | "\n",
610 | "X_zx = outvote[['queue_position','queue_length']].copy()\n",
611 | "\n",
612 | "X_train, X_test, y_train, y_test = train_test_split(X_zx, outcome, test_size=0.2)\n",
613 | "mu_model.fit(X_train, y_train)\n",
614 | "y_pred = mu_model.predict_proba(X_test)[:,1]\n",
615 | "\n",
616 | "test_ce=log_loss(y_test, y_pred)\n",
617 | "print(f\"Test Cross Entropy of fit model {test_ce}\") \n",
618 | "baseline_ce=log_loss(y_test, y_train.mean()*np.ones_like(y_test))\n",
619 | "print(f\"Test Cross Entropy of no-covariate model {baseline_ce}\")"
620 | ]
621 | },
622 | {
623 | "cell_type": "code",
624 | "execution_count": 7,
625 | "metadata": {
626 | "colab": {
627 | "base_uri": "https://localhost:8080/"
628 | },
629 | "id": "uq6eZEBXbsaI",
630 | "outputId": "c87bc82f-22be-49f6-af8b-058eec588403"
631 | },
632 | "outputs": [
633 | {
634 | "output_type": "stream",
635 | "name": "stdout",
636 | "text": [
637 | "Test CE of fit model 0.6398686068177049\n",
638 | "Test CE of no-covariate model 0.6714289850271291\n"
639 | ]
640 | }
641 | ],
642 | "source": [
643 | "# specify a model for m(z,x)\n",
644 | "\n",
645 | "def make_m_model():\n",
646 | " return LogisticRegression(max_iter=1000, warm_start=True, random_state=RANDOM_SEED)\n",
647 | "\n",
648 | "m_model = make_m_model()\n",
649 | "# Sanity check that chosen model actually improves test error\n",
650 | "# A real analysis should give substantial attention to model selection and validation \n",
651 | "\n",
652 | "X_train, X_test, a_train, a_test = train_test_split(X_zx, treatment, test_size=0.2)\n",
653 | "m_model.fit(X_train, a_train)\n",
654 | "a_pred = m_model.predict_proba(X_test)[:,1]\n",
655 | "\n",
656 | "test_ce=log_loss(a_test, a_pred)\n",
657 | "print(f\"Test CE of fit model {test_ce}\") \n",
658 | "baseline_ce=log_loss(a_test, a_train.mean()*np.ones_like(a_test))\n",
659 | "print(f\"Test CE of no-covariate model {baseline_ce}\")"
660 | ]
661 | },
662 | {
663 | "cell_type": "code",
664 | "execution_count": 8,
665 | "metadata": {
666 | "colab": {
667 | "base_uri": "https://localhost:8080/"
668 | },
669 | "id": "pg-7pFAYxRQ5",
670 | "outputId": "cf0a1fb1-31dd-4c9f-c5df-b8987d66a7c4"
671 | },
672 | "outputs": [
673 | {
674 | "output_type": "stream",
675 | "name": "stdout",
676 | "text": [
677 | "Test CE of fit model 0.5303714147556886\n",
678 | "Test CE of no-covariate model 0.6810169611354872\n"
679 | ]
680 | }
681 | ],
682 | "source": [
683 | "def make_p_model():\n",
684 | " return RandomForestClassifier(n_estimators=200, max_depth=5)\n",
685 | "\n",
686 | "p_model = make_p_model()\n",
687 | "# Sanity check that chosen model actually improves test error\n",
688 | "# A real analysis should give substantial attention to model selection and validation \n",
689 | "\n",
690 | "X_train, X_test, Z_train, Z_test = train_test_split(block.to_frame(), instrument, test_size=0.2)\n",
691 | "p_model.fit(X_train, Z_train)\n",
692 | "Z_pred = p_model.predict_proba(X_test)[:,1]\n",
693 | "\n",
694 | "test_ce=log_loss(Z_test, Z_pred)\n",
695 | "print(f\"Test CE of fit model {test_ce}\") \n",
696 | "baseline_ce=log_loss(Z_test, Z_train.mean()*np.ones_like(Z_test))\n",
697 | "print(f\"Test CE of no-covariate model {baseline_ce}\")"
698 | ]
699 | },
700 | {
701 | "cell_type": "markdown",
702 | "metadata": {
703 | "id": "2RkvV_4_dFWo"
704 | },
705 | "source": [
706 | "## Use cross fitting to get predicted $\\hat{\\mu}$, $\\hat{m}$, $\\hat{p}$ for each unit"
707 | ]
708 | },
709 | {
710 | "cell_type": "code",
711 | "execution_count": 9,
712 | "metadata": {
713 | "id": "KA0AsEGJ_X3b"
714 | },
715 | "outputs": [],
716 | "source": [
717 | "# helper functions to implement the cross fitting\n",
718 | "\n",
719 | "def p_k_fold_fit_and_predict(make_model, X:pd.DataFrame, Z:np.array, n_splits:int):\n",
720 | " \"\"\"\n",
721 | " Implements K fold cross-fitting for the model predicting the instrument Z. \n",
722 | " That is, \n",
723 | " 1. Split data into K folds\n",
724 | " 2. For each fold j, the model is fit on the other K-1 folds\n",
725 | " 3. The fitted model is used to make predictions for each data point in fold j\n",
726 | " Returns an array containing the predictions \n",
727 | "\n",
728 | " Args:\n",
 729 |     "      make_model: function that returns an sklearn model (which implements fit and predict_proba)\n",
730 | " X: dataframe of variables to adjust for\n",
731 | " Z: array of instruments\n",
732 | " n_splits: number of splits to use\n",
733 | " \"\"\"\n",
734 | " predictions = np.full_like(Z, np.nan, dtype=float)\n",
735 | " kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)\n",
736 | " \n",
737 | " for train_index, test_index in kf.split(X, Z):\n",
738 | " X_train = X.loc[train_index]\n",
739 | " Z_train = Z.loc[train_index]\n",
740 | " g = make_model()\n",
741 | " g.fit(X_train, Z_train)\n",
742 | "\n",
743 | " # get predictions for split\n",
744 | " predictions[test_index] = g.predict_proba(X.loc[test_index])[:, 1]\n",
745 | "\n",
746 | " assert np.isnan(predictions).sum() == 0\n",
747 | " return predictions\n",
748 | "\n",
749 | "\n",
750 | "def m_k_fold_fit_and_predict(make_model, X:pd.DataFrame, Z:np.array, A:np.array, n_splits:int):\n",
751 | " \"\"\"\n",
 752 |     "    Implements K fold cross-fitting for the model predicting the treatment A. \n",
753 | " That is, \n",
754 | " 1. Split data into K folds\n",
755 | " 2. For each fold j, the model is fit on the other K-1 folds\n",
756 | " 3. The fitted model is used to make predictions for each data point in fold j\n",
 757 |     "    Returns two arrays containing the predicted treatment probabilities with the instrument set to Z=0 and to Z=1 for every unit \n",
758 | "\n",
759 | " Args:\n",
 760 |     "      make_model: function that returns an sklearn model (which implements fit and predict_proba)\n",
761 | " X: dataframe of variables to adjust for\n",
762 | " Z: array of instruments\n",
763 | " A: array of treatments\n",
764 | " n_splits: number of splits to use\n",
765 | " \"\"\"\n",
766 | " predictions0 = np.full_like(A, np.nan, dtype=float)\n",
767 | " predictions1 = np.full_like(A, np.nan, dtype=float)\n",
768 | " kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)\n",
769 | "\n",
 770 |     "    # include the instrument as input feature\n",
771 | " X_zx = X.copy()\n",
772 | " X_zx[\"Z\"] = Z\n",
773 | "\n",
774 | " # for predicting A under Z=1 / Z=0 status for each data point \n",
775 | " X0 = X_zx.copy()\n",
776 | " X0[\"Z\"] = 0\n",
777 | " X1 = X_zx.copy()\n",
778 | " X1[\"Z\"] = 1\n",
779 | " \n",
780 | " for train_index, test_index in kf.split(X_zx, A):\n",
781 | " X_train = X_zx.loc[train_index]\n",
782 | " A_train = A.loc[train_index]\n",
783 | " m = make_model()\n",
784 | " m.fit(X_train, A_train)\n",
785 | " predictions0[test_index] = m.predict_proba(X0.loc[test_index])[:,1]\n",
786 | " predictions1[test_index] = m.predict_proba(X1.loc[test_index])[:,1]\n",
787 | "\n",
788 | " assert np.isnan(predictions0).sum() == 0\n",
789 | " assert np.isnan(predictions1).sum() == 0\n",
790 | " return predictions0, predictions1\n",
791 | "\n",
792 | "def mu_k_fold_fit_and_predict(make_model, X:pd.DataFrame, Z:np.array, y:np.array, n_splits:int, output_type:str):\n",
793 | " \"\"\"\n",
794 | " Implements K fold cross-fitting for the model predicting the outcome Y. \n",
795 | " That is, \n",
796 | " 1. Split data into K folds\n",
797 | " 2. For each fold j, the model is fit on the other K-1 folds\n",
798 | " 3. The fitted model is used to make predictions for each data point in fold j\n",
 799 |     "    Returns two arrays containing the predicted outcomes with the instrument set to Z=0 and to Z=1 for every unit \n",
800 | "\n",
801 | " Args:\n",
 802 |     "      make_model: function that returns an sklearn model (that implements fit and either predict_proba or predict)\n",
803 | " X: dataframe of variables to adjust for\n",
804 | " Z: array of instruments\n",
805 | " y: array of outcomes\n",
806 | " n_splits: number of splits to use\n",
807 | " output_type: type of outcome, \"binary\" or \"continuous\"\n",
808 | "\n",
809 | " \"\"\"\n",
810 | " predictions0 = np.full_like(y, np.nan, dtype=float)\n",
811 | " predictions1 = np.full_like(y, np.nan, dtype=float)\n",
812 | " if output_type == 'binary':\n",
813 | " kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)\n",
814 | " elif output_type == 'continuous':\n",
815 | " kf = KFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)\n",
816 | "\n",
 817 |     "    # include the instrument as input feature\n",
818 | " X_zx = X.copy()\n",
819 | " X_zx[\"Z\"] = Z\n",
820 | "\n",
 821 |     "    # for predicting the outcome under Z=1 / Z=0 for each data point \n",
822 | " X0 = X_zx.copy()\n",
823 | " X0[\"Z\"] = 0\n",
824 | " X1 = X_zx.copy()\n",
825 | " X1[\"Z\"] = 1\n",
826 | "\n",
827 | " \n",
828 | " for train_index, test_index in kf.split(X_zx, y):\n",
829 | " X_train = X_zx.loc[train_index]\n",
830 | " y_train = y.loc[train_index]\n",
831 | " mu = make_model()\n",
832 | " mu.fit(X_train, y_train)\n",
833 | "\n",
834 | " if output_type =='binary':\n",
835 | " predictions0[test_index] = mu.predict_proba(X0.loc[test_index])[:, 1]\n",
836 | " predictions1[test_index] = mu.predict_proba(X1.loc[test_index])[:, 1]\n",
837 | " elif output_type == 'continuous':\n",
838 | " predictions0[test_index] = mu.predict(X0.loc[test_index])\n",
839 | " predictions1[test_index] = mu.predict(X1.loc[test_index])\n",
840 | "\n",
841 | " assert np.isnan(predictions0).sum() == 0\n",
842 | " assert np.isnan(predictions1).sum() == 0\n",
843 | " return predictions0, predictions1"
844 | ]
845 | },
846 | {
847 | "cell_type": "code",
848 | "execution_count": 10,
849 | "metadata": {
850 | "id": "wVcE6pRQeMNf"
851 | },
852 | "outputs": [],
853 | "source": [
854 | "p = p_k_fold_fit_and_predict(make_p_model, X=block.to_frame(), Z=instrument, n_splits=10)"
855 | ]
856 | },
857 | {
858 | "cell_type": "code",
859 | "execution_count": 11,
860 | "metadata": {
861 | "id": "GLEHlLLdWSh9"
862 | },
863 | "outputs": [],
864 | "source": [
865 | "m0,m1= m_k_fold_fit_and_predict(make_m_model, X=block.to_frame(), Z=instrument, A=treatment, n_splits=10)"
866 | ]
867 | },
868 | {
869 | "cell_type": "code",
870 | "source": [
871 | "# check relevance\n",
872 | "from matplotlib.pyplot import hist\n",
873 | "hist(m1-m0, density=True)"
874 | ],
875 | "metadata": {
876 | "id": "jpXU9DK26d6c",
877 | "outputId": "72e0d30c-46e6-48b0-fd8a-318b8b88c26d",
878 | "colab": {
879 | "base_uri": "https://localhost:8080/",
880 | "height": 390
881 | }
882 | },
883 | "execution_count": 18,
884 | "outputs": [
885 | {
886 | "output_type": "execute_result",
887 | "data": {
888 | "text/plain": [
889 | "(array([3.97620728e-02, 3.77739692e-02, 9.34408711e-02, 2.04774675e-01,\n",
890 | " 3.37977619e-02, 0.00000000e+00, 0.00000000e+00, 3.61834863e-01,\n",
891 | " 5.95297873e+01, 2.58970380e+01]),\n",
892 | " array([0.04620174, 0.05780291, 0.06940408, 0.08100525, 0.09260642,\n",
893 | " 0.10420759, 0.11580876, 0.12740993, 0.13901109, 0.15061226,\n",
894 | " 0.16221343]),\n",
895 | " )"
896 | ]
897 | },
898 | "metadata": {},
899 | "execution_count": 18
900 | },
901 | {
902 | "output_type": "display_data",
903 | "data": {
904 | "text/plain": [
905 | ""
906 | ],
907 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXAAAAD4CAYAAAD1jb0+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAOiUlEQVR4nO3dfYxldX3H8fdHBopglUWmmy1oByJoSCpgRwrR2pQtlJZGaEIoStvVkuwf1samtnatf7XpH9DaWhKbNhuxDo0WcCuFiAE2K8S0scggzyzKQhZdurDDUxWaVBe//eOelcns7M7duQ/Db3y/kpt7Hn7nnu839/LZM+fec0hVIUlqz2tWugBJ0vIY4JLUKANckhplgEtSowxwSWrUxDh3dtxxx9XU1NQ4dylJzbv77rufqarJhcvHGuBTU1PMzs6Oc5eS1LwkTyy23FMoktQoA1ySGmWAS1KjDHBJalRfAZ7kmCRbkjySZHuSs5Mcm2Rrkke75zWjLlaS9Ip+j8CvAm6pqrcBpwHbgU3Atqo6GdjWzUuSxmTJAE/yBuA9wNUAVfWDqnoBuBCY6YbNABeNqkhJ0v76OQI/EZgD/jnJPUk+k+RoYG1V7e7GPAWsXWzjJBuTzCaZnZubG07VkqS+AnwCeAfwj1V1BvASC06XVO+m4oveWLyqNlfVdFVNT07udyGRJGmZ+rkScxewq6ru7Oa30Avwp5Osq6rdSdYBe0ZVpKTVZWrTzSuy351XXLAi+x2VJY/Aq+op4LtJ3totWg88DNwEbOiWbQBuHEmFkqRF9XsvlD8EPp/kCOBx4IP0wv/6JJcDTwCXjKZESdJi+grwqroXmF5k1frhliNJ6pdXYkpSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWrURD+DkuwEvg+8DOytqukkxwLXAVPATuCSqnp+NGVKkhY6lCPwX6mq06tqupvfBGyrqpOBbd28JGlMBjmFciEw003PABcNXo4kqV/9BngBtyW5O8nGbtnaqtrdTT8FrF1swyQbk8wmmZ2bmxuwXEnSPn2dAwfeXVVPJvkZYGuSR+avrKpKUottWFWbgc0A09PTi46RJB26vo7Aq+rJ7nkPcANwJvB0knUA3fOeURUpSdrfkgGe5OgkP71vGjgPeBC4CdjQDdsA3DiqIiVJ++vnFMpa4IYk+8Z/oapuSXIXcH2Sy4EngEtGV6YkaaElA7yqHgdOW2T5s8D6URQlSVqaV2JKUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqP6DvAkhyW5J8mXu/kTk9yZZEeS65IcMboyJUkLHcoR+EeA7fPmrwQ+VVVvAZ4HLh9mYZKkg+srwJOcAFwAfKabD3AOsKUbMgNcNIoCJUmL6/cI/O+BjwE/6ubfCLxQVXu7+V3A8UOuTZJ0EEsGeJLfBPZU1d3L2UGSjUlmk8zOzc0t5yUkSYvo5wj8XcB7k+wErqV36uQq4JgkE92YE4AnF9u4qjZX1XRVTU9OTg6hZEkS9BHgVfXxqjqhqqaAS4GvVtVlwO3Axd2wDcCNI6tSkrSfQX4H/mfAHyfZQe+c+NXDKUmS1I+JpYe8oqruAO7oph8Hzhx+SZKkfnglpiQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYtGeBJjkzyjST3JXkoyV90y09McmeSHUmuS3LE6MuVJO3TzxH4/wHnVNVpwOnA+UnOAq4EPlVVbwGeBy4fXZmSpIWWDPDqebGbPbx7FHAOsKVbPgNcNJIKJUmL6usceJLDktwL7AG2Ao8BL1TV3m7ILuD4A2y7Mclsktm5ublh1CxJos8Ar6qXq+p04ATgTOBt/e6gqjZX1XRVTU9OTi6zTEnSQof0K5SqegG4HTgbOCbJRLfqBODJIdcmSTqIfn6FMpnkmG76tcC5wHZ6QX5xN2wDcOOoipQk7W9i6SGsA2aSHEYv8K+vqi8neRi4NslfAfcAV4+wTknSAksGeFXdD5yxyPLH6Z0PlyStAK/ElKRGGeCS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRvVzKb0krQpTm25ekf3uvOKCkbyuR+CS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYtGeBJ3pTk9iQPJ3koyUe65ccm2Zrk0e55zejLlSTt088R+F7go1V1KnAW8AdJTgU2Aduq6mRgWzcvSRqTJQO8qnZX1Te76e8D24HjgQuBmW7YDHDRqIqUJO3vkM6BJ5kCzgDuBNZW1e5u1VPA2gNsszHJbJLZubm5AUqVJM3Xd4AneR3wb8AfVdX35q+rqgJqse2qanNVTVfV9OTk5EDFSpJe0VeAJzmcXnh/vqq+1C1+Osm6bv06YM9oSpQkLaafX6EEuBrYXlV/N2/VTcCGbnoDcOPwy5MkHchEH2PeBfwu8ECSe7tlfw5cAVyf5HLgCeCS0ZQoSVrMkgFeVf8B5ACr1w+3HElSv7wSU5IaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqOWDPAkn02yJ8mD85Ydm2Rrkke75zWjLVOStFA/R+CfA85fsGwTsK2qTga2dfOSpDFaMsCr6mvAcwsWXwjMdNMzwEVDrkuStITlngNfW1W7u+mngLUHGphkY5LZJLNzc3PL3J0kaaGBv8SsqgLqIOs3V9V0VU1PTk4OujtJUme5Af50knUA3fOe4ZUkSerHcgP8JmBDN70BuHE45UiS+tXPzwj/Ffg68NYku5JcDlwBnJvkUeBXu3lJ0hhNLDWgqt53gFXrh1yLJOkQeCWmJDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRg0U4EnOT/KtJDuSbBp
WUZKkpS07wJMcBvwD8OvAqcD7kpw6rMIkSQc3McC2ZwI7qupxgCTXAhcCDw+jMMHUpptXuoSx23nFBStdwk+Mn8TP12ozSIAfD3x33vwu4BcXDkqyEdjYzb6Y5FsD7PPV5DjgmZUuYshWvKdcOdSXW/F+RmC19bTa+oFFehrC5/rnFls4SID3pao2A5tHvZ9xSzJbVdMrXccwrbaeVls/sPp6Wm39wHh7GuRLzCeBN82bP6FbJkkag0EC/C7g5CQnJjkCuBS4aThlSZKWsuxTKFW1N8mHgVuBw4DPVtVDQ6vs1W/VnRZi9fW02vqB1dfTausHxthTqmpc+5IkDZFXYkpSowxwSWqUAb6IpW4RkOSnklzXrb8zydS8dW9P8vUkDyV5IMmR46x9McvtJ8nhSWa6PrYn+fi4az+QPnp6T5JvJtmb5OIF6zYkebR7bBhf1Qe23H6SnD7v83Z/kt8eb+UHNsh71K1/fZJdST49nooPbsDP3JuT3Nb9d/Tw/MwYSFX5mPeg94XsY8BJwBHAfcCpC8Z8CPinbvpS4LpuegK4Hzitm38jcFjD/bwfuLabPgrYCUw18h5NAW8HrgEunrf8WODx7nlNN72m4X5OAU7upn8W2A0c0/J7NG/9VcAXgE+33g9wB3BuN/064Khh1OUR+P5+fIuAqvoBsO8WAfNdCMx001uA9UkCnAfcX1X3AVTVs1X18pjqPpBB+ing6CQTwGuBHwDfG0/ZB7VkT1W1s6ruB360YNtfA7ZW1XNV9TywFTh/HEUfxLL7qapvV9Wj3fR/A3uAyfGUfVCDvEck+QVgLXDbOIrtw7L76e4RNVFVW7txL1bV/w6jKAN8f4vdIuD4A42pqr3A/9A72j4FqCS3dn9KfWwM9S5lkH62AC/RO6r7DvDJqnpu1AX3oZ+eRrHtqAylpiRn0js6fGxIdQ1i2T0leQ3wt8CfjKCu5RrkPToFeCHJl5Lck+RvupsBDswAH64J4N3AZd3zbyVZv7IlDeRM4GV6f5qfCHw0yUkrW5IWk2Qd8C/AB6tqvyPaxnwI+EpV7VrpQoZkAvglev8gvZPeaZgPDOOFDfD99XOLgB+P6U4vvAF4lt6/yl+rqme6P5G+Arxj5BUf3CD9vB+4pap+WFV7gP8EXg33rRjkNg6vxltADFRTktcDNwOfqKr/GnJtyzVIT2cDH06yE/gk8HtJrhhueYdskH52Afd2p1/2Av/OkHLBAN9fP7cIuAnY9+uFi4GvVu/biVuBn09yVBeEv8zK3153kH6+A5wDkORo4CzgkbFUfXCD3MbhVuC8JGuSrKH3vcWtI6qzX8vupxt/A3BNVW0ZYY2Hatk9VdVlVfXmqpqid9R6TVWt9P8wZpDP3F3AMUn2fTdxDsPKhZX+dvfV+AB+A/g2vXOJn+iW/SXw3m76SOCLwA7gG8BJ87b9HeAh4EHgr1e6l0H6ofdt+Re7fh4G/nSlezmEnt5J78jnJXp/TTw0b9vf73rdQe+UQ7P9dJ+3HwL3znucvtL9DPoezXuND/Aq+BXKED5z59L7hdoDwOeAI4ZRk5fSS1KjPIUiSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1Kj/h9VXn6+L6tiOAAAAABJRU5ErkJggg==\n"
908 | },
909 | "metadata": {
910 | "needs_background": "light"
911 | }
912 | }
913 | ]
914 | },
915 | {
916 | "cell_type": "code",
917 | "execution_count": 13,
918 | "metadata": {
919 | "id": "kQ7s8zWV7IgR"
920 | },
921 | "outputs": [],
922 | "source": [
923 | "mu0,mu1= mu_k_fold_fit_and_predict(make_mu_model, X=block.to_frame(), Z=instrument, y=outcome, n_splits=10, output_type=\"binary\")"
924 | ]
925 | },
926 | {
927 | "cell_type": "code",
928 | "execution_count": 14,
929 | "metadata": {
930 | "colab": {
931 | "base_uri": "https://localhost:8080/",
932 | "height": 206
933 | },
934 | "id": "_NVCV0q0g8wQ",
935 | "outputId": "d0a90b3d-966c-4830-81c8-f73c71262552"
936 | },
937 | "outputs": [
938 | {
939 | "output_type": "execute_result",
940 | "data": {
941 | "text/plain": [
942 | " p mu0 mu1 m1 m0 Z A Y\n",
943 | "0 0.147581 0.833333 0.843333 0.668028 0.513991 0 0 0\n",
944 | "1 0.143757 0.826667 0.830000 0.667640 0.513672 0 0 1\n",
945 | "2 0.143383 0.823333 0.836667 0.668948 0.512616 0 0 1\n",
946 | "3 0.143757 0.826667 0.836667 0.667598 0.514286 0 0 1\n",
947 | "4 0.143383 0.826667 0.830000 0.667870 0.513021 0 0 0"
948 | ],
949 | "text/html": [
 950 |       "<HTML table rendering stripped during extraction; see the text/plain output above>"
1116 | ]
1117 | },
1118 | "metadata": {},
1119 | "execution_count": 14
1120 | }
1121 | ],
1122 | "source": [
1123 | "data_and_nuisance_estimates = pd.DataFrame({'p': p, 'mu0': mu0, 'mu1': mu1, 'm1': m1, 'm0': m0,\n",
1124 | " 'Z': instrument, 'A': treatment, 'Y': outcome})\n",
1125 | "data_and_nuisance_estimates.head()"
1126 | ]
1127 | },
1128 | {
1129 | "cell_type": "markdown",
1130 | "metadata": {
1131 | "id": "VNhM7URdgzQB"
1132 | },
1133 | "source": [
1134 | "## Combine predicted values and data into estimate of LATE"
1135 | ]
1136 | },
1137 | {
1138 | "cell_type": "code",
1139 | "execution_count": 15,
1140 | "metadata": {
1141 | "id": "Nj0veiaW4RRm"
1142 | },
1143 | "outputs": [],
1144 | "source": [
1145 | "def late_estimator(mu1, mu0, m1, m0, p, Z, A, Y, prob = None):\n",
1146 | " '''\n",
1147 | " Estimator for LATE\n",
1148 | " '''\n",
1149 | " n = len(Y)\n",
1150 | " phi_zy = mu1 - mu0 + Z*(Y-mu1)/p - (1-Z)*(Y-mu0)/(1-p)\n",
1151 | " phi_za = m1 - m0 + Z*(A-m1)/p - (1-Z)*(A-m0)/(1-p)\n",
1152 | "\n",
1153 | " tau_za = phi_za.mean()\n",
1154 | " tau_hat = phi_zy.mean()/tau_za\n",
1155 | " phi = phi_zy - phi_za * tau_hat\n",
1156 | " \n",
1157 | " std_hat = math.sqrt((phi**2).mean()/tau_za**2/n)\n",
1158 | "\n",
1159 | " return tau_hat, std_hat\n"
1160 | ]
1161 | },
1162 | {
1163 | "cell_type": "code",
1164 | "execution_count": 16,
1165 | "metadata": {
1166 | "colab": {
1167 | "base_uri": "https://localhost:8080/"
1168 | },
1169 | "id": "SjDj0F9Bm9uq",
1170 | "outputId": "262886b4-da24-4b7c-eb75-c1a609e70e6c"
1171 | },
1172 | "outputs": [
1173 | {
1174 | "output_type": "stream",
1175 | "name": "stdout",
1176 | "text": [
1177 | "The estimate is 0.047826812983712996 pm 0.07239241460155232\n"
1178 | ]
1179 | }
1180 | ],
1181 | "source": [
1182 | "tau_hat, std_hat = late_estimator(**data_and_nuisance_estimates)\n",
1183 | "print(f\"The estimate is {tau_hat} pm {1.96*std_hat}\")"
1184 | ]
1185 | }
1186 | ],
1187 | "metadata": {
1188 | "colab": {
1189 | "provenance": [],
1190 | "authorship_tag": "ABX9TyMBLYRZL7Nk//toT2OnEWO8",
1191 | "include_colab_link": true
1192 | },
1193 | "kernelspec": {
1194 | "display_name": "Python 3",
1195 | "name": "python3"
1196 | },
1197 | "language_info": {
1198 | "name": "python"
1199 | }
1200 | },
1201 | "nbformat": 4,
1202 | "nbformat_minor": 0
1203 | }
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 Victor Veitch
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # causality-tutorials
2 | Short tutorials on the use of machine learning methods for causal inference
3 |
--------------------------------------------------------------------------------
/Sensitivity_Analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "Sensitivity_Analysis.ipynb",
7 | "provenance": [],
8 | "collapsed_sections": [],
9 | "include_colab_link": true
10 | },
11 | "kernelspec": {
12 | "name": "python3",
13 | "display_name": "Python 3"
14 | },
15 | "language_info": {
16 | "name": "python"
17 | }
18 | },
19 | "cells": [
20 | {
21 | "cell_type": "markdown",
22 | "metadata": {
23 | "id": "view-in-github",
24 | "colab_type": "text"
25 | },
26 | "source": [
27 | "
"
28 | ]
29 | },
30 | {
31 | "cell_type": "markdown",
32 | "metadata": {
33 | "id": "QfZkNLUb4B-p"
34 | },
35 | "source": [
36 | "# Sensitivity Analysis Tutorial\n",
37 | "\n",
38 | "This tutorial gives a short example for how to assess sensitivity to unobserved confounding in causal estimation. We use the Austen plot method (https://arxiv.org/abs/2003.01747). "
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "metadata": {
44 | "colab": {
45 | "base_uri": "https://localhost:8080/"
46 | },
47 | "id": "1RrdIBTLQ9Ac",
48 | "outputId": "62c039df-51d7-4f31-a150-4b6a0e0f0b26"
49 | },
50 | "source": [
51 | "!pip install austen-plots"
52 | ],
53 | "execution_count": 218,
54 | "outputs": [
55 | {
56 | "output_type": "stream",
57 | "name": "stdout",
58 | "text": [
59 | "Requirement already satisfied: austen-plots in /usr/local/lib/python3.7/dist-packages (0.1.0)\n",
60 | "Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from austen-plots) (1.19.5)\n",
61 | "Requirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from austen-plots) (1.1.5)\n",
62 | "Requirement already satisfied: plotnine in /usr/local/lib/python3.7/dist-packages (from austen-plots) (0.6.0)\n",
63 | "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.7/dist-packages (from austen-plots) (1.0.1)\n",
64 | "Requirement already satisfied: scipy in /usr/local/lib/python3.7/dist-packages (from austen-plots) (1.4.1)\n",
65 | "Requirement already satisfied: tqdm in /usr/local/lib/python3.7/dist-packages (from austen-plots) (4.62.3)\n",
66 | "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas->austen-plots) (2.8.2)\n",
67 | "Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas->austen-plots) (2018.9)\n",
68 | "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas->austen-plots) (1.15.0)\n",
69 | "Requirement already satisfied: statsmodels>=0.9.0 in /usr/local/lib/python3.7/dist-packages (from plotnine->austen-plots) (0.10.2)\n",
70 | "Requirement already satisfied: descartes>=1.1.0 in /usr/local/lib/python3.7/dist-packages (from plotnine->austen-plots) (1.1.0)\n",
71 | "Requirement already satisfied: patsy>=0.4.1 in /usr/local/lib/python3.7/dist-packages (from plotnine->austen-plots) (0.5.2)\n",
72 | "Requirement already satisfied: matplotlib>=3.1.1 in /usr/local/lib/python3.7/dist-packages (from plotnine->austen-plots) (3.2.2)\n",
73 | "Requirement already satisfied: mizani>=0.6.0 in /usr/local/lib/python3.7/dist-packages (from plotnine->austen-plots) (0.6.0)\n",
74 | "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=3.1.1->plotnine->austen-plots) (3.0.6)\n",
75 | "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=3.1.1->plotnine->austen-plots) (1.3.2)\n",
76 | "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=3.1.1->plotnine->austen-plots) (0.11.0)\n",
77 | "Requirement already satisfied: palettable in /usr/local/lib/python3.7/dist-packages (from mizani>=0.6.0->plotnine->austen-plots) (3.3.0)\n",
78 | "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from scikit-learn->austen-plots) (3.0.0)\n",
79 | "Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.7/dist-packages (from scikit-learn->austen-plots) (1.1.0)\n"
80 | ]
81 | }
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "metadata": {
87 | "id": "dS2X3Bq1-fxE"
88 | },
89 | "source": [
90 | "import numpy as np\n",
91 | "import pandas as pd\n",
92 | "import scipy as sp\n",
93 | "from sklearn import preprocessing\n",
94 | "from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, GradientBoostingRegressor, GradientBoostingClassifier\n",
95 | "from sklearn.linear_model import LogisticRegression\n",
96 | "from sklearn.model_selection import KFold, StratifiedKFold, train_test_split\n",
97 | "from sklearn.metrics import mean_squared_error, log_loss\n",
98 | "import sklearn\n",
99 | "import os\n",
100 | "import pathlib\n",
101 | "\n",
102 | "from austen_plots.AustenPlot import AustenPlot"
103 | ],
104 | "execution_count": 219,
105 | "outputs": []
106 | },
107 | {
108 | "cell_type": "code",
109 | "metadata": {
110 | "id": "zNsGKVyLSRxn"
111 | },
112 | "source": [
113 | "RANDOM_SEED = 42\n",
114 | "np.random.seed(RANDOM_SEED)"
115 | ],
116 | "execution_count": 220,
117 | "outputs": []
118 | },
119 | {
120 | "cell_type": "markdown",
121 | "metadata": {
122 | "id": "H0YG8hR8RRAP"
123 | },
124 | "source": [
125 | "# Data Loading and Initial Fit\n",
126 | "\n",
127 | "Load the diastolic blood pressure data and fit models for the propensity score and conditional expected outcome model, in the same way we'd do in a standard adjustment-based treatment effect estimation. For this tutorial, we'll use random forests for both models. \n",
128 | "\n",
129 | "This section doesn't contain anything special to sensitivity analysis, and can be safely skipped if you've already read the adjustment estimation tutorial."
130 | ]
131 | },
132 | {
133 | "cell_type": "markdown",
134 | "metadata": {
135 | "id": "yPbJeayiEs3u"
136 | },
137 | "source": [
138 | "##Load and Format Observational Data"
139 | ]
140 | },
141 | {
142 | "cell_type": "code",
143 | "metadata": {
144 | "id": "2AC9TPko-hbt"
145 | },
146 | "source": [
147 | "nhanes = pd.read_csv('https://raw.githubusercontent.com/vveitch/causality-tutorials/main/data/hbp_dbp.csv')"
148 | ],
149 | "execution_count": 221,
150 | "outputs": []
151 | },
152 | {
153 | "cell_type": "code",
154 | "metadata": {
155 | "colab": {
156 | "base_uri": "https://localhost:8080/",
157 | "height": 223
158 | },
159 | "id": "-A1LX6-t-hZD",
160 | "outputId": "2f9def4f-1348-48d2-d518-b5b5235b6e42"
161 | },
162 | "source": [
163 | "nhanes.head()"
164 | ],
165 | "execution_count": 222,
166 | "outputs": [
167 | {
168 | "output_type": "execute_result",
169 | "data": {
170 | "text/html": [
171 | "\n",
172 | "\n",
185 | "
\n",
186 | " \n",
187 | " \n",
188 | " | \n",
189 | " white | \n",
190 | " black | \n",
191 | " hisp | \n",
192 | " female | \n",
193 | " age_mo | \n",
194 | " hhsize | \n",
195 | " edu | \n",
196 | " married | \n",
197 | " widowed | \n",
198 | " divorced | \n",
199 | " separated | \n",
200 | " income | \n",
201 | " packyr | \n",
202 | " bmi | \n",
203 | " pulse | \n",
204 | " sodium | \n",
205 | " potassium | \n",
206 | " r_sodipota | \n",
207 | " alcohol | \n",
208 | " insurance | \n",
209 | " together | \n",
210 | " ave_dbp | \n",
211 | " trt_dbp | \n",
212 | "
\n",
213 | " \n",
214 | " \n",
215 | " \n",
216 | " 0 | \n",
217 | " 0 | \n",
218 | " 0 | \n",
219 | " 1 | \n",
220 | " 0 | \n",
221 | " 261 | \n",
222 | " 4 | \n",
223 | " 12 | \n",
224 | " 0 | \n",
225 | " 0 | \n",
226 | " 0 | \n",
227 | " 0 | \n",
228 | " 2.251292 | \n",
229 | " 0.0 | \n",
230 | " 25.500000 | \n",
231 | " 80 | \n",
232 | " 5216 | \n",
233 | " 4350 | \n",
234 | " 1.199080 | \n",
235 | " 0 | \n",
236 | " 0 | \n",
237 | " 24 | \n",
238 | " 60 | \n",
239 | " 0 | \n",
240 | "
\n",
241 | " \n",
242 | " 1 | \n",
243 | " 1 | \n",
244 | " 0 | \n",
245 | " 0 | \n",
246 | " 0 | \n",
247 | " 428 | \n",
248 | " 2 | \n",
249 | " 17 | \n",
250 | " 1 | \n",
251 | " 0 | \n",
252 | " 0 | \n",
253 | " 0 | \n",
254 | " 3.881564 | \n",
255 | " 0.0 | \n",
256 | " 29.400000 | \n",
257 | " 72 | \n",
258 | " 2668 | \n",
259 | " 2387 | \n",
260 | " 1.117721 | \n",
261 | " 0 | \n",
262 | " 1 | \n",
263 | " 104 | \n",
264 | " 94 | \n",
265 | " 0 | \n",
266 | "
\n",
267 | " \n",
268 | " 2 | \n",
269 | " 1 | \n",
270 | " 0 | \n",
271 | " 0 | \n",
272 | " 1 | \n",
273 | " 995 | \n",
274 | " 1 | \n",
275 | " 12 | \n",
276 | " 0 | \n",
277 | " 1 | \n",
278 | " 0 | \n",
279 | " 0 | \n",
280 | " 1.504077 | \n",
281 | " 0.0 | \n",
282 | " 19.100000 | \n",
283 | " 64 | \n",
284 | " 2849 | \n",
285 | " 3775 | \n",
286 | " 0.754702 | \n",
287 | " 0 | \n",
288 | " 1 | \n",
289 | " 156 | \n",
290 | " 70 | \n",
291 | " 1 | \n",
292 | "
\n",
293 | " \n",
294 | " 3 | \n",
295 | " 0 | \n",
296 | " 0 | \n",
297 | " 1 | \n",
298 | " 0 | \n",
299 | " 531 | \n",
300 | " 4 | \n",
301 | " 7 | \n",
302 | " 1 | \n",
303 | " 0 | \n",
304 | " 0 | \n",
305 | " 0 | \n",
306 | " 2.674149 | \n",
307 | " 3650.0 | \n",
308 | " 44.400002 | \n",
309 | " 92 | \n",
310 | " 3433 | \n",
311 | " 2716 | \n",
312 | " 1.263991 | \n",
313 | " 0 | \n",
314 | " 0 | \n",
315 | " 52 | \n",
316 | " 84 | \n",
317 | " 0 | \n",
318 | "
\n",
319 | " \n",
320 | " 4 | \n",
321 | " 0 | \n",
322 | " 0 | \n",
323 | " 1 | \n",
324 | " 0 | \n",
325 | " 581 | \n",
326 | " 7 | \n",
327 | " 0 | \n",
328 | " 1 | \n",
329 | " 0 | \n",
330 | " 0 | \n",
331 | " 0 | \n",
332 | " 2.602690 | \n",
333 | " 730.0 | \n",
334 | " 37.500000 | \n",
335 | " 68 | \n",
336 | " 1808 | \n",
337 | " 1883 | \n",
338 | " 0.960170 | \n",
339 | " 117 | \n",
340 | " 1 | \n",
341 | " 0 | \n",
342 | " 80 | \n",
343 | " 0 | \n",
344 | "
\n",
345 | " \n",
346 | "
\n",
347 | "
"
348 | ],
349 | "text/plain": [
350 | " white black hisp female ... insurance together ave_dbp trt_dbp\n",
351 | "0 0 0 1 0 ... 0 24 60 0\n",
352 | "1 1 0 0 0 ... 1 104 94 0\n",
353 | "2 1 0 0 1 ... 1 156 70 1\n",
354 | "3 0 0 1 0 ... 0 52 84 0\n",
355 | "4 0 0 1 0 ... 1 0 80 0\n",
356 | "\n",
357 | "[5 rows x 23 columns]"
358 | ]
359 | },
360 | "metadata": {},
361 | "execution_count": 222
362 | }
363 | ]
364 | },
365 | {
366 | "cell_type": "code",
367 | "metadata": {
368 | "id": "fW3FdK8rJNHM"
369 | },
370 | "source": [
371 | "# scale continuous covariates\n",
372 | "cont_vars = ['age_mo', 'hhsize', 'edu', 'income', 'packyr', 'bmi',\n",
373 | " 'pulse', 'sodium', 'potassium', 'r_sodipota', 'alcohol', 'together']\n",
374 | "nhanes[cont_vars] = preprocessing.scale(nhanes[cont_vars])\n"
375 | ],
376 | "execution_count": 223,
377 | "outputs": []
378 | },
379 | {
380 | "cell_type": "code",
381 | "metadata": {
382 | "id": "APOqpHmrOGzo"
383 | },
384 | "source": [
385 | "confounders = nhanes.drop(columns=['trt_dbp', 'ave_dbp'])\n",
386 | "outcome = nhanes['ave_dbp']\n",
387 | "treatment = nhanes['trt_dbp']"
388 | ],
389 | "execution_count": 224,
390 | "outputs": []
391 | },
392 | {
393 | "cell_type": "markdown",
394 | "metadata": {
395 | "id": "C576dWRsa3ad"
396 | },
397 | "source": [
398 | "## Specify Nuisance Function Models\n",
399 | "\n",
400 | "The next step is to specify models for the conditional expected outcome and propensity score"
401 | ]
402 | },
403 | {
404 | "cell_type": "code",
405 | "metadata": {
406 | "colab": {
407 | "base_uri": "https://localhost:8080/"
408 | },
409 | "id": "qyOhSZRQRb8W",
410 | "outputId": "905a03a6-e8c0-4572-e3e0-6f195b6409ad"
411 | },
412 | "source": [
413 | "# specify a model for the conditional expected outcome\n",
414 | "\n",
415 | "# make a function that returns a sklearn model for later use in k-folding\n",
416 | "def make_Q_model():\n",
417 | "# return RandomForestRegressor(random_state=RANDOM_SEED, n_estimators=200, max_depth=None)\n",
418 | " return GradientBoostingRegressor(random_state=RANDOM_SEED, n_estimators=200, max_depth=3)\n",
419 | "Q_model = make_Q_model()\n",
420 | "\n",
421 | "# Sanity check that chosen model actually improves test error\n",
422 | "# A real analysis should give substantial attention to model selection and validation \n",
423 | "\n",
424 | "X_w_treatment = confounders.copy()\n",
425 | "X_w_treatment[\"treatment\"] = treatment\n",
426 | "\n",
427 | "X_train, X_test, y_train, y_test = train_test_split(X_w_treatment, outcome, test_size=0.2)\n",
428 | "Q_model.fit(X_train, y_train)\n",
429 | "y_pred = Q_model.predict(X_test)\n",
430 | "\n",
431 | "test_mse=mean_squared_error(y_pred, y_test)\n",
432 | "print(f\"Test MSE of fit model {test_mse}\") \n",
433 | "baseline_mse=mean_squared_error(y_train.mean()*np.ones_like(y_test), y_test)\n",
434 | "print(f\"Test MSE of no-covariate model {baseline_mse}\")"
435 | ],
436 | "execution_count": 225,
437 | "outputs": [
438 | {
439 | "output_type": "stream",
440 | "name": "stdout",
441 | "text": [
442 | "Test MSE of fit model 188.37465077057507\n",
443 | "Test MSE of no-covariate model 196.715556166321\n"
444 | ]
445 | }
446 | ]
447 | },
448 | {
449 | "cell_type": "code",
450 | "metadata": {
451 | "colab": {
452 | "base_uri": "https://localhost:8080/"
453 | },
454 | "id": "uq6eZEBXbsaI",
455 | "outputId": "2a544732-9c9c-4ec6-be9c-8ef28610365c"
456 | },
457 | "source": [
458 | "# specify a model for the propensity score\n",
459 | "\n",
460 | "def make_g_model():\n",
461 | " return LogisticRegression(max_iter=1000)\n",
462 | " # return RandomForestClassifier(n_estimators=100, max_depth=5)\n",
463 | " # return GradientBoostingClassifier(n_estimators=200, max_depth=3)\n",
464 | "\n",
465 | "g_model = make_g_model()\n",
466 | "# Sanity check that chosen model actually improves test error\n",
467 | "# A real analysis should give substantial attention to model selection and validation \n",
468 | "\n",
469 | "X_train, X_test, a_train, a_test = train_test_split(confounders, treatment, test_size=0.2, stratify=treatment)\n",
470 | "g_model.fit(X_train, a_train)\n",
471 | "a_pred = g_model.predict_proba(X_test)[:,1]\n",
472 | "\n",
473 | "test_ce=log_loss(a_test, a_pred)\n",
474 | "print(f\"Test CE of fit model {test_ce}\") \n",
475 | "baseline_ce=log_loss(a_test, a_train.mean()*np.ones_like(a_test))\n",
476 | "print(f\"Test CE of no-covariate model {baseline_ce}\")"
477 | ],
478 | "execution_count": 226,
479 | "outputs": [
480 | {
481 | "output_type": "stream",
482 | "name": "stdout",
483 | "text": [
484 | "Test CE of fit model 0.4844169173325631\n",
485 | "Test CE of no-covariate model 0.6785695199678788\n"
486 | ]
487 | }
488 | ]
489 | },
490 | {
491 | "cell_type": "markdown",
492 | "metadata": {
493 | "id": "2RkvV_4_dFWo"
494 | },
495 | "source": [
496 | "## Use cross fitting to get get predicted outcomes and propensity scores for each unit"
497 | ]
498 | },
499 | {
500 | "cell_type": "code",
501 | "metadata": {
502 | "id": "KA0AsEGJ_X3b"
503 | },
504 | "source": [
505 | "# helper functions to implement the cross fitting\n",
506 | "\n",
507 | "def treatment_k_fold_fit_and_predict(make_model, X:pd.DataFrame, A:np.array, n_splits:int):\n",
508 | " \"\"\"\n",
509 | " Implements K fold cross-fitting for the model predicting the treatment A. \n",
510 | " That is, \n",
511 | " 1. Split data into K folds\n",
512 | " 2. For each fold j, the model is fit on the other K-1 folds\n",
513 | " 3. The fitted model is used to make predictions for each data point in fold j\n",
514 | " Returns an array containing the predictions \n",
515 | "\n",
516 | " Args:\n",
517 | " model: function that returns sklearn model (which implements fit and predict_prob)\n",
518 | " X: dataframe of variables to adjust for\n",
519 | " A: array of treatments\n",
520 | " n_splits: number of splits to use\n",
521 | " \"\"\"\n",
522 | " predictions = np.full_like(A, np.nan, dtype=float)\n",
523 | " kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)\n",
524 | " \n",
525 | " for train_index, test_index in kf.split(X, A):\n",
526 | " X_train = X.loc[train_index]\n",
527 | " A_train = A.loc[train_index]\n",
528 | " g = make_model()\n",
529 | " g.fit(X_train, A_train)\n",
530 | "\n",
531 | " # get predictions for split\n",
532 | " predictions[test_index] = g.predict_proba(X.loc[test_index])[:, 1]\n",
533 | "\n",
534 | " assert np.isnan(predictions).sum() == 0\n",
535 | " return predictions\n",
536 | "\n",
537 | "\n",
538 | "def outcome_k_fold_fit_and_predict(make_model, X:pd.DataFrame, y:np.array, A:np.array, n_splits:int, output_type:str):\n",
539 | " \"\"\"\n",
540 | " Implements K fold cross-fitting for the model predicting the outcome Y. \n",
541 | " That is, \n",
542 | " 1. Split data into K folds\n",
543 | " 2. For each fold j, the model is fit on the other K-1 folds\n",
544 | " 3. The fitted model is used to make predictions for each data point in fold j\n",
545 | " Returns two arrays containing the predictions for all units untreated, all units treated \n",
546 | "\n",
547 | " Args:\n",
548 | " model: function that returns sklearn model (that implements fit and either predict_prob or predict)\n",
549 | " X: dataframe of variables to adjust for\n",
550 | " y: array of outcomes\n",
551 | " A: array of treatments\n",
552 | " n_splits: number of splits to use\n",
553 | " output_type: type of outcome, \"binary\" or \"continuous\"\n",
554 | "\n",
555 | " \"\"\"\n",
556 | " predictions0 = np.full_like(A, np.nan, dtype=float)\n",
557 | " predictions1 = np.full_like(y, np.nan, dtype=float)\n",
558 | " if output_type == 'binary':\n",
559 | " kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)\n",
560 | " elif output_type == 'continuous':\n",
561 | " kf = KFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)\n",
562 | "\n",
563 | " # include the treatment as input feature\n",
564 | " X_w_treatment = X.copy()\n",
565 | " X_w_treatment[\"A\"] = A\n",
566 | "\n",
567 | " # for predicting effect under treatment / control status for each data point \n",
568 | " X0 = X_w_treatment.copy()\n",
569 | " X0[\"A\"] = 0\n",
570 | " X1 = X_w_treatment.copy()\n",
571 | " X1[\"A\"] = 1\n",
572 | "\n",
573 | " \n",
574 | " for train_index, test_index in kf.split(X_w_treatment, y):\n",
575 | " X_train = X_w_treatment.loc[train_index]\n",
576 | " y_train = y.loc[train_index]\n",
577 | " q = make_model()\n",
578 | " q.fit(X_train, y_train)\n",
579 | "\n",
580 | " if output_type =='binary':\n",
581 | " predictions0[test_index] = q.predict_proba(X0.loc[test_index])[:, 1]\n",
582 | " predictions1[test_index] = q.predict_proba(X1.loc[test_index])[:, 1]\n",
583 | " elif output_type == 'continuous':\n",
584 | " predictions0[test_index] = q.predict(X0.loc[test_index])\n",
585 | " predictions1[test_index] = q.predict(X1.loc[test_index])\n",
586 | "\n",
587 | " assert np.isnan(predictions0).sum() == 0\n",
588 | " assert np.isnan(predictions1).sum() == 0\n",
589 | " return predictions0, predictions1"
590 | ],
591 | "execution_count": 227,
592 | "outputs": []
593 | },
594 | {
595 | "cell_type": "code",
596 | "metadata": {
597 | "id": "wVcE6pRQeMNf"
598 | },
599 | "source": [
600 | "g = treatment_k_fold_fit_and_predict(make_g_model, X=confounders, A=treatment, n_splits=10)"
601 | ],
602 | "execution_count": 228,
603 | "outputs": []
604 | },
605 | {
606 | "cell_type": "code",
607 | "metadata": {
608 | "id": "GLEHlLLdWSh9"
609 | },
610 | "source": [
611 | "Q0,Q1=outcome_k_fold_fit_and_predict(make_Q_model, X=confounders, y=outcome, A=treatment, n_splits=10, output_type=\"continuous\")"
612 | ],
613 | "execution_count": 229,
614 | "outputs": []
615 | },
616 | {
617 | "cell_type": "code",
618 | "metadata": {
619 | "colab": {
620 | "base_uri": "https://localhost:8080/",
621 | "height": 203
622 | },
623 | "id": "_NVCV0q0g8wQ",
624 | "outputId": "9f4fb865-3b5f-4e46-f7a3-c343b1924e9e"
625 | },
626 | "source": [
627 | "data_and_nuisance_estimates = pd.DataFrame({'g': g, 'Q0': Q0, 'Q1': Q1, 'A': treatment, 'Y': outcome})\n",
628 | "data_and_nuisance_estimates.head()"
629 | ],
630 | "execution_count": 230,
631 | "outputs": [
632 | {
633 | "output_type": "execute_result",
634 | "data": {
635 | "text/html": [
636 | "\n",
637 | "\n",
650 | "
\n",
651 | " \n",
652 | " \n",
653 | " | \n",
654 | " g | \n",
655 | " Q0 | \n",
656 | " Q1 | \n",
657 | " A | \n",
658 | " Y | \n",
659 | "
\n",
660 | " \n",
661 | " \n",
662 | " \n",
663 | " 0 | \n",
664 | " 0.009122 | \n",
665 | " 69.928410 | \n",
666 | " 72.999765 | \n",
667 | " 0 | \n",
668 | " 60 | \n",
669 | "
\n",
670 | " \n",
671 | " 1 | \n",
672 | " 0.153910 | \n",
673 | " 84.867654 | \n",
674 | " 90.188475 | \n",
675 | " 0 | \n",
676 | " 94 | \n",
677 | "
\n",
678 | " \n",
679 | " 2 | \n",
680 | " 0.777998 | \n",
681 | " 73.341629 | \n",
682 | " 67.400933 | \n",
683 | " 1 | \n",
684 | " 70 | \n",
685 | "
\n",
686 | " \n",
687 | " 3 | \n",
688 | " 0.113439 | \n",
689 | " 83.498759 | \n",
690 | " 82.388082 | \n",
691 | " 0 | \n",
692 | " 84 | \n",
693 | "
\n",
694 | " \n",
695 | " 4 | \n",
696 | " 0.221874 | \n",
697 | " 87.987430 | \n",
698 | " 86.811025 | \n",
699 | " 0 | \n",
700 | " 80 | \n",
701 | "
\n",
702 | " \n",
703 | "
\n",
704 | "
"
705 | ],
706 | "text/plain": [
707 | " g Q0 Q1 A Y\n",
708 | "0 0.009122 69.928410 72.999765 0 60\n",
709 | "1 0.153910 84.867654 90.188475 0 94\n",
710 | "2 0.777998 73.341629 67.400933 1 70\n",
711 | "3 0.113439 83.498759 82.388082 0 84\n",
712 | "4 0.221874 87.987430 86.811025 0 80"
713 | ]
714 | },
715 | "metadata": {},
716 | "execution_count": 230
717 | }
718 | ]
719 | },
720 | {
721 | "cell_type": "markdown",
722 | "metadata": {
723 | "id": "VNhM7URdgzQB"
724 | },
725 | "source": [
726 | "## Combine predicted values and data into estimate of ATE"
727 | ]
728 | },
729 | {
730 | "cell_type": "code",
731 | "metadata": {
732 | "id": "O_F5r0SSkzzK"
733 | },
734 | "source": [
735 | "def ate_aiptw(Q0, Q1, g, A, Y, prob_t=None):\n",
736 | " \"\"\"\n",
737 | " # Double ML estimator for the ATE\n",
738 | " \"\"\"\n",
739 | "\n",
740 | " tau_hat = (Q1 - Q0 + A*(Y-Q1)/g - (1-A)*(Y-Q0)/(1-g)).mean()\n",
741 | " \n",
742 | " scores = Q1 - Q0 + A*(Y-Q1)/g - (1-A)*(Y-Q0)/(1-g) - tau_hat\n",
743 | " n = Y.shape[0] # number of observations\n",
744 | " std_hat = np.std(scores) / np.sqrt(n)\n",
745 | "\n",
746 | " return tau_hat, std_hat\n"
747 | ],
748 | "execution_count": 231,
749 | "outputs": []
750 | },
751 | {
752 | "cell_type": "code",
753 | "metadata": {
754 | "colab": {
755 | "base_uri": "https://localhost:8080/"
756 | },
757 | "id": "SjDj0F9Bm9uq",
758 | "outputId": "e3a2f168-1bf8-47fd-f046-dd05390bf0d5"
759 | },
760 | "source": [
761 | "tau_hat, std_hat = ate_aiptw(**data_and_nuisance_estimates)\n",
762 | "print(f\"The estimate is {tau_hat} pm {1.96*std_hat}\")"
763 | ],
764 | "execution_count": 232,
765 | "outputs": [
766 | {
767 | "output_type": "stream",
768 | "name": "stdout",
769 | "text": [
770 | "The estimate is -2.6682535341413107 pm 1.52254875417939\n"
771 | ]
772 | }
773 | ]
774 | },
775 | {
776 | "cell_type": "markdown",
777 | "metadata": {
778 | "id": "L_GUa-5vMmL4"
779 | },
780 | "source": [
781 | "#Sensitivity Analysis\n",
782 | "\n",
783 | "We found an average treatment effect of diastolic blood pressure medication of about 2, significant at the 0.95 level. We'd now conduct some analysis to decide how sensitive this conclusions is to possible unobserved confounding."
784 | ]
785 | },
786 | {
787 | "cell_type": "code",
788 | "metadata": {
789 | "id": "zEv_RlkUNQZ9"
790 | },
791 | "source": [
792 | "# the first step is to choose a level of bias that would undermine the qualitative conclusion of the study\n",
793 | "# we'll go with the nominal effect\n",
794 | "target_bias = 2.00 # note: bias is specified as an absolute number"
795 | ],
796 | "execution_count": 233,
797 | "outputs": []
798 | },
799 | {
800 | "cell_type": "markdown",
801 | "metadata": {
802 | "id": "bDeSqyvvN3mg"
803 | },
804 | "source": [
805 | "## Compute influence strength of covariates\n",
806 | "Our task is to assess whether it's plausible that an unobserved confounder could be strong enough to induce a bias of 2 or more. To make that easier, we'd like to know how strong the observed confounders are. Austen plots computes these reference strengths by seeing how much model performance degrades when the covariates are removed. Accordingly, we refit the models with each (group of) reference covariate removed. "
807 | ]
808 | },
809 | {
810 | "cell_type": "code",
811 | "metadata": {
812 | "id": "b_51ei-JOIrn"
813 | },
814 | "source": [
815 | "# First, lets define the groups of covariates we'll measure the strength of.\n",
816 | "# Note: it's important to group the covariates into meaningful groups, because strength is measured conditional on all remaining covariates\n",
817 | "# E.g., if we remove only \"black\" (and not \"hispanic, white\") then we will measure no effect, because we can infer the removed variable from the remaining ones\n",
818 | "\n",
819 | "covariate_groups = {\n",
820 | " 'socioeconomic': ['white', 'black', 'hisp' , 'hhsize', 'edu',\n",
821 | " 'married', 'widowed', 'divorced', 'separated', 'income', 'packyr', 'alcohol',\n",
822 | " 'insurance', 'together'],\n",
823 | " 'sex': ['female'],\n",
824 | " 'age': ['age_mo'],\n",
825 | " 'health': ['bmi', 'pulse', 'sodium', 'potassium', 'r_sodipota']}"
826 | ],
827 | "execution_count": 234,
828 | "outputs": []
829 | },
830 | {
831 | "cell_type": "code",
832 | "metadata": {
833 | "id": "eLskfBqCQlwZ"
834 | },
835 | "source": [
836 | "# For each covariate group, refit the models without using that group\n",
837 | "nuisance_estimates = {}\n",
838 | "for group, covs in covariate_groups.items():\n",
839 | " remaining_confounders = confounders.drop(columns=covs)\n",
840 | "\n",
841 | " g = treatment_k_fold_fit_and_predict(make_g_model, X=remaining_confounders, A=treatment, n_splits=5)\n",
842 | " Q0, Q1 = outcome_k_fold_fit_and_predict(make_Q_model, X=remaining_confounders, y=outcome, A=treatment, n_splits=5, output_type=\"continuous\")\n",
843 | "\n",
844 | " data_and_nuisance_estimates = pd.DataFrame({'g': g, 'Q0': Q0, 'Q1': Q1, 'A': treatment, 'Y': outcome})\n",
845 | " nuisance_estimates[group] = data_and_nuisance_estimates\n"
846 | ],
847 | "execution_count": 235,
848 | "outputs": []
849 | },
850 | {
851 | "cell_type": "markdown",
852 | "metadata": {
853 | "id": "73aTyanbTihM"
854 | },
855 | "source": [
856 | "## Save computed estimates as CSVs\n",
857 | "The Austen plot code expects the nuisance function estimates to be provided as csvs with columns 'g', 'Q', 't', 'y'"
858 | ]
859 | },
860 | {
861 | "cell_type": "code",
862 | "metadata": {
863 | "id": "Am4bdBMGXhqr"
864 | },
865 | "source": [
866 | "data_and_nuisance_path = 'data_and_nuisance_estimates.csv'\n",
867 | "covariate_dir_path = 'covariates/'"
868 | ],
869 | "execution_count": 236,
870 | "outputs": []
871 | },
872 | {
873 | "cell_type": "code",
874 | "metadata": {
875 | "id": "YJ-QU3gXSqiz"
876 | },
877 | "source": [
878 | "def _convert_to_austen_format(nuisance_estimate_df: pd.DataFrame):\n",
879 | " austen_df = pd.DataFrame()\n",
880 | " austen_df['y']=nuisance_estimate_df['Y']\n",
881 | " austen_df['t']=nuisance_estimate_df['A']\n",
882 | " austen_df['g']=nuisance_estimate_df['g']\n",
883 | " A = nuisance_estimate_df['A']\n",
884 | " austen_df['Q']=A*nuisance_estimate_df['Q1'] + (1-A)*nuisance_estimate_df['Q0'] # use Q1 when A=1, and Q0 when A=0\n",
885 | "\n",
886 | " return austen_df"
887 | ],
888 | "execution_count": 237,
889 | "outputs": []
890 | },
891 | {
892 | "cell_type": "code",
893 | "metadata": {
894 | "id": "31BWqKXmVAQr"
895 | },
896 | "source": [
897 | "austen_data_and_nuisance = _convert_to_austen_format(data_and_nuisance_estimates)\n",
898 | "austen_data_and_nuisance.to_csv(data_and_nuisance_path, index=False)\n",
899 | "\n",
900 | "pathlib.Path(covariate_dir_path).mkdir(exist_ok=True)\n",
901 | "for group, nuisance_estimate in nuisance_estimates.items():\n",
902 | " austen_nuisance_estimate = _convert_to_austen_format(nuisance_estimate)\n",
903 | " austen_nuisance_estimate.to_csv(os.path.join(covariate_dir_path,group+\".csv\"), index=False)"
904 | ],
905 | "execution_count": 238,
906 | "outputs": []
907 | },
908 | {
909 | "cell_type": "markdown",
910 | "metadata": {
911 | "id": "C84zSBeIVe0L"
912 | },
913 | "source": [
914 | "## Make plots"
915 | ]
916 | },
917 | {
918 | "cell_type": "code",
919 | "metadata": {
920 | "id": "2C0cixtvVhmD"
921 | },
922 | "source": [
923 | "ap = AustenPlot(data_and_nuisance_path, covariate_dir_path)"
924 | ],
925 | "execution_count": 239,
926 | "outputs": []
927 | },
928 | {
929 | "cell_type": "code",
930 | "metadata": {
931 | "colab": {
932 | "base_uri": "https://localhost:8080/"
933 | },
934 | "id": "JFgN5L6YW0oF",
935 | "outputId": "9bb60caf-daef-4ba3-8ab2-8a4f6d3b43eb"
936 | },
937 | "source": [
938 | "p, plot_coords, variable_coords = ap.fit(bias=target_bias) # recall we set target_bias=2.0"
939 | ],
940 | "execution_count": 240,
941 | "outputs": [
942 | {
943 | "output_type": "stream",
944 | "name": "stdout",
945 | "text": [
946 | "Fitting main dataset\n"
947 | ]
948 | }
949 | ]
950 | },
951 | {
952 | "cell_type": "code",
953 | "metadata": {
954 | "colab": {
955 | "base_uri": "https://localhost:8080/",
956 | "height": 396
957 | },
958 | "id": "lfPaV4IDckdS",
959 | "outputId": "56a1085c-cc7d-45e7-d61b-832125c05857"
960 | },
961 | "source": [
962 | "p"
963 | ],
964 | "execution_count": 241,
965 | "outputs": [
966 | {
967 | "output_type": "display_data",
968 | "data": {
969 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAikAAAFqCAYAAADft8pBAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAPYQAAD2EBqD+naQAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nOzdeVhV1f4/8PcGZZZ5EEgFQUHERCC8OCKpgEMO5Zii+FVMMiPLORUnFKfKuUKcS8vZSjMVblpKCloaRFgMiSMIB5kR9u8Pfpzr6YAC+yAHeb+eh+dy9l57rc9ZN8/5sNbaawuiKIogIiIiUjMaDR0AERERUVWYpBAREZFaYpJCREREaolJChEREaklJilERESklpikEBERkVpikkJERERqiUkKERERqSUmKURERKSWmKTQC0MQBISFhTV0GEREpCJMUkht7dy5E4IgKPxYWlqiT58+OHnyZEOHp1L//PMPlixZAi8vL5iYmMDc3Bw+Pj44c+ZMjesoLy/H6tWrYW9vDx0dHbz88sv48ssv6zFqIqL61ayhAyB6lqVLl8Le3h6iKOLevXvYuXMnBgwYgBMnTmDQoEHycoWFhWjWrHH+J33s2DFERERg6NChmDBhAh4/fozdu3ejX79+iIqKQlBQ0DPrWLBgAVatWoUpU6bglVdewbFjxzB27FgIgoDRo0c/h3dBRKRaAh8wSOpq586dCAoKwuXLl+Hp6Sk/np2dDSsrK4wYMQL79u1rwAhV5/fff4eVlRXMzc3lx4qLi+Hm5oa8vDz8888/T70+IyMD9vb2CA4OxqZNmwAAoiiid+/eSElJQWpqKjQ1Nev1PRARqRqne6jRMTY2hq6urtKoyb/XpKSlpSEkJAROTk7Q1dWFmZkZRowYgdTUVIXrSktLsWTJErRr1w46OjowMzNDjx498MMPPzyHd1OhY8eOCgkKAGhra2PAgAG4desWHj169NTrjx07htLSUoSEhMiPCYKAadOm4datW7h48WK9xE1EVJ8a59g4NSkymQyZmZkQRRH379/Hxo0bkZeXh3Hjxj31usuXL+Pnn3/G6NGj8dJLLyE1NRVbt26Fj48PEhISoKenBwAICwvDypUrMXnyZHh5eSE3NxdXrlxBfHw8+vXrV2395eXlePjwYY3eg5GREZo3b17zN/3/3b17F3p6evJYq3P16lXo6+ujQ4cOCse9vLzk53v06FHr9omIGhKTFFJ7ffv2VXitra2NqKiopyYQADBw4EC88cYbCscGDx4Mb29vHDp0COPHjwcAfPvttxgwYAA+++yzWsWVnp4Oe3v7GpWNjo6Gj49Preq/efMmDh8+jBEjRjxzqubOnTuwsrKCIAgKx62trQEAt2/frlXbRETqgEkKqb3Nmzejffv2AIB79+5h7969mDx5Mlq0aIHhw4dXe52urq7899LSUuTm5sLR0RHGxsaIj4+XJynGxsb4/fffkZycjHbt2tU4rpYtW9Z4Sqhz5841rhcACgoKMGLECOjq6mLVqlXPLF9YWAhtbW2l4zo6OvLzRESNDZMUUnteXl4KC2fHjBmDLl26YPr06Rg0aBC0tLSqvK6wsBArV67Ejh07kJGRgSfXiMtkMvnvS5cuxZAhQ9C+fXu4urrC398f48ePx8svv/zUuHR0dJRGeVShrKwMo0ePRkJCAk6ePAkbG5tnXqOrq4vi4mKl40VFRfLzRESNDRfOUqOjoaGBPn364M6dO0hOTq623DvvvIMVK1Zg5MiR+Oqrr3D69Gn88MMPMDMzQ3l5ubxcr1698NdffyEqKgqurq6IjIyEu7s7IiMjnxpHWVkZ7t69W6OfkpKSGr+/KVOm4JtvvsHOnTvh6+tbo2usra1x9+5d/PtmvTt37gBAjRIdIiJ1w5EUapQeP34MAMjLy6u2zMGDBzFhwgSsW7dOfqyoqAg5OTlKZU1NTREUFISgoCDk5eWhV69eCAsLw+TJk6ut/59//lH5mpRZs2Zhx44d+PjjjzFmzJga1Q0Abm5uiIyMRGJiIlxcXOTHY2Nj5eeJiBobJinU6JSWluL06dPQ0tJSupvlSZqamkojCxs3bkRZWZnCsaysLJiZmclfGxgYwNHR8Zl7k6h6TcqaNWuwdu1azJ8/H++++2615WQyGe7cuQNra2sYGRkBAIYMGYL33nsPW7ZsUdgnZdu2bbC1tUW3bt1qFCcRkTphkkJq7+TJk/jjjz8AAPfv38cXX3yB5ORkzJ07F4aGhtVeN2jQIOzZswdGRkZwcXHBxYsXcebMGYWEBABcXFzg4+MDDw8PmJqa4sqVKzh48CCmT5/+1LhUuSblyJEjmD17Ntq1a4cOHTpg7969Cuf79esHKysredmgoCDs2LEDEydOBAC89NJLCA0NxZo1a1BaWopXXnkFR48exfnz57Fv3z5u5EZEjRKTFFJ7ixYtkv+uo6MDZ2dnbN26FVOnTn3qdZ988gk0NTWxb98+FBUVoXv37jhz5gz8/PwUys2YMQPHjx/H6dOnUVxcjDZt2mD58uWYNWtWvbyfqvz6668AgOTkZPldR0+Kjo6WJynVWbVqFUxMTPDpp59i586daNeuHfbu3YuxY8fWS8xERPWN2+ITERGRWuLdPURERKSWmKQQERGRWmKSQkRERGqJSQoRERGpJSYpREREpJZUmqQUFRVV+fwQdfbjjz9i8ODBsLGxgSAIOHr0aEOHRERERJCYpMTExOC9996Dl5cXDAwMoK+vDz09PbRo0QJeXl4IDQ1FTEyMikKtH/n5+ejcuTM2b97c0KEQERHRE2q9T0ppaSk+/fRTrF+/HqmpqTA1NYW7uzvatm0LExMTiKKI7OxspKSkID4+Hg8fPkSbNm3w/vvvY+rUqWjevHl9vRfJBEHAkSNHMHTo0IYOhYiIqMmr9Y6zjo6OKCkpwYQJEzBy5Ei4u7s/tXxcXBy+/vprhIeHY+3atUhNTa1rrERERNSE1DpJmT9/PiZOnAhtbe0alffw8ICHhweWLl2KHTt21DpAIiIiapq4Lf4TajPdU1hYiJKSkucQFRG9SLS0tKCrq9vQYRA1CnzAYA1lZGTg9u3bAICysjLk5+fDxMSkyrKffPIJSkpK4OXlhd69ez/PMOUqn3pbVlbWIO0DQLNmzfD48eMGa18d+gBgP1RiP1S4d+8efHx8mKgQ1UCtkpSCggIkJSXB0dERLVq0UDj3008/oXv37ioNTp3Y2trC1tYWACCTyXD+/Hk4OTkp9QNQ8WFcUlICY2NjODg4PO9QAQCVA2SCIDRI+wCQnZ1dbSL3PKhDHwDsh0rsB+DRo0e4ffs2SkpKmKQQ1UCNk5RLly5h8ODB0NLSQnZ2NubPn48PP/xQfj4gIAC5ubn1EmR9ysvLw82bN+WvU1JScO3aNZiamqJ169ZPvbZFixYwMjJSOq6hUXFnt5aWVpXnn4fKv1ibNWu4wbK8vLwGe/+AevQBwH6oxH4gotqq8T4pM2fOxKZNm5CRkYFff/0V33zzDQIDA+V/nTT
WpS1XrlxBly5d0KVLFwAV77NLly5YtGhRA0dGRETUtNU4SUlISMCoUaMAAO3atUNMTAwePnyIYcOGNeoFpD4+PhBFUeln586dda6zcji5sSZuRERE6qDGSYqRkREyMjLkr3V0dHD06FHo6urCz88P5eXl9RJgY8QkhYiISLoaJyl9+/ZV2uekWbNm+OKLL+Do6IjCwkKVB0dERERNV41XkG3durXK2wcFQcDnn3+OhQsXqjSwxowjKURERNLVOEnR0tKClpZWteefdSdMU8IkhYiISDpJT0EmIiIiqi9MUuoBR1KIiIikq1OSUlpaigULFsDR0REWFhYYOHAgrl69qlQuKSkJ69evR79+/SQH2pgwSSEiIpKuTlsvhoeHY+XKlWjZsiXatGmD6Oho9O7dGxcuXICRkRG2bNmCQ4cOISUlBaIowtDQUNVxNwpMUoiIiOquTknK3r170a9fP3zzzTdo3rw5bt++jYEDB+K9995DXFwccnNz4enpiTFjxqB///7w9vZWddxqrXJbfCYpREREdVenJCUtLQ1z5sxB8+bNAQA2NjZYtWoVAgIC4Orqiq+++grOzs4qDZSIiIialjqtSXn8+DH09PQUjnXu3BkAMG/evCafoHBNChERkXR1vrsnMzNT4Uu4clTFyspKelSNXEM+Cp6IiOhFUednlr/33nuYN28eOnbsCDc3N9jZ2UEQBJSWlqoyvkaNIylERER1V6ck5dSpU/j111/x22+/4ddff8WuXbvkycmAAQPQpk0bdOrUCa6urvL/dXV1VWng6ozTPURERNLVKUnp378/+vfvL39dWlqKhIQEedLy22+/ITY2FidOnABQ8aVdVlammogbAU73EBERSVfn6Z4nNW/eHJ07d0bnzp0xfvx4+fF79+7h2rVruH79uiqaaXQ4kkJERFR3KklSqmNlZQU/Pz/4+fnVZzNqh9M9RERE0vHZPfWA0z1ERETSMUmpRxxJISIiqjsmKfWA0z1ERETSMUmpB0xSiIiIpGOSUg+YpBAREUlX67t70tPT69RQ69at63QdERERNU21TlIqt7+vraa4mRtHUoiIiOqu1klKVFQUb7F9BiYpRERE0tU6SZk4cWI9hEFERESkqF53nH2R6enpQRRFPH78uNoy5eXlTz1fn9Rhek0QhAZ7/4B69AHAfqjEfuDoKlFtqSRJKSoqwqFDhxAfHw+ZTIby8nKF84IgYPv27apoqsFlZGQgLS3tqR+2nO4hIiKSTnKSkpaWhj59+iA1NRXGxsaQyWQwNTVFTk4OysrKYG5uDgMDA1XEqhZsbW1hYGCA8+fPQxAENGum3IWVSUp155+nhmxfFMUGf/9Aw/YBwH6oxH7gIzOIakvyPimzZs2CTCbDpUuX8Oeff0IURRw4cAB5eXmIiIiArq4uvv/+e1XE2mhwJIWIiEg6yUnKuXPnEBISAi8vL2hoVFQniiK0tbUxa9YsvPrqqwgNDZUcaGPCv5aIiIikk5ykFBQUwM7ODgBgaGgIQRAgk8nk5729vXHhwgWpzTQqlUnKv9fmEBERUc1JTlJat26NW7duAaiY67W1tcWlS5fk5xMSEqCjoyO1mUaFIylERETSSV5B5uvri2PHjmHx4sUAKvZRWblyJbKzs1FeXo49e/YgMDBQcqCNEdekEBER1Z3kJGXu3Lm4fPkyiouLoa2tjfnz5+P27ds4ePAgNDU1MXbsWKxfv14VsTYaXDhLREQkneQkpXXr1goPD9TR0UFkZCQiIyOlVt1ocbqHiIhIOslrUqh6HEkhIiKqu1qPpKSnpwOAfPSk8vWzPDna8qLjdA8REZF0tU5S7OzsIAgCCgsLoaWlJX/9LOrw3AwiIiJqPGqdpERFRUEQBDRv3lzhNf0PR1KIiIikq3WSMnHiRIXXvr6+sLCwgK6ubpXlCwsL8eDBgzoF11gxSSEiIpJO8sJZe3t7HDlypNrzx48fh729vdRmiIiIqImRnKQ8a7SgtLRU/kyfpqLy/XJbfCIiorqr0z4pubm5yMnJkb/Oysqq8i6fnJwc7N+/H9bW1nWPkIiIiJqkOiUpH330EZYuXQqgYv1FaGhotU86FkURy5cvr3uEjRDXpBAREUlXpySlf//+MDAwgCiKmD17NsaMGQN3d3eFMoIgQF9fHx4eHvD09FRJsI0FkxQiIiLp6pSkeHt7w9vbGwCQn5+P119/Ha6urioNjIiIiJo2SStaCwoKcPz4cVy4cEFV8bwQOJJCREQknaQkRU9PDykpKdzM7V+YpBAREUkn+d5gf39/fP/996qIhYiIiEhOcpKycOFC/Pnnnxg/fjwuXLiAjIwMPHz4UOmnKeFIChERkXR1Wjj7pI4dOwIAEhIS8MUXX1Rbrik9YJDTX0RERNJJTlIWLVrEL+VqcCSFiIio7iQnKWFhYSoI48XC6R4iIiLpGtVDdTZv3gw7Ozvo6Oiga9eu+OWXX6ot6+PjA0EQlH4GDhwoLzNx4kSl8/7+/pLjZJJCREQkneSRFAAoKirCoUOHEB8fD5lMpvRgPUEQsH37dkltHDhwADNnzsS2bdvQtWtXfPzxx/Dz80NSUhIsLS2Vyh8+fBglJSXy11lZWejcuTNGjBihUM7f3x87duyQv9bW1pYUJ8AkhYiISBUkJylpaWno06cPUlNTYWxsDJlMBlNTU+Tk5KCsrAzm5uYwMDCQHOj69esxZcoUBAUFAQC2bduGb7/9FlFRUZg7d65SeVNTU4XX+/fvh56enlKSoq2tjZYtW0qO70lco0NERCSd5OmeWbNmQSaT4dKlS/jzzz8hiiIOHDiAvLw8REREQFdXV/I+KiUlJYiLi0Pfvn3/F7iGBvr27YuLFy/WqI7t27dj9OjR0NfXVzgeExMDS0tLODk5Ydq0acjKypIU65M4kkJERFR3kpOUc+fOISQkBF5eXtDQqKhOFEVoa2tj1qxZePXVV6t9QnJNZWZmoqysDFZWVgrHrayscPfu3Wde/8svv+DGjRuYPHmywnF/f3/s3r0bZ8+eRUREBP773/8iICBA8u3SnO4hIiKSTvJ0T0FBAezs7AAAhoaGEAQBMplMft7b2xsffPCB1GYk2b59Ozp16gQvLy+F46NHj5b/3qlTJ7z88stwcHBATEwMXn311ecdJhERET1BcpLSunVr3Lp1q6KyZs1ga2uLS5cuYfjw4QAqNnnT0dGR1Ia5uTk0NTVx7949heP37t175nqS/Px87N+/H0uXLn1mO23btoW5uTlu3ryplKRkZGTg9u3bAIDS0lLo6OggOzsbeXl5SvUUFRUBqJimysjIeGa79aFyFKch18cUFRU12PsH1KMPAPZDJfZDxWcHEdWc5CTF19cXx44dw+LFiwFU3Na7cuVKZGdno7y8HHv27EFgYKCkNrS0tODh4YGzZ89i6NChAIDy8nKcPXsW06dPf+q1X3/9NYqLizFu3LhntnPr1i1kZWXB2tpa6ZytrS1sbW0BADKZDOfPn4eJiQmMjIyUyurp6QH4X9LWEB4/fiyPoaFkZGQ02PsH1KMPAPZDJfYDFEaZie
jZJP9rnTt3Li5fvozi4mJoa2tj/vz5uH37Ng4ePAhNTU2MHTsW69evlxzozJkzMWHCBHh6esLLywsff/wx8vPz5Xf7BAYGwtbWFitXrlS4bvv27Rg6dCjMzMwUjufl5WHJkiV4/fXX0bJlS/z111+YPXs2HB0d4efnJzleIiIikkYl0z2tW7eWv9bR0UFkZCQiIyOlVq1g1KhRePDgARYtWoS7d+/Czc0Np06dki+mTU9Ply/crZSUlIQLFy7g9OnTSvVpamrit99+w65du5CTkwMbGxv0798fy5Ytk7xXChfOEhERSdew47+1NH369Gqnd2JiYpSOOTk5VZsoqOLW6OowSSEiIpJOZUnKjRs38N133yE1NRUAYGdnh4CAAHTq1ElVTTQaTFKIiIikk5ykFBcXY+rUqdizZw9EUZRPuZSXl2PevHl48803ERkZCS0tLcnBNhZMUoiIiKSTvJnbnDlzsHv3bkybNg2JiYkoKipCcXExEhMT8dZbb2Hv3r2YPXu2KmIlIiKiJkTySMrevXsxfvx4bNq0SeG4k5MTNm/ejNzcXOzduxcff/yx1KYaDY6kEBERSSd5JKW0tBT/+c9/qj3frVs3+f4ETUVDb5pFRET0IpCcpPj5+T31LplTp06hf//+UptplDiSQkREVHeSp3uWLVuGkSNHYvjw4Xj77bfh6OgIAEhOTsbmzZuRlpaGAwcO4OHDhwrXmZqaSm1abXG6h4iISDrJSUqHDh0AANevX8exY8cUzlV+Sbu4uChdJ/VJw+qM0z1ERETSSU5SFi1axC/lanAkhYiIqO4kJylhYWEqCOPFwukeIiIi6SQvnCVlHFkiIiKSrtZJiouLC3bv3o2SkpIaX1NcXIwdO3ZUuTblRcSRFCIiIulqPd0zceJEzJw5E++++y5ee+019O3bF+7u7rC3t4eenh4AID8/HykpKbhy5QrOnDmDEydOQEtLC7NmzVL5G1BnTFKIiIjqrtZJyuzZszFt2jRs374dO3fuxJ49e+QjB82aVVRXuXmbKIpwdXXFkiVLMGnSJBgaGqowdPXFkRQiIiLp6rRwtkWLFggNDUVoaChSU1Px888/448//kBWVhYAwMzMDM7OzvD29oa9vb1KA24MmKQQERFJJ/nuHjs7O9jZ2akgFCIiIqL/4d099YAjKURERNIxSakHTFKIiIikY5JCREREaolJSj3gSAoREZF0TFLqAZMUIiIi6ZikEBERkVqSfAtypeLiYsTHx+P+/fvo3r07zM3NVVV1o8ORFCIiIulUMpKyYcMGWFtbo0ePHhg+fDh+++03AEBmZibMzc0RFRWlimYaDSYpRERE0klOUnbs2IHQ0FD4+/tj+/btCl/M5ubm8PX1xf79+6U206hoaFR0K5MUIiKiupOcpKxbtw5DhgzBF198gcGDByud9/DwwO+//y61mUalMkkpLy9v4EiIiIgaL8lJys2bNxEQEFDteVNTU/kzfZqKyukeJilERER1J3nhrLGxMTIzM6s9n5CQgJYtW0ptRu3o6elBFEX5E5+r8qzz9amsrKxB2n2SIAgN9v4B9egDgP1Qif3AKWCi2pKcpAwYMACfffYZQkJClM79/vvv+PzzzzFp0iSpzaiNjIwMpKWlPfXDliMpRNRYiKKIK1euIDo6GrmyHBgaGaNPnz7w9PSUf5YRNRTJScry5cvRtWtXuLq6YvDgwRAEAbt27UJUVBQOHToEa2trLFq0SBWxqgVbW1sYGBjg/PnzEAQBzZopd2HlMVEUqzz/PDVk++rw/oGG7QOA/VCJ/QC1+9JPSEjAh3PeR9qfN+BmLsJEW8SdYgEHtm9Em/auWB6xDi4uLjWuLy0tDRERETh79izS09Ohp6cHX19frFmzBnZ2dgplf/vtN7zzzjv45ZdfYGZmhrfeegu2traYNGkSUlJSFMqfPHkS4eHhiI+Ph4aGBnr16oXVq1ejY8eOKuoJUleS/7Xa2NggLi4O8+fPx4EDByCKIvbs2YMWLVpgzJgxWLVqVZPbM4UjKUSk7hISEjB5/Ch0NcrB/NfMYKr3v6+DhwWPsS/uBiaPH4XIPQdqnKhcvnwZP//8M0aPHo2XXnoJqamp2Lp1K3x8fJCQkAA9PT0AFSPSffr0gSAImDdvHvT19REZGQltbW2lOvfs2YMJEybAz88PERERKCgowNatW9GjRw9cvXpVKfmhF4tK/qSwtLREZGQkIiMj8eDBA5SXl8PCwkJ+l0tT8+QtyKIoqt1fT0TUtImiiA/nvI+uRjmY3sNS6TPKVK8ZpvewBC7cx4dz3seh49/V6HNs4MCBeOONNxSODR48GN7e3jh06BDGjx8PAIiIiEB2djbi4+Ph5uYGAAgKCkK7du0Urs3Ly8OMGTMwefJkfPbZZ/LjEyZMgJOTE8LDwxWO04tH5VmEhYUFrKysmmyCAkDhvXOhHBGpmytXriDtzxt408Os2uRDEASMdTdF2p83EBcXV6N6dXV15b+XlpYiKysLjo6OMDY2Rnx8vPzcqVOn4O3tLU9QgIo7Qd98802F+n744Qfk5ORgzJgxyMzMlP9oamqia9euiI6Ors3bpkZIJSMpFy5cQFRUFP7++29kZ2crfTELgoBff/1VFU01Ck/+o2eSQkTqJjo6Gm7mosIUT1XM9JvDzVzEuXPn4Onp+cx6CwsLsXLlSuzYsQMZGRkKn38ymUz+e1paGry9vZWud3R0VHidnJwMAPD19a2yPUNDw2fGRI2b5CRl/fr1mDVrFnR0dODk5ARTU1NVxNWoPZmklJeXQ1NTswGjISJSlCvLgYl2zf6AMtEWkftEgvE077zzjnwXcm9vbxgZGUEQBIwePbpOa/Qqr9mzZ0+VW1mow0Jsql+S/x9es2YNunfvjhMnTsDIyEgVMTV6nO4hInVmaGSMO8U1WyuXXSzAqYaf7QcPHsSECROwbt06+bGioiLk5OQolGvTpg1u3rypdP2/jzk4OACoWPfYt2/fGsVALxbJC0cKCgrw5ptvMkF5wpNJCu/wISJ106dPH1zLFPCw4Omb62Xll+JaplDtdMu/aWpqKv1htnHjRqWN9Pz8/HDx4kVcu3ZNfuzhw4fYt2+fUjlDQ0OEh4ejtLRUqb0HDx7UKC5qvCSPpPTp0wfXr19XRSwvjH9P9xARqRNPT0+0ae+KfXE3qry7B6gYBf4i/iHsnDrBw8OjRvUOGjQIe/bsgZGREVxcXHDx4kWcOXMGZmZmCuVmz56NvXv3ol+/fnjnnXfktyC3bt0aDx8+lMdjaGiIrVu3Yvz48XB3d8fo0aNhYWGB9PR0fPvtt+jevTs2bdokvUNIbUlOUjZu3Ij+/ftj7dq1mDRpEtekgNM9RKTeBEHA8oh1mDx+FHDhPt70qGqflCzEyowRuWltjbdR+OSTT6CpqYl9+/ahqKgI3bt3x5kzZ+Dn56dQrlWrVoiOjsaMGTMQHh4OCwsLvP3229DX18eMGTOgo6MjLzt27FjY2Nhg1apVWLNmDYqLi2Fra4uePXsiKChINR1CaktyktKqVStMnToVH3zwAebMm
QMdHR2lhaKCICis7H7R8e4eIlJ3Li4uiNxzAB/OeR9Tj/9vx9nsYgHXMgW0ae+KyE2123HW2NgYUVFRSsdTU1OVjrm5ueHHH39UOBYaGgodHR2lDUB9fHzg4+NT4zjoxSE5SVm0aBFWrFgBW1tbeHp6cm0KON1DRI2Di4sLDh3/DnFxcTh37hxyZTI4GRlhmq8vPDw86nUjysLCQoV9VbKysrBnzx706NGDd0SSnOQkZdu2bRg4cCCOHj3apDdwexIXzhJRYyEIAjw9PWu0D4oqeXt7w8fHBx06dMC9e/ewfft25ObmYuHChc81DlJvkpOUkpISDBw4kAnKE7gmhYjo6QYMGICDBw/is88+gyAIcHd3x/bt29GrV6+GDo3UiOTMYtCgQTh//rwqYnlhcLqHiOjpwsPD8eeff6KgoAD5+fk4f/4890IhJZKTlMWLFyMhIQEhISGIi4vDgwcP8PDhQy7rkroAACAASURBVKWfpoQjKURERNJJnu5xcnICAFy7dg2ffvppteX+vZnPi4x39xAREUmnkrt76nMFeGPEhbNERETSSU5SwsLCVBDGi4VJChERkXQqfYRkXl4e/vnnHwAVm7wZGBiosvpGg9M9RERE0qnkvuHLly+jT58+MDExgaurK1xdXWFiYgJfX19cuXJFFU00Kry7h4iISDrJIymxsbHw8fGBlpYWJk+ejA4dOgAAEhMT8eWXX6JXr16IiYmBl5eX5GAbC97dQ0REJJ3kkZQFCxbA1tYWSUlJ2Lp1K2bMmIEZM2Zg69atSEpKgo2NDRYsWKCKWBsNjqQQUVMVFhYGQRCQmZnZIO3WpuzzjpFqT3KSEhsbi6lTp6Jly5ZK56ysrBAcHIxLly5JbaZR4cJZIqKGFx4ejqNHjzZ0GCSB5CRFQ0MDjx8/rvZ8WVlZk9syn9M9REQNj0lK4yc5e+jWrRs2b96MtLQ0pXPp6enYsmULunfvLrUZAMDmzZthZ2cHHR0ddO3aFb/88ku1ZXfu3AlBEBR+dHR0FMqIoohFixbB2toaurq66Nu3L5KTkyXHyekeImosRFHE5cuXsXr1anz44XysXr0aly9f5h9YpBYkJynh4eGQyWRwdnbG2LFjERYWhrCwMIwZMwbOzs6QyWRYuXKl5EAPHDiAmTNnYvHixYiPj0fnzp3h5+eH+/fvV3uNoaEh7ty5I//5dyK1evVqbNiwAdu2bUNsbCz09fXh5+eHoqIiSbHyFmQiagwSEhLwxrABCJk0HH9f3AAxbRf+vrgBIZOG441hA5CQkFDnunNycjBx4kQYGxvDyMgIQUFBKCgoUCizd+9eeHh4QFdXF6amphg9erR8G4tK58+fx4gRI9C6dWtoa2ujVatWeO+991BYWPjU9gVBQH5+Pnbt2iX/Q3XixIm1jpEaluS7e7p06YLY2FgsWLAAx48fl/8frKenB39/fyxfvhwuLi6SA12/fj2mTJmCoKAgAMC2bdvw7bffIioqCnPnzq3yGkEQqlwrA1QkDx9//DE+/PBDDBkyBACwe/duWFlZ4ejRoxg9enSdY+V0DxGpu4SEBEwJGoWe7XIQPtcMZob/+zrIyn2MHSdvYErQKHy+40CdPsNHjhwJe3t7rFy5EvHx8YiMjISlpSUiIiIAACtWrMDChQsxcuRITJ48GQ8ePMDGjRvRq1cvXL16FcbGxgCAr7/+GgUFBZg2bRrMzMzwyy+/YOPGjbh16xa+/vrratvfs2cPJk+eDC8vLwQHBwMAHBwcahUjNTyVbObm4uKCI0eOoLy8HA8ePAAAWFhYqGwtSklJCeLi4jBv3jz5MQ0NDfTt2xcXL16s9rq8vDy0adMG5eXlcHd3R3h4ODp27AgASElJwd27dxWeumlkZISuXbvi4sWLKktSON1DROpGFEUsnP8+erbLwfsjLZXuijEzbIb3R1oCX93Hwvnv4+CR72r9+JMuXbpg+/bt8tdZWVnYvn07IiIikJaWhsWLF2P58uWYP3++vMzw4cPRpUsXbNmyRX48IiICurq68jLBwcFwdHTE/PnzkZ6ejtatW1fZ/rhx4/DWW2+hbdu2GDduXK1jJPUgOYt4/PgxcnNzKyrT0ICVlRWsrKzkX9S5ublPXVhbE5mZmSgrK4OVlZXCcSsrK9y9e7fKa5ycnBAVFYVjx45h7969KC8vR7du3XDr1i0AkF9XmzprikkKEamzK1euIP2vGwgKMKs2+RAEARP9TZH+1w3ExcXVuo233npL4XXPnj2RlZWF3NxcHD58GOXl5Rg5ciQyMzPlPy1btkS7du0QHR0tv+7JBCU/Px+ZmZno1q0bRFHE1atXax1XTWMk9SA5SZkxYwa6detW7fnu3bvj/fffl9pMrXl7eyMwMBBubm7o3bs3Dh8+DAsLi6c+qVlVuCaFiNRZdHQ0PBxFhSmeqpgbNYeHo4hz587Vuo1/j3CYmJgAALKzs5GcnAxRFNGuXTtYWFgo/CQmJiqsNUxPT8fEiRNhamoKAwMDWFhYoHfv3gAAmUxW67hqGiOpB8nTPadOnUJgYGC159944w3s3bsXn3zySZ3bMDc3h6amJu7du6dw/N69e9WuOfm35s2bo0uXLrh58yYAyK+7d+8erK2tFep0c3NTuj4jIwO3b98GAJSWlkJHRwfZ2dnIy8tTKvvkBkH379+Xz60+T5XJUUM+obqoqAgZGRkN1r469AHAfqjEfqj47FAHubk5MDOo2R9QZgYicnNrnwxoampWeVwURZSXl0MQBJw8ebLKcpXPfSsrK0O/fv3w8OFDzJkzB87OztDX10dGRgYmTpwoeaT6aTGSepCcpNy+fRu2trbVnrexsZH8waSlpQUPDw+cPXsWQ4cOBVAxjXL27FlMnz69RnWUlZXh+vXrGDBgAADA3t4eLVu2xNmzZ+VJSW5uLmJjYzFt2jSl621tbeXvUyaT4fz58zAxMYGRkZFS2Sc/iMzMzJ7aP/WlcoqtWTOVPkOyVjIyMhrkvVdShz4A2A+V2A/S//JXFUNDY/ydV7NkLStPQFtD5c85KRwcHCCKIuzt7dG+fftqy12/fh1//vkndu3apfDH8A8//FCjdho6MSfpJE/3mJmZISkpqdrziYmJMDQ0lNoMZs6cic8//xy7du1CYmIipk2bhvz8fPndPoGBgQoLa5cuXYrTp0/j77//Rnx8PMaNG4e0tDRMnjwZQMV/vKGhoVi+fDmOHz+O69evIzAwEDY2NvJEqK64TwoRqbM+ffog7qaArNynrxfMlJUi7qYAX19flbY/fPhwaGpqYsmSJUqjFqIoIisrC8D/RjqeLCOKYo1H5vX19ZGTk6OiqKkhSP6Twt/fH59++inefPNNdOnSReFcfHw8PvvsM4wYMUJqMxg1ahQePHiARYsW4e7du3Bzc8OpU6fkC1/T09MVFqxmZ2djypQpuHv3LkxMTODh4YGff/5Z4Va62bNnIz8/H8HBwcjJyUGPHj1w6tQppU3faosLZ4lInXl6eqK1gyt2nLxR5d09QEUysPPUQ7Rx7AQPDw+Vtu/g4IDly5dj3rx5SE1N
xdChQ9GiRQukpKTgyJEjCA4OxgcffABnZ2c4ODjggw8+QEZGBgwNDXHo0KEarxnx8PDAmTNnsH79etjY2MDe3h5du3ZV6Xuh+iU5SVm2bBlOnToFLy8vvPbaa/JbfG/cuIETJ07A0tISy5YtkxwoAEyfPr3a6Z2YmBiF1x999BE++uijp9YnCAKWLl2KpUuXqiS+StwnhYjUmSAIWBa+DlOCRgFf3UdQQFX7pGThfLIxPt+xtl6mTebOnYv27dvjo48+wpIlSwAArVq1Qv/+/fHaa68BqFhLeOLECcyYMQMrV66Ejo4Ohg0bhunTp6Nz587PbGP9+vUIDg7Ghx9+iMLCQkyYMIFJSiMjiCr4Fr1z5w7mzp2LY8eOyW/dMjQ0xNChQxEeHg4bGxvJgaqTyjUpPXv2rHJNyu3bt+V3NL3//vvw9PR83iGqxfw71yBUYD9UYD88+7PjeUtISMDC+e8j/a8bFXf7GIjIyhMQd1NAawdXLAtfp5LNOInqSiX/Wq2trbFr1y6IoqiwmVtTXbT05EhKWVlZA0ZCRFQ9FxcXHDzyHeLi4nDu3Dnk5srQ1tAIkxf4wsPDo8l+hpP6kJykpKenw8LCArq6uhAEAZaWlgrnCwsL8eDBg2p3BXwRPXlbG9ekEJE6EwQBnp6eDTLiS/Qsku/usbe3x5EjR6o9f/z4cdjb20ttplF5MknhSAoREVHdSE5SnrWkpbS0VGXP8GksON1DREQkXZ2me3JzcxXuPc/KykJ6erpSuZycHOzfv19hR9em4MmFeUxSiIiI6qZOScpHH30kv223clO00NDQKsuKoojly5fXPcJGiCMpRERE0tUpSenfvz8MDAwgiiJmz56NMWPGwN3dXaGMIAjQ19eHh4dHk1uQxTUpRERE0tUpSfH29oa3tzeAikdnDx8+HJ06dVJpYI0Z7+4hIiKSTvItyIsXL1ZFHC+UJ6d7KjeQIiIiotqRnKRoaGjUaMOfpjTtUdknlY8kJyIiotqTnKQsWrRIKUkpKytDamoqjh49CicnJwwaNEhqM42OpqYmHj9+3KSSMyIiIlWSnKSEhYVVe+7OnTv4z3/+g/bt20ttptGpnPJhkkJERFQ39brLmrW1Nd566y2VPQW5MalcPMskhYiIqG7qfStYfX19pKSk1HczaqdyJIVrUoiIiOqmXp9ZfuPGDWzYsKFJT/fw7h4iUmeiKOLKlSuIjo5GjiwXxkaG6NOnDzw9PfkUZGpwKnnAYNu2bZV+TE1N0blzZ9y7dw/r169XRayNSuV0D0dSiEhdJSQkYMCQYRgeOBkbTsVh17V72HAqDsMDJ2PAkGFISEiodZ2PHj1CaGgo7OzsoK2tDUtLS/Tr1w/x8fHyMrGxsfD394eRkRH09PTQu3dv/PTTT/LziYmJ0NXVRWBgoELdFy5cgKamJubMmVP3N02NiuSRlN69eytl24IgwMTEBA4ODhg9ejRMTU2lNtPocOEsEamzhIQEjAqchBzLjjAbOwnNDIzl5x7n5eDGxWMYFTgJB3ZHwcXFpcb1vvXWWzh48CCmT58OFxcXZGVl4cKFC0hMTIS7uzvOnTuHgIAAeHh4YPHixdDQ0MCOHTvg6+uL8+fPw8vLCx06dMCyZcswa9YsvPHGG3jttdeQn5+PiRMnwtnZWf5YFnrxCeKzHmNMSmQyGc6fP4+ePXvCyMioyjLvvPMOMjMz4ePjg6lTpz7nCP83zfTkww6ft4yMDNja2jZY++rQBwD7oRL7oWafHc+DKIoYMGQYbjw2g2XfwCqndURRxP0zu+HaLAvfHTtS46kfY2NjjBs3Dps2baqyTicnJ7Rt2xYnT56U11lYWIiOHTvC0dERp0+fBlAxCt27d28kJyfj999/x+LFi/Hpp5/i4sWLTe5RK01ZvS+cbap4dw8RqasrV67gRnIKzLyHVJt8CIIAU+8huJGcgri4uBrXbWxsjNjYWNy+fVvp3LVr15CcnIyxY8ciKysLmZmZyMzMRH5+Pl599VX8+OOP8ilyDQ0N7Ny5E3l5eQgICMCWLVswb948JihNjEqTlLy8PCQmJiIxMRF5eXmqrLrR4XQPEamr6OhoiNbOClM8VWluYAzR2hnnzp2rcd2rV6/GjRs30KpVK3h5eSEsLAx///03ACA5ORkAMGHCBFhYWCj8REZGori4GDKZTF6Xg4MDwsLCcPnyZXTs2BELFy6sw7ulxkwl456XL1/G7NmzceHCBYUsuGfPnli9enWTzHw5kkJE6ipHlgtR17BGZUVdQ8hkuTWue+TIkejZsyeOHDmC06dPY82aNYiIiMDhw4fl3w9r1qyBm5tbldcbGBgovK6c/rl9+zaysrLQsmXLGsdCjZ/kJCU2NhY+Pj7Q0tLC5MmT0aFDBwAVq7O//PJL9OrVCzExMfDy8pIcbGPCfVKISF0ZGxlCKEyuUVmhMBdGRu1qVb+1tTVCQkIQEhKC+/fvw93dHStWrMBHH30EADA0NETfvn2fWc+2bdvwww8/YMWKFVi5ciWmTp2KY8eO1SoWatwkT/csWLAAtra2SEpKwtatWzFjxgzMmDEDW7duRVJSEmxsbLBgwQJVxNqocLqHiNRVnz59INz5A4/zcp5arjQvB8KdP+Dr61ujesvKyhSmawDA0tISNjY2KC4uhoeHBxwcHLB27doqlwQ8ePBA/ntKSgpmzZqF119/HfPnz8fatWtx/Phx7N69u0ax0ItBJSMpixYtqnIIzsrKCsHBwdwWn4hIjXh6esK1nT1uXDz21Lt7Hl48hk7t28LDw6NG9T569AgvvfQS3njjDXTu3BkGBgY4c+YMLl++jHXr1kFDQwORkZEICAhAx44dERQUBFtbW2RkZCA6OhqGhoY4ceIERFHEpEmToKuri61btwIApk6dikOHDuHdd99F3759YWNjo9I+IfUkOUnR0NB46q6qZWVl8lGFpoQjKUSkrgRBwLpV4RgVOAn3z+yGmfcQpX1Ssi4eg/H937F2d1SNbz/W09NDSEgITp8+LV+D4ujoiC1btmDatGkAAB8fH1y8eBHLli3Dpk2bkJeXh5YtW6Jr167y7Ro2btyImJgYHDp0CBYWFvL6t2/fDldXV0yZMgXffvutCnuE1JXkJKVbt27YvHkzxo4dizZt2iicS09Px5YtW9C9e3epzTQ6TFKISJ25uLjgwO4ovD93Pm58sRCitTNEXUMIhbkQ7vwB13b2WFfLjdy0tLSwevVqrF69+qnl3NzccOjQoWrPVy4b+LdWrVopTSfRi01ykhIeHo5evXrB2dkZw4YNkz+nJykpCceOHUOzZs2wcuVKyYGqGz09PYiiWO0o0pPTPQ3x/B51SI4EQWjQZxepQx8A7IdK7IeKKRR14uLigu+OHUFcXBzOnTsHmaxikayv7yx4eHjw2T3U4CQnKV26dEFsbCwWLFiA48ePo6CgAEDFl7i/vz+WL19eq0xc3WVkZCAtLe2ZH7aVSQofMEhE6kwQBHh6ejbJrSJI/alknxQXFxccOXIE5eXl8tXZFhYWL+RaFFtbWxgYGOD8+fMQBKHaLbY
r/wIRRbFBt+FuyLYb+r1XaugY2A8V2A/gyARRLan0X6uGhgasrKxUWWWjxbt7iIiIpHnxhjrUBBfOEhERScMkpZ5UDilzTQoREVHdMEmpJ5VJSmlpaQNHQkRE1DgxSaknTFKIiIikUenC2by8PGRnZ1e5F0Dr1q1V2ZTaY5JCREQkjeQkpaioCEuWLMH27duRlZVVbbmmtoCUSQoREZE0kpOUkJAQ7Nq1C0OHDkXPnj1hYmKiirgavcokpaysDOXl5S/knjFERET1SXKScvjwYUyePBmffvqpKuJ5YTy5YdTjx4+hpaXVgNEQEb14du7ciaCgIKSkpMDOzq6hw2mUYmJi0KdPH0RHR8PHx6ehw1Ei+c97QRDg7u6uilheKM2bN5f/XlJS0oCREBERNU6Sk5QhQ4bgzJkzqojlhVK54yzAvVKIiOrD+PHjUVhYiDZt2jR0KI1Wr169UFhYiF69ejV0KFWSPN2zcOFCjBw5EsHBwZg6dSpat26t8AVdydTUVGpTjcqTIylcPEtE6koURVy5cgXR0dHIyZXB2NAIffr0gaenp9o/a0hTU7PK7xuqOQ0NDejo6DR0GNWSPJLSrl07XL16FZGRkfDy8kLLli1hYWGh9NPUPLkmhUkKEamjhIQEDBg2BMMnBWLDz6ewK+0aNvx8CsMnBWLAsCFISEiodZ2PHj1CaGgo7OzsoK2tDUtLS/Tr1w/x8fHyMl9//TU8PDygq6sLc3NzjBs3DhkZGUp1/fHHHxg5ciQsLCygq6sLJycnLFiwQH5+586dEAQBqampCtdt2bIFHTt2hLa2NmxsbPD2228jJydHqf7Y2Fj4+/vDyMgIenp66N27N3766SelchkZGfi///s/2NjYQFtbG/b29pg2bZrCVP7ff/+NESNGwNTUFHp6evjPf/6Db7/9VqGemJgYCIKAr776CitWrMBLL70EHR0dvPrqq7h586ZSuzXpp4kTJ8LAwADp6ekYNGgQDAwMYGtri82bNwMArl+/Dl9fX+jr66NNmzb44osvqowpJiZGqW8GDBgAExMT6Ovr4+WXX8Ynn3yiFGN9kzySsmjRIrXPthsCkxQiUmcJCQkYFRSIHEdLmM15E80MDeTnHufm4cbJnzEqKBAHduyGi4tLjet96623cPDgQUyfPh0uLi7IysrChQsXkJiYCHd3d/li11deeQUrV67EvXv38Mknn+Cnn37C1atXYWxsDAD47bff0LNnTzRv3hzBwcGws7PDX3/9hRMnTmDFihXVth8WFoYlS5agb9++mDZtGpKSkrB161ZcvnwZP/30k3yU+9y5cwgICICHhwcWL14MDQ0N7NixA76+vjh//jy8vLwAALdv34aXlxdycnIQHBwMZ2dnZGRk4ODBgygoKICWlhbu3buHbt26oaCgADNmzICZmRl27dqF1157DQcPHsSwYcMUYly1ahU0NDTwwQcfQCaTYfXq1XjzzTcRGxsrL1PTfgIq7iINCAhAr169sHr1auzbtw/Tp0+Hvr4+FixYgDfffBPDhw/Htm3bEBgYCG9vb9jb21fbhz/88AMGDRoEa2trvPvuu2jZsiUSExPxzTff4N13363xfwsqIVKt5eTkiCdOnBBzcnKqLRMTEyOOHj1aHD16tJicnPwco6tQWloqlpaWPvd2n3Tr1q0GbV8d+kAU2Q+V2A81++x4HsrLy0X/IYPFl8YGiO5H14sexz5S+nE/ul58aWyA6D9ksFheXl7juo2MjMS33367ynMlJSWipaWl6OrqKhYWFsqPf/PNNyIAcdGiRfJjvXr1Elu0aCGmpaUpxV5px44dIgAxJSVFFEVRvH//vqilpSX2799fLCsrk5fbtGmTCECMioqS19GuXTvRz89Pob6CggLR3t5e7Nevn/xYYGCgqKGhIV6+fFnp/VReGxoaKgIQz58/Lz/36NEj0d7eXrSzs5PHEh0dLQIQO3ToIBYXF8vLfvLJJyIA8fr167XupwkTJogAxPDwcPmx7OxsUVdXVxQEQdy/f7/8+B9//CECEBcvXiw/VhlTdHS0KIqi+PjxY9He3l5s06aNmJ2dXW3fPy/cvKOecCSFiNTVlStXcOOvZJgFdKt2JFwQBJj6d8ONv5IRFxdX47qNjY0RGxuL27dvV9nu/fv3ERISorAOYuDAgXB2dpZPjzx48AA//vgjJk2apLRb+dNG7s+cOYOSkhKEhoYq7E01ZcoUGBoayuu/du0akpOTMXbsWGRlZSEzMxOZmZnIz8/Hq6++ih9//BHl5eUoLy/H0aNHMXjwYHh6elbZRwDw3XffwcvLCz169JCfMzAwQHBwMFJTU5WmzYKCghS2pejZsyeAiimj2vTTkyZPniz/3djYGE5OTtDX18fIkSPlx52cnGBsbCxvpypXr15FSkoKQkNDFUZrnny/z5NKk5S8vDwkJiYiMTEReXl5qqy60eHCWSJSV9HR0RAdrBWmeKrS3MgAooM1zp07V+O6V69ejRs3bqBVq1bw8vJCWFiY/EsxLS0NQMWX5b85OzvLz1eWd3V1rXG7T6tfS0sLbdu2lZ9PTk4GAEyYMEFp/WRkZCSKi4shk8nw4MED5ObmPjOOtLS0Kt9Thw4dFOKq9O/Eq3IT1Ozs7Ke+D0Cxnyrp6Ogorf00MjLCSy+9pJRYGBkZydupyl9//QWg9n1fX1Ty7J7Lly9j9uzZuHDhAsrLywFUrBju2bMnVq9eXWUG+qJ7csU5kxQiUic5uTKILXRrVFZsoQtZrqzGdY8cORI9e/bEkSNHcPr0aaxZswYRERE4fPhwXcNVucrvqTVr1sDNza3KMgYGBnj48GG9tF/dHUliFc+9k1KfqttpCJKTlNjYWPj4+EBLSwuTJ0+WZ46JiYn48ssv0atXL8TExMgXITUVHEkhInVlbGgE4VFhjcoKjwphZGhUq/qtra0REhKCkJAQ3L9/H+7u7lixYgXWrFkDAEhKSoKvr6/CNUlJSfL9Ttq2bQsAuHHjRq3arbw+KSlJXgdQsaFmSkoK+vbtCwBwcHAAABgaGsqPVcXCwgKGhobPjKNNmzZISkpSOv7HH38oxFWX9/G0fqoPlX1z48aNp/bN8yJ5umfBggWwtbWVr6CeMWMGZsyYga1btyIpKQk2NjYKt4w1FVyTQkTqqk+fPhD+uoPHuU+fli+V5UH4647SF2V1ysrKIJMpjrpYWlrCxsYGxcXF8PT0hKWlJbZt24bi4mJ5mZMnTyIxMREDBw4EUJEc9OrVC1FRUUhPT1eo72mjAH379oWWlhY2bNigUG779u2QyWTy+j08PODg4IC1a9dWuTThwYMHACpmBIYOHYoTJ07gypUrSuUq2xgwYAB++eUXXLx4UX4uPz8fn332Gezs7Gp1dxSAGvdTfXB3d4e9vT0+/vhjpdu2G2IERiUjKYsWLULLli2VzllZWSE4OBjLli2T2kyj8+RIypP/kRERNTRPT0+4OrTDjZM/w3JkvyoXRIqiiIenfkYnx/bw8PCoUb2PHj3CSy+9hDfeeAOdO3eGgYEBzpw5g8uXL2PdunVo3rw5Ii
[... remainder of base64-encoded PNG plot data omitted ...]\n",
970 | "text/plain": [
971 | ""
972 | ]
973 | },
974 | "metadata": {}
975 | },
976 | {
977 | "output_type": "execute_result",
978 | "data": {
979 | "text/plain": [
980 | ""
981 | ]
982 | },
983 | "metadata": {},
984 | "execution_count": 241
985 | }
986 | ]
987 | }
988 | ]
989 | }
--------------------------------------------------------------------------------
/data/ditella-crime-2004/CrimebyBlock.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vveitch/causality-tutorials/e08034512b38f938996acd7f6b60b5be996e99ba/data/ditella-crime-2004/CrimebyBlock.dta
--------------------------------------------------------------------------------
/data/ditella-crime-2004/MonthlyPanel.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vveitch/causality-tutorials/e08034512b38f938996acd7f6b60b5be996e99ba/data/ditella-crime-2004/MonthlyPanel.dta
--------------------------------------------------------------------------------
/data/ditella-crime-2004/README:
--------------------------------------------------------------------------------
1 | Data from
2 |
3 | Do Police Reduce Crime? Estimates Using the Allocation of Police Forces After a Terrorist Attack
4 | Rafael Di Tella
5 | Ernesto Schargrodsky
6 | AMERICAN ECONOMIC REVIEW
7 | VOL. 94, NO. 1, MARCH 2004
8 | https://www.aeaweb.org/articles?id=10.1257/000282804322970733
9 |
10 | Also includes a notebook to pre-process the data for the tutorial.
11 | Note: this pre-processing was chosen for simplicity, and should not be viewed as indicative of good statistical practice.
--------------------------------------------------------------------------------
/data/ditella-crime-2004/WeeklyPanel.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vveitch/causality-tutorials/e08034512b38f938996acd7f6b60b5be996e99ba/data/ditella-crime-2004/WeeklyPanel.dta
--------------------------------------------------------------------------------
/data/ditella-crime-2004/data_cleaning.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 11,
6 | "id": "35cf30ec",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "# Data cleaning for demo of difference-in-differences estimation w/ machine learning methods\n",
11 | "# data from \"Do Police Reduce Crime? Estimates Using the Allocation of Police Forces After a Terrorist Attack\" Rafael Di Tella \n",
12 | "# https://www.aeaweb.org/articles?id=10.1257/000282804322970733"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": 57,
18 | "id": "efebdb5e",
19 | "metadata": {},
20 | "outputs": [],
21 | "source": [
22 | "import pandas as pd\n",
23 | "import numpy as np\n",
24 | "import pyreadstat "
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": 66,
30 | "id": "59563136",
31 | "metadata": {},
32 | "outputs": [
33 | {
34 | "name": "stderr",
35 | "output_type": "stream",
36 | "text": [
37 | ":1: DeprecationWarning: `np.object` is a deprecated alias for the builtin `object`. To silence this warning, use `object` by itself. Doing this will not modify any behavior and is safe. \n",
38 | "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
39 | " cbb, _ = pyreadstat.read_dta('CrimebyBlock.dta')\n",
40 | ":2: DeprecationWarning: `np.object` is a deprecated alias for the builtin `object`. To silence this warning, use `object` by itself. Doing this will not modify any behavior and is safe. \n",
41 | "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
42 | " panel, _ = pyreadstat.read_dta('MonthlyPanel.dta')\n"
43 | ]
44 | }
45 | ],
46 | "source": [
47 | "cbb, _ = pyreadstat.read_dta('CrimebyBlock.dta')\n",
48 | "panel, _ = pyreadstat.read_dta('MonthlyPanel.dta')"
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": 67,
54 | "id": "0fa52bad",
55 | "metadata": {},
56 | "outputs": [],
57 | "source": [
58 |     "panel = panel.merge(cbb[['observ','educjefe','ocupado']], on='observ') # education of head of household, employment rate"
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": 68,
64 | "id": "6c908a03",
65 | "metadata": {},
66 | "outputs": [
67 | {
68 | "data": {
297 | "text/plain": [
298 | " observ barrio calle altura institu1 institu3 distanci \\\n",
299 | "0 870.0 Once Cordoba a2300 0.0 1.0 1.0 \n",
300 | "1 870.0 Once Cordoba a2300 0.0 1.0 1.0 \n",
301 | "2 870.0 Once Cordoba a2300 0.0 1.0 1.0 \n",
302 | "3 870.0 Once Cordoba a2300 0.0 1.0 1.0 \n",
303 | "4 870.0 Once Cordoba a2300 0.0 1.0 1.0 \n",
304 | "... ... ... ... ... ... ... ... \n",
305 | "9631 140.0 Belgrano Virrey del Pino a1600 0.0 0.0 3.0 \n",
306 | "9632 140.0 Belgrano Virrey del Pino a1600 0.0 0.0 3.0 \n",
307 | "9633 140.0 Belgrano Virrey del Pino a1600 0.0 0.0 3.0 \n",
308 | "9634 140.0 Belgrano Virrey del Pino a1600 0.0 0.0 3.0 \n",
309 | "9635 140.0 Belgrano Virrey del Pino a1600 0.0 0.0 3.0 \n",
310 | "\n",
311 | " edpub estserv banco totrob mes educjefe ocupado \n",
312 | "0 1.0 0.0 0.0 0.00 4.0 10.846611 0.949495 \n",
313 | "1 1.0 0.0 0.0 0.00 5.0 10.846611 0.949495 \n",
314 | "2 1.0 0.0 0.0 0.00 6.0 10.846611 0.949495 \n",
315 | "3 1.0 0.0 0.0 0.00 7.0 10.846611 0.949495 \n",
316 | "4 1.0 0.0 0.0 0.00 8.0 10.846611 0.949495 \n",
317 | "... ... ... ... ... ... ... ... \n",
318 | "9631 0.0 0.0 0.0 0.25 10.0 12.771961 0.950423 \n",
319 | "9632 0.0 0.0 0.0 0.00 11.0 12.771961 0.950423 \n",
320 | "9633 0.0 0.0 0.0 0.00 12.0 12.771961 0.950423 \n",
321 | "9634 0.0 0.0 0.0 0.00 72.0 12.771961 0.950423 \n",
322 | "9635 0.0 0.0 0.0 0.00 73.0 12.771961 0.950423 \n",
323 | "\n",
324 | "[9636 rows x 14 columns]"
325 | ]
326 | },
327 | "execution_count": 68,
328 | "metadata": {},
329 | "output_type": "execute_result"
330 | }
331 | ],
332 | "source": [
333 | "panel"
334 | ]
335 | },
336 | {
337 | "cell_type": "code",
338 | "execution_count": 69,
339 | "id": "64777226",
340 | "metadata": {},
341 | "outputs": [],
342 | "source": [
343 | "panel = panel.drop(columns=['altura','institu3','distanci']) # unsure what these are"
344 | ]
345 | },
346 | {
347 | "cell_type": "code",
348 | "execution_count": 70,
349 | "id": "64ecee42",
350 | "metadata": {},
351 | "outputs": [],
352 | "source": [
353 | "english_translation = {'observ': 'block',\n",
354 | " 'barrio': 'neighbourhood',\n",
355 | " 'calle': 'street',\n",
356 | " 'institu1': 'jewish_insitute',\n",
357 | " 'edpub': 'public_institution',\n",
358 | " 'estserv': 'gas_station',\n",
359 | " 'banco': 'bank',\n",
360 | " 'totrob': 'car_thefts',\n",
361 | " 'mes': 'month',\n",
362 | " 'educjefe': 'education',\n",
363 | " 'ocupado': 'employment_rate'\n",
364 | " }\n",
365 | "panel = panel.rename(columns=english_translation)"
366 | ]
367 | },
368 | {
369 | "cell_type": "code",
370 | "execution_count": 71,
371 | "id": "8e49470e",
372 | "metadata": {},
373 | "outputs": [],
374 | "source": [
375 | "panel.to_csv(\"DiTella_crime.csv\", index=False)"
376 | ]
377 | },
378 | {
379 | "cell_type": "code",
380 | "execution_count": null,
381 | "id": "98cd4270",
382 | "metadata": {},
383 | "outputs": [],
384 | "source": []
385 | }
386 | ],
387 | "metadata": {
388 | "kernelspec": {
389 | "display_name": "Python 3",
390 | "language": "python",
391 | "name": "python3"
392 | },
393 | "language_info": {
394 | "codemirror_mode": {
395 | "name": "ipython",
396 | "version": 3
397 | },
398 | "file_extension": ".py",
399 | "mimetype": "text/x-python",
400 | "name": "python",
401 | "nbconvert_exporter": "python",
402 | "pygments_lexer": "ipython3",
403 | "version": "3.8.8"
404 | }
405 | },
406 | "nbformat": 4,
407 | "nbformat_minor": 5
408 | }
409 |
--------------------------------------------------------------------------------
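A quick way to confirm that the export above produced what the tutorial expects is to reload DiTella_crime.csv and check its columns. This is a minimal sketch, assuming the file sits in the working directory; the column names mirror the english_translation map above (including the original "jewish_insitute" spelling):

    import pandas as pd

    df = pd.read_csv("DiTella_crime.csv")

    # columns produced by the rename step in data_cleaning.ipynb
    expected = {"block", "neighbourhood", "street", "jewish_insitute",
                "public_institution", "gas_station", "bank",
                "car_thefts", "month", "education", "employment_rate"}
    assert expected.issubset(df.columns), expected - set(df.columns)

    print(df.shape)                       # one row per block-month
    print(sorted(df["month"].unique()))   # month codes used below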
/difference_in_differences.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "difference-in-differences.ipynb",
7 | "provenance": [],
8 | "collapsed_sections": [],
9 | "toc_visible": true,
10 | "include_colab_link": true
11 | },
12 | "kernelspec": {
13 | "name": "python3",
14 | "display_name": "Python 3"
15 | },
16 | "language_info": {
17 | "name": "python"
18 | }
19 | },
20 | "cells": [
31 | {
32 | "cell_type": "markdown",
33 | "metadata": {
34 | "id": "QfZkNLUb4B-p"
35 | },
36 | "source": [
37 | "# Difference-in-Differences Estimation Tutorial\n",
38 | "\n",
39 |         "A short example of how to estimate the difference-in-differences ATT with two-period panel data using machine learning methods.\n",
40 | "\n",
41 | "Data from this paper: https://www.aeaweb.org/articles?id=10.1257/000282804322970733\n",
42 | "\n",
43 |         "In brief: following a terrorist attack on a Jewish community center in Buenos Aires, additional police officers were stationed on blocks containing Jewish institutions. This provides a natural experiment for the effect of policing on crime. The data record the number of car thefts on many city blocks in the months before and after the increase in policing. Comparing the change in thefts on blocks with Jewish institutions (hence, increased police presence) to the change on other blocks gives a difference-in-differences measurement of the effect; the estimand is written out just below. However, blocks with Jewish institutions may differ in significant ways---e.g., they may tend to be better educated or located in certain neighbourhoods---so we use machine learning methods to adjust for such differences. "
44 | ]
45 | },
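The comparison sketched in the cell above is the standard two-period difference-in-differences estimand for the effect on the treated blocks. In notation introduced here (not used in the notebook itself): let A = 1 indicate a block with a Jewish institution and let Y_pre, Y_post denote its car-theft rate before and after the policing change. Under the parallel-trends assumption,

    \tau_{\mathrm{ATT}}
      = \mathbb{E}\bigl[\,Y_{\mathrm{post}}(1) - Y_{\mathrm{post}}(0) \mid A = 1\,\bigr]
      = \mathbb{E}\bigl[\,Y_{\mathrm{post}} - Y_{\mathrm{pre}} \mid A = 1\,\bigr]
        - \mathbb{E}\bigl[\,Y_{\mathrm{post}} - Y_{\mathrm{pre}} \mid A = 0\,\bigr].

The second equality is what parallel trends buys: untreated blocks reveal the trend the treated blocks would have followed without the extra police. Because that assumption is more plausible among comparable blocks, the notebook conditions on block-level covariates, which is where the machine learning models come in.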
46 | {
47 | "cell_type": "code",
48 | "metadata": {
49 | "id": "dS2X3Bq1-fxE"
50 | },
51 | "source": [
52 | "import numpy as np\n",
53 | "import pandas as pd\n",
54 | "import scipy as sp\n",
55 | "from sklearn import preprocessing\n",
56 | "from sklearn.linear_model import LinearRegression, LogisticRegression\n",
57 | "from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor\n",
58 | "from sklearn.model_selection import KFold, StratifiedKFold, train_test_split\n",
59 | "from sklearn.metrics import mean_squared_error, log_loss\n",
60 | "import sklearn\n",
61 | "import os"
62 | ],
63 | "execution_count": 197,
64 | "outputs": []
65 | },
66 | {
67 | "cell_type": "code",
68 | "metadata": {
69 | "id": "nxJ46X9cFJ9X"
70 | },
71 | "source": [
72 | "RANDOM_SEED=42\n",
73 | "np.random.seed(RANDOM_SEED)"
74 | ],
75 | "execution_count": 198,
76 | "outputs": []
77 | },
78 | {
79 | "cell_type": "markdown",
80 | "metadata": {
81 | "id": "yPbJeayiEs3u"
82 | },
83 | "source": [
84 |         "## Load and Format Data\n",
85 |         "\n",
86 |         "We reformat the data so that there is an \"outcome\" column equal to the difference in car thefts between the after and before periods, a \"treatment\" column indicating the presence of a Jewish institution, and \"confounders\" denoting variables that may differ between Jewish and non-Jewish blocks and that may also affect the change in crime rate. \n",
87 |         "\n",
88 |         "After this formatting, the estimation procedure is identical to computing the ATT with a regression adjustment; the identification is written out just below."
89 | ]
90 | },
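To spell out the claim in the cell above (again in our own notation): write Y = Y_post - Y_pre for the reformatted outcome, A for the treatment indicator, and X for the confounders. Under parallel trends conditional on X, the ATT is identified by the same expression as in an ordinary observational ATT analysis of (Y, A, X):

    \tau_{\mathrm{ATT}}
      = \mathbb{E}\bigl[\,Y \mid A = 1\,\bigr]
        - \mathbb{E}\bigl[\,Q(0, X) \mid A = 1\,\bigr],
    \qquad
    Q(a, x) = \mathbb{E}\bigl[\,Y \mid A = a,\, X = x\,\bigr].

This is why the remainder of the notebook can proceed exactly as if it were estimating an ATT with outcome Y, treatment A, and confounders X.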
91 | {
92 | "cell_type": "code",
93 | "metadata": {
94 | "id": "2AC9TPko-hbt"
95 | },
96 | "source": [
97 | "panel = pd.read_csv('https://raw.githubusercontent.com/vveitch/causality-tutorials/main/data/ditella-crime-2004/DiTella_crime.csv')\n"
98 | ],
99 | "execution_count": 199,
100 | "outputs": []
101 | },
102 | {
103 | "cell_type": "code",
104 | "metadata": {
105 | "colab": {
106 | "base_uri": "https://localhost:8080/",
107 | "height": 203
108 | },
109 | "id": "-A1LX6-t-hZD",
110 | "outputId": "b94e3198-8c72-423d-a01d-7fc26cc26856"
111 | },
112 | "source": [
113 | "panel.head()"
114 | ],
115 | "execution_count": 200,
116 | "outputs": [
117 | {
118 | "output_type": "execute_result",
119 | "data": {
227 | "text/plain": [
228 | " block neighbourhood street ... month education employment_rate\n",
229 | "0 870.0 Once Cordoba ... 4.0 10.846611 0.949495\n",
230 | "1 870.0 Once Cordoba ... 5.0 10.846611 0.949495\n",
231 | "2 870.0 Once Cordoba ... 6.0 10.846611 0.949495\n",
232 | "3 870.0 Once Cordoba ... 7.0 10.846611 0.949495\n",
233 | "4 870.0 Once Cordoba ... 8.0 10.846611 0.949495\n",
234 | "\n",
235 | "[5 rows x 11 columns]"
236 | ]
237 | },
238 | "metadata": {},
239 | "execution_count": 200
240 | }
241 | ]
242 | },
243 | {
244 | "cell_type": "code",
245 | "metadata": {
246 | "id": "zUpUVabW59bS"
247 | },
248 | "source": [
249 |         "# The terrorist attack occurred on July 18 and the increased police presence began on July 25. Months 4, 5, 6, and 71 (the part of July before the change) form the 'before' period; the remaining months form the 'after' period. \n",
250 | "first_period = panel['month'].isin([4., 5., 6., 71.])\n",
251 | "panel['first_period']=first_period"
252 | ],
253 | "execution_count": 201,
254 | "outputs": []
255 | },
256 | {
257 | "cell_type": "code",
258 | "metadata": {
259 | "id": "D-8dY5W8Q-PJ"
260 | },
261 | "source": [
262 | "# code neighbourhood as integer for later convenience\n",
263 | "panel['neighbourhood']=panel['neighbourhood'].astype('category').cat.codes"
264 | ],
265 | "execution_count": 202,
266 | "outputs": []
267 | },
268 | {
269 | "cell_type": "code",
270 | "metadata": {
271 | "colab": {
272 | "base_uri": "https://localhost:8080/",
273 | "height": 447
274 | },
275 | "id": "ZBR4R-90DmRn",
276 | "outputId": "a121111a-1d58-4426-dd0c-354c147453d6"
277 | },
278 | "source": [
279 |         "# We need to collapse the multiple before and after months into a single observation per block and period\n",
280 |         "# There is no clear canonical way to do this, but averaging the monthly values seems reasonable\n",
281 | "panel = panel.groupby(['block', 'first_period']).mean()\n",
282 | "panel = panel.reset_index(level='first_period')\n",
283 | "panel"
284 | ],
285 | "execution_count": 203,
286 | "outputs": [
287 | {
288 | "output_type": "execute_result",
289 | "data": {
483 | "text/plain": [
484 | " first_period neighbourhood ... education employment_rate\n",
485 | "block ... \n",
486 | "1.0 False 0 ... 11.919889 0.926594\n",
487 | "1.0 True 0 ... 11.919889 0.926594\n",
488 | "2.0 False 0 ... 11.919889 0.926594\n",
489 | "2.0 True 0 ... 11.919889 0.926594\n",
490 | "3.0 False 0 ... 11.919889 0.926594\n",
491 | "... ... ... ... ... ...\n",
492 | "874.0 True 1 ... 10.898485 0.939759\n",
493 | "875.0 False 1 ... 10.898485 0.939759\n",
494 | "875.0 True 1 ... 10.898485 0.939759\n",
495 | "876.0 False 1 ... 10.898485 0.939759\n",
496 | "876.0 True 1 ... 10.898485 0.939759\n",
497 | "\n",
498 | "[1752 rows x 10 columns]"
499 | ]
500 | },
501 | "metadata": {},
502 | "execution_count": 203
503 | }
504 | ]
505 | },
506 | {
507 | "cell_type": "code",
508 | "metadata": {
509 | "id": "APOqpHmrOGzo",
510 | "colab": {
511 | "base_uri": "https://localhost:8080/"
512 | },
513 | "outputId": "397aa5fc-b7c1-4617-dc99-765a18ceda5f"
514 | },
515 | "source": [
516 |         "# now create a version of the data with one row per block and \"outcome\" = after-period thefts minus before-period thefts\n",
517 | "compact_df = panel[~panel['first_period']]\n",
518 | "car_thefts = panel['car_thefts'].values\n",
519 | "compact_df['Y1-Y0']=car_thefts[~panel['first_period']] - car_thefts[panel['first_period']]"
520 | ],
521 | "execution_count": 204,
522 | "outputs": [
523 | {
524 | "output_type": "stream",
525 | "name": "stderr",
526 | "text": [
527 | "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:4: SettingWithCopyWarning: \n",
528 | "A value is trying to be set on a copy of a slice from a DataFrame.\n",
529 | "Try using .loc[row_indexer,col_indexer] = value instead\n",
530 | "\n",
531 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
532 | " after removing the cwd from sys.path.\n"
533 | ]
534 | }
535 | ]
536 | },
537 | {
538 | "cell_type": "code",
539 | "metadata": {
540 | "id": "uN-97eQ3FvW5"
541 | },
542 | "source": [
543 |         "# format this in a manner compatible with ATT estimation: outcome, treatment, and confounders\n",
544 | "compact_df = compact_df.reset_index()\n",
545 | "\n",
546 | "outcome = compact_df['Y1-Y0']\n",
547 | "treatment = compact_df['jewish_insitute']\n",
548 | "confounders = compact_df[['neighbourhood','public_institution', 'gas_station', 'bank', 'education', 'employment_rate']]"
549 | ],
550 | "execution_count": 205,
551 | "outputs": []
552 | },
553 | {
554 | "cell_type": "code",
555 | "metadata": {
556 | "colab": {
557 | "base_uri": "https://localhost:8080/"
558 | },
559 | "id": "cYl3bRQ4HliO",
560 | "outputId": "7379f923-f388-400e-8557-3848238cc675"
561 | },
562 | "source": [
563 | "# finally, do some light data cleaning\n",
564 | "treatment=treatment.astype(int)\n",
565 | "\n",
566 | "# scale continuous covariates\n",
567 | "cont_vars = ['education', 'employment_rate']\n",
568 | "confounders[cont_vars] = preprocessing.scale(confounders[cont_vars])\n",
569 | "\n"
570 | ],
571 | "execution_count": 206,
572 | "outputs": [
573 | {
574 | "output_type": "stream",
575 | "name": "stderr",
576 | "text": [
577 | "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:6: SettingWithCopyWarning: \n",
578 | "A value is trying to be set on a copy of a slice from a DataFrame.\n",
579 | "Try using .loc[row_indexer,col_indexer] = value instead\n",
580 | "\n",
581 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
582 | " \n",
583 | "/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py:1734: SettingWithCopyWarning: \n",
584 | "A value is trying to be set on a copy of a slice from a DataFrame.\n",
585 | "Try using .loc[row_indexer,col_indexer] = value instead\n",
586 | "\n",
587 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
588 | " isetter(loc, value[:, i].tolist())\n"
589 | ]
590 | }
591 | ]
592 | },
593 | {
594 | "cell_type": "markdown",
595 | "metadata": {
596 | "id": "C576dWRsa3ad"
597 | },
598 | "source": [
599 | "## Specify Nuisance Function Models\n",
600 | "\n",
601 |         "The next step is to specify models for the two nuisance functions used by the estimator: the conditional expected outcome and the propensity score (both are defined just below)."
602 | ]
603 | },
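Concretely (same notation as above), the two nuisance functions fit in the next cells are

    Q(a, x) = \mathbb{E}[\,Y \mid A = a,\, X = x\,],
    \qquad
    g(x) = \Pr(A = 1 \mid X = x).

Q_model estimates Q and g_model estimates g. The ATT estimator at the end only needs Q(0, x), the expected change in thefts for an untreated block with covariates x, together with g(x); the cross-fitting helper below computes Q(1, x) as well, but it is not used by the ATT score.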
604 | {
605 | "cell_type": "code",
606 | "metadata": {
607 | "colab": {
608 | "base_uri": "https://localhost:8080/"
609 | },
610 | "id": "qyOhSZRQRb8W",
611 | "outputId": "63ed01b0-48af-41b4-d878-909b06470000"
612 | },
613 | "source": [
614 | "# specify a model for the conditional expected outcome\n",
615 | "\n",
616 | "# TODO(victorveitch) the covariates have basically no predictive power, replace this example with something better\n",
617 | "\n",
618 | "# make a function that returns a sklearn model for later use in k-folding\n",
619 | "def make_Q_model():\n",
620 | " # return LinearRegression()\n",
621 | " return RandomForestRegressor(random_state=RANDOM_SEED, n_estimators=100, max_depth=2)\n",
622 | "Q_model = make_Q_model()\n",
623 | "\n",
624 | "# Sanity check that chosen model actually improves test error\n",
625 |         "# A real analysis should give substantial attention to model selection and validation (one option is sketched just after this cell)\n",
626 | "\n",
627 | "X_w_treatment = confounders.copy()\n",
628 | "X_w_treatment[\"treatment\"] = treatment\n",
629 | "\n",
630 | "X_train, X_test, y_train, y_test = train_test_split(X_w_treatment, outcome, test_size=0.2)\n",
631 | "Q_model.fit(X_train, y_train)\n",
632 | "y_pred = Q_model.predict(X_test)\n",
633 | "\n",
634 | "test_mse=mean_squared_error(y_pred, y_test)\n",
635 | "print(f\"Test MSE of fit model {test_mse}\") \n",
636 | "baseline_mse=mean_squared_error(y_train.mean()*np.ones_like(y_test), y_test)\n",
637 | "print(f\"Test MSE of no-covariate model {baseline_mse}\")"
638 | ],
639 | "execution_count": 207,
640 | "outputs": [
641 | {
642 | "output_type": "stream",
643 | "name": "stdout",
644 | "text": [
645 | "Test MSE of fit model 0.027801530904389606\n",
646 | "Test MSE of no-covariate model 0.028564516759592096\n"
647 | ]
648 | }
649 | ]
650 | },
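The comment in the cell above flags that model selection deserves real attention. A minimal sketch of one option, assuming the confounders, treatment, outcome, and RANDOM_SEED objects defined earlier in this notebook (the hyperparameter grid is illustrative, not tuned):

    from sklearn.ensemble import RandomForestRegressor
    from sklearn.model_selection import GridSearchCV

    # candidate outcome models: random forests of varying capacity
    param_grid = {"max_depth": [2, 3, 5, None], "n_estimators": [100, 300]}

    X_w_treatment = confounders.copy()
    X_w_treatment["treatment"] = treatment

    # 5-fold cross-validated search, scored by (negative) mean squared error
    search = GridSearchCV(
        RandomForestRegressor(random_state=RANDOM_SEED),
        param_grid=param_grid,
        scoring="neg_mean_squared_error",
        cv=5,
    )
    search.fit(X_w_treatment, outcome)
    print(search.best_params_, -search.best_score_)

The selected hyperparameters could then be baked into make_Q_model so that the cross-fitting below uses the chosen model.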
651 | {
652 | "cell_type": "code",
653 | "metadata": {
654 | "colab": {
655 | "base_uri": "https://localhost:8080/"
656 | },
657 | "id": "uq6eZEBXbsaI",
658 | "outputId": "5b82bcee-03a4-48db-8a16-8c68168245b4"
659 | },
660 | "source": [
661 | "# specify a model for the propensity score\n",
662 | "\n",
663 | "def make_g_model():\n",
664 | "# return LogisticRegression(max_iter=1000)\n",
665 | " return RandomForestClassifier(n_estimators=100, max_depth=2)\n",
666 | "\n",
667 | "g_model = make_g_model()\n",
668 | "# Sanity check that chosen model actually improves test error\n",
669 | "# A real analysis should give substantial attention to model selection and validation \n",
670 | "\n",
671 | "X_train, X_test, a_train, a_test = train_test_split(confounders, treatment, test_size=0.2)\n",
672 | "g_model.fit(X_train, a_train)\n",
673 | "a_pred = g_model.predict_proba(X_test)[:,1]\n",
674 | "\n",
675 | "test_ce=log_loss(a_test, a_pred)\n",
676 | "print(f\"Test CE of fit model {test_ce}\") \n",
677 | "baseline_ce=log_loss(a_test, a_train.mean()*np.ones_like(a_test))\n",
678 | "print(f\"Test CE of no-covariate model {baseline_ce}\")"
679 | ],
680 | "execution_count": 208,
681 | "outputs": [
682 | {
683 | "output_type": "stream",
684 | "name": "stdout",
685 | "text": [
686 | "Test CE of fit model 0.1597166570168377\n",
687 | "Test CE of no-covariate model 0.16733990853941555\n"
688 | ]
689 | }
690 | ]
691 | },
692 | {
693 | "cell_type": "markdown",
694 | "metadata": {
695 | "id": "2RkvV_4_dFWo"
696 | },
697 | "source": [
698 |         "## Use cross-fitting to get predicted outcomes and propensity scores for each unit"
699 | ]
700 | },
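Cross-fitting means each unit's nuisance predictions come from a model that never saw that unit, so overfitting in the flexible ML models does not leak into the downstream ATT estimate. The helper functions in the next cell implement this by hand. For the propensity score alone, sklearn's cross_val_predict gives essentially the same thing (up to the exact fold assignment); a sketch, reusing the make_g_model, confounders, and treatment objects defined above:

    from sklearn.model_selection import cross_val_predict

    # out-of-fold propensity scores: each prediction comes from a model
    # fit on the other folds (10 folds, matching n_splits below)
    g_alt = cross_val_predict(
        make_g_model(), confounders, treatment, cv=10, method="predict_proba"
    )[:, 1]

The outcome predictions Q(0, x) and Q(1, x) cannot be obtained this way, because they require predictions at counterfactual inputs with the treatment column forced to 0 or 1, which is exactly what the custom helper below handles.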
701 | {
702 | "cell_type": "code",
703 | "metadata": {
704 | "id": "KA0AsEGJ_X3b"
705 | },
706 | "source": [
707 | "# helper functions to implement the cross fitting\n",
708 | "\n",
709 | "def treatment_k_fold_fit_and_predict(make_model, X:pd.DataFrame, A:np.array, n_splits:int):\n",
710 | " \"\"\"\n",
711 | " Implements K fold cross-fitting for the model predicting the treatment A. \n",
712 | " That is, \n",
713 | " 1. Split data into K folds\n",
714 | " 2. For each fold j, the model is fit on the other K-1 folds\n",
715 | " 3. The fitted model is used to make predictions for each data point in fold j\n",
716 | " Returns an array containing the predictions \n",
717 | "\n",
718 | " Args:\n",
719 |     "      make_model: function that returns an sklearn model (which implements fit and predict_proba)\n",
720 | " X: dataframe of variables to adjust for\n",
721 | " A: array of treatments\n",
722 | " n_splits: number of splits to use\n",
723 | " \"\"\"\n",
724 | " predictions = np.full_like(A, np.nan, dtype=float)\n",
725 | " kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)\n",
726 | " \n",
727 | " for train_index, test_index in kf.split(X, A):\n",
728 | " X_train = X.loc[train_index]\n",
729 | " A_train = A.loc[train_index]\n",
730 | " g = make_model()\n",
731 | " g.fit(X_train, A_train)\n",
732 | "\n",
733 | " # get predictions for split\n",
734 | " predictions[test_index] = g.predict_proba(X.loc[test_index])[:, 1]\n",
735 | "\n",
736 | " assert np.isnan(predictions).sum() == 0\n",
737 | " return predictions\n",
738 | "\n",
739 | "\n",
740 | "def outcome_k_fold_fit_and_predict(make_model, X:pd.DataFrame, y:np.array, A:np.array, n_splits:int, output_type:str):\n",
741 | " \"\"\"\n",
742 | " Implements K fold cross-fitting for the model predicting the outcome Y. \n",
743 | " That is, \n",
744 | " 1. Split data into K folds\n",
745 | " 2. For each fold j, the model is fit on the other K-1 folds\n",
746 | " 3. The fitted model is used to make predictions for each data point in fold j\n",
747 |     "    Returns two arrays containing the predictions for each unit with treatment set to 0 and to 1, respectively \n",
748 | "\n",
749 | " Args:\n",
750 |     "      make_model: function that returns an sklearn model (that implements fit and either predict_proba or predict)\n",
751 | " X: dataframe of variables to adjust for\n",
752 | " y: array of outcomes\n",
753 | " A: array of treatments\n",
754 | " n_splits: number of splits to use\n",
755 | " output_type: type of outcome, \"binary\" or \"continuous\"\n",
756 | "\n",
757 | " \"\"\"\n",
758 | " predictions0 = np.full_like(A, np.nan, dtype=float)\n",
759 | " predictions1 = np.full_like(y, np.nan, dtype=float)\n",
760 | " if output_type == 'binary':\n",
761 | " kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)\n",
762 | " elif output_type == 'continuous':\n",
763 | " kf = KFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)\n",
764 | "\n",
765 | " # include the treatment as input feature\n",
766 | " X_w_treatment = X.copy()\n",
767 | " X_w_treatment[\"A\"] = A\n",
768 | "\n",
769 | " # for predicting effect under treatment / control status for each data point \n",
770 | " X0 = X_w_treatment.copy()\n",
771 | " X0[\"A\"] = 0\n",
772 | " X1 = X_w_treatment.copy()\n",
773 | " X1[\"A\"] = 1\n",
774 | "\n",
775 | " \n",
776 | " for train_index, test_index in kf.split(X_w_treatment, y):\n",
777 | " X_train = X_w_treatment.loc[train_index]\n",
778 | " y_train = y.loc[train_index]\n",
779 | " q = make_model()\n",
780 | " q.fit(X_train, y_train)\n",
781 | "\n",
782 | " if output_type =='binary':\n",
783 | " predictions0[test_index] = q.predict_proba(X0.loc[test_index])[:, 1]\n",
784 | " predictions1[test_index] = q.predict_proba(X1.loc[test_index])[:, 1]\n",
785 | " elif output_type == 'continuous':\n",
786 | " predictions0[test_index] = q.predict(X0.loc[test_index])\n",
787 | " predictions1[test_index] = q.predict(X1.loc[test_index])\n",
788 | "\n",
789 | " assert np.isnan(predictions0).sum() == 0\n",
790 | " assert np.isnan(predictions1).sum() == 0\n",
791 | " return predictions0, predictions1"
792 | ],
793 | "execution_count": 209,
794 | "outputs": []
795 | },
796 | {
797 | "cell_type": "code",
798 | "metadata": {
799 | "id": "wVcE6pRQeMNf"
800 | },
801 | "source": [
802 | "g = treatment_k_fold_fit_and_predict(make_g_model, X=confounders, A=treatment, n_splits=10)"
803 | ],
804 | "execution_count": 210,
805 | "outputs": []
806 | },
807 | {
808 | "cell_type": "code",
809 | "metadata": {
810 | "id": "GLEHlLLdWSh9"
811 | },
812 | "source": [
813 | "Q0,Q1=outcome_k_fold_fit_and_predict(make_Q_model, X=confounders, y=outcome, A=treatment, n_splits=10, output_type=\"continuous\")"
814 | ],
815 | "execution_count": 211,
816 | "outputs": []
817 | },
818 | {
819 | "cell_type": "code",
820 | "metadata": {
821 | "colab": {
822 | "base_uri": "https://localhost:8080/",
823 | "height": 203
824 | },
825 | "id": "_NVCV0q0g8wQ",
826 | "outputId": "625e4e9d-8ea4-4e57-c684-6037a4ce3b3f"
827 | },
828 | "source": [
829 | "data_and_nuisance_estimates = pd.DataFrame({'g': g, 'Q0': Q0, 'Q1': Q1, 'A': treatment, 'Y': outcome})\n",
830 | "data_and_nuisance_estimates.head()"
831 | ],
832 | "execution_count": 212,
833 | "outputs": [
834 | {
835 | "output_type": "execute_result",
836 | "data": {
908 | "text/plain": [
909 | " g Q0 Q1 A Y\n",
910 | "0 0.027920 -0.065413 -0.133397 0 0.000000\n",
911 | "1 0.027276 -0.070393 -0.118878 0 0.156250\n",
912 | "2 0.028456 -0.076041 -0.142226 0 -0.302083\n",
913 | "3 0.028456 -0.065413 -0.133397 0 0.062500\n",
914 | "4 0.025655 -0.020747 -0.085449 0 0.062500"
915 | ]
916 | },
917 | "metadata": {},
918 | "execution_count": 212
919 | }
920 | ]
921 | },
922 | {
923 | "cell_type": "markdown",
924 | "metadata": {
925 | "id": "VNhM7URdgzQB"
926 | },
927 | "source": [
928 |         "## Combine predicted values and data into an estimate of the ATT"
929 | ]
930 | },
931 | {
932 | "cell_type": "code",
933 | "metadata": {
934 | "id": "J-vONC5ejwh2"
935 | },
936 | "source": [
937 | "def att_aiptw(Q0, Q1, g, A, Y, prob_t=None):\n",
938 | " \"\"\"\n",
939 | " # Double ML estimator for the ATT\n",
940 | " This uses the ATT specific scores, see equation 3.9 of https://www.econstor.eu/bitstream/10419/149795/1/869216953.pdf\n",
941 | " \"\"\"\n",
942 | "\n",
943 | " if prob_t is None:\n",
944 | " prob_t = A.mean() # estimate marginal probability of treatment\n",
945 | "\n",
946 | " tau_hat = (A*(Y-Q0) - (1-A)*(g/(1-g))*(Y-Q0)).mean()/ prob_t\n",
947 | " \n",
948 | " scores = (A*(Y-Q0) - (1-A)*(g/(1-g))*(Y-Q0) - tau_hat*A) / prob_t\n",
949 | " n = Y.shape[0] # number of observations\n",
950 | " std_hat = np.std(scores) / np.sqrt(n)\n",
951 | "\n",
952 | " return tau_hat, std_hat\n"
953 | ],
954 | "execution_count": 213,
955 | "outputs": []
956 | },
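For reference, in the notation used earlier (hats denote cross-fitted estimates), the point estimate computed by att_aiptw is

    \hat{\tau}_{\mathrm{ATT}}
      = \frac{1}{n\,\hat{p}} \sum_{i=1}^{n}
        \Bigl[\, A_i \bigl(Y_i - \hat{Q}(0, X_i)\bigr)
          - (1 - A_i)\, \frac{\hat{g}(X_i)}{1 - \hat{g}(X_i)}\, \bigl(Y_i - \hat{Q}(0, X_i)\bigr) \Bigr],
    \qquad
    \hat{p} = \frac{1}{n} \sum_{i=1}^{n} A_i ,

and the reported standard error is the sample standard deviation of the per-unit scores

    \varphi_i = \frac{A_i \bigl(Y_i - \hat{Q}(0, X_i)\bigr)
      - (1 - A_i)\, \frac{\hat{g}(X_i)}{1 - \hat{g}(X_i)}\, \bigl(Y_i - \hat{Q}(0, X_i)\bigr)
      - \hat{\tau}_{\mathrm{ATT}}\, A_i}{\hat{p}}

divided by \sqrt{n}, exactly as in the code above.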
957 | {
958 | "cell_type": "code",
959 | "metadata": {
960 | "colab": {
961 | "base_uri": "https://localhost:8080/"
962 | },
963 | "id": "SjDj0F9Bm9uq",
964 | "outputId": "bfbca9bb-c2e0-4171-d65f-bebb71fd0da1"
965 | },
966 | "source": [
967 | "tau_hat, std_hat = att_aiptw(**data_and_nuisance_estimates)\n",
968 | "print(f\"The estimate is {tau_hat} pm {1.96*std_hat}\")"
969 | ],
970 | "execution_count": 214,
971 | "outputs": [
972 | {
973 | "output_type": "stream",
974 | "name": "stdout",
975 | "text": [
976 | "The estimate is -0.0777691984649497 pm 0.05810535308191231\n"
977 | ]
978 | }
979 | ]
980 | },
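The printed output above can be read as a 95% confidence interval: the two numbers are the point estimate and 1.96 times its standard error. A one-line check with the values copied from that output:

    # point estimate and half-width printed by the cell above
    tau_hat, half_width = -0.0777691984649497, 0.05810535308191231
    print(f"95% CI: ({tau_hat - half_width:.3f}, {tau_hat + half_width:.3f})")
    # 95% CI: (-0.136, -0.020)

The interval excludes zero, i.e. blocks that received extra police saw a larger drop in car thefts than comparable blocks that did not.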
981 | {
982 | "cell_type": "code",
983 | "metadata": {
984 | "colab": {
985 | "base_uri": "https://localhost:8080/"
986 | },
987 | "id": "R3YqKD60UElw",
988 | "outputId": "b4dad931-c970-429e-8c83-ece9db655c9f"
989 | },
990 | "source": [
991 |     "# for comparison, the simple difference-in-differences point estimate without any covariate adjustment\n",
992 | "outcome[treatment==1].mean()-outcome[treatment==0].mean()"
993 | ],
994 | "execution_count": 215,
995 | "outputs": [
996 | {
997 | "output_type": "execute_result",
998 | "data": {
999 | "text/plain": [
1000 | "-0.06683773314434818"
1001 | ]
1002 | },
1003 | "metadata": {},
1004 | "execution_count": 215
1005 | }
1006 | ]
1007 | },
1008 | {
1009 | "cell_type": "code",
1010 | "metadata": {
1011 | "id": "37ep7LyGUHH9"
1012 | },
1013 | "source": [
1014 | ""
1015 | ],
1016 | "execution_count": 215,
1017 | "outputs": []
1018 | }
1019 | ]
1020 | }
--------------------------------------------------------------------------------