├── .gitignore ├── ATE_Estimation_with_Machine_Learning.ipynb ├── IV_Strategies.ipynb ├── LICENSE ├── README.md ├── Sensitivity_Analysis.ipynb ├── data ├── ditella-crime-2004 │ ├── CrimebyBlock.dta │ ├── DiTella_crime.csv │ ├── MonthlyPanel.dta │ ├── README │ ├── README~ │ ├── WeeklyPanel.dta │ └── data_cleaning.ipynb ├── hbp_dbp.csv └── outvote_2020_data.csv └── difference_in_differences.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /ATE_Estimation_with_Machine_Learning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "ATE-Estimation-with-Machine-Learning.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [], 9 | "toc_visible": true, 10 | "include_colab_link": true 11 | }, 12 | "kernelspec": { 13 | "name": "python3", 14 | "display_name": "Python 3" 15 | }, 16 | "language_info": { 17 | "name": "python" 18 | } 19 | }, 20 | "cells": [ 21 | { 22 | "cell_type": "markdown", 23 | "metadata": { 24 | "id": "view-in-github", 25 | "colab_type": "text" 26 | }, 27 | "source": [ 28 | "\"Open" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": { 34 | "id": "QfZkNLUb4B-p" 35 | }, 36 | "source": [ 37 | "# ATT Estimation Tutorial\n", 38 | "\n", 39 | "This tutorial gives a short example for how to estimate average treatment effect on the treated using machine learning methods" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "metadata": { 45 | "id": "dS2X3Bq1-fxE" 46 | }, 47 | "source": [ 48 | "import numpy as np\n", 49 | "import pandas as pd\n", 50 | "import scipy as sp\n", 51 | "from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor\n", 52 | "from sklearn.model_selection import KFold, StratifiedKFold, train_test_split\n", 53 | "from sklearn.metrics import mean_squared_error, log_loss\n", 54 | "import sklearn\n", 55 | "import os" 56 | ], 57 | "execution_count": 6, 58 | "outputs": [] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "metadata": { 63 | "id": "nxJ46X9cFJ9X" 64 | }, 65 | "source": [ 66 | "RANDOM_SEED=42\n", 67 | "np.random.seed(RANDOM_SEED)" 68 | ], 69 | "execution_count": 7, 70 | "outputs": [] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": { 75 | "id": "yPbJeayiEs3u" 76 | }, 77 | "source": [ 78 | "##Load and Format LaLonde Observational Data" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "metadata": { 84 | "id": "2AC9TPko-hbt" 85 | }, 86 | "source": [ 87 | "def make_data_lalonde(df):\n", 88 | " df_new = df.drop(['nodegree'], axis=1)\n", 89 | " df_new['pos74'] = (df_new['RE74'] > 0).astype(int)\n", 90 | " df_new['pos75'] = (df_new['RE75'] > 0).astype(int)\n", 91 | " df_new['treatment'] = df_new['treatment'].astype(int)\n", 92 | " return df_new\n", 93 | "\n", 94 | "\n", 95 | "col_names = ['treatment', 'age', 'education', 'black',\n", 96 | " 'hispanic', 'married', 'nodegree', 'RE74', 'RE75', 'RE78']\n", 97 | "control = pd.read_csv('https://raw.githubusercontent.com/anishazaveri/austen_plots/master/data/imbens-raw/psid_controls.txt', header=None, sep=r\"\\s\\s\", names=col_names, engine='python')\n", 98 | "treatment = pd.read_csv('https://raw.githubusercontent.com/anishazaveri/austen_plots/master/data/imbens-raw/nswre74_treated.txt', header=None, sep=r\"\\s\\s\", 
names=col_names, engine='python')\n", 99 | "\n", 100 | "lalonde1 = pd.concat([control, treatment]).reset_index(drop=True)\n", 101 | "lalonde1 = make_data_lalonde(lalonde1)" 102 | ], 103 | "execution_count": 8, 104 | "outputs": [] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "metadata": { 109 | "colab": { 110 | "base_uri": "https://localhost:8080/", 111 | "height": 203 112 | }, 113 | "id": "-A1LX6-t-hZD", 114 | "outputId": "b0e276e2-dce3-424d-ffc7-e2b992ad62ec" 115 | }, 116 | "source": [ 117 | "lalonde1.head()" 118 | ], 119 | "execution_count": 9, 120 | "outputs": [ 121 | { 122 | "output_type": "execute_result", 123 | "data": { 124 | "text/html": [ 125 | "
\n", 126 | "\n", 139 | "\n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | "
treatmentageeducationblackhispanicmarriedRE74RE75RE78pos74pos75
0047.012.00.00.00.00.00.00.000
1050.012.01.00.01.00.00.00.000
2044.012.00.00.00.00.00.00.000
3028.012.01.00.01.00.00.00.000
4054.012.00.00.01.00.00.00.000
\n", 229 | "
" 230 | ], 231 | "text/plain": [ 232 | " treatment age education black hispanic ... RE74 RE75 RE78 pos74 pos75\n", 233 | "0 0 47.0 12.0 0.0 0.0 ... 0.0 0.0 0.0 0 0\n", 234 | "1 0 50.0 12.0 1.0 0.0 ... 0.0 0.0 0.0 0 0\n", 235 | "2 0 44.0 12.0 0.0 0.0 ... 0.0 0.0 0.0 0 0\n", 236 | "3 0 28.0 12.0 1.0 0.0 ... 0.0 0.0 0.0 0 0\n", 237 | "4 0 54.0 12.0 0.0 0.0 ... 0.0 0.0 0.0 0 0\n", 238 | "\n", 239 | "[5 rows x 11 columns]" 240 | ] 241 | }, 242 | "metadata": {}, 243 | "execution_count": 9 244 | } 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "metadata": { 250 | "id": "APOqpHmrOGzo" 251 | }, 252 | "source": [ 253 | "confounders = lalonde1.drop(columns=['RE78', 'treatment'])\n", 254 | "outcome = lalonde1['RE78']\n", 255 | "treatment = lalonde1['treatment']" 256 | ], 257 | "execution_count": 10, 258 | "outputs": [] 259 | }, 260 | { 261 | "cell_type": "markdown", 262 | "metadata": { 263 | "id": "C576dWRsa3ad" 264 | }, 265 | "source": [ 266 | "## Specify Nuisance Function Models\n", 267 | "\n", 268 | "The next step is to specify models for the conditional expected outcome and propensity score" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "metadata": { 274 | "colab": { 275 | "base_uri": "https://localhost:8080/" 276 | }, 277 | "id": "qyOhSZRQRb8W", 278 | "outputId": "7df1d854-c13f-4f93-ec7c-4977df6ad283" 279 | }, 280 | "source": [ 281 | "# specify a model for the conditional expected outcome\n", 282 | "\n", 283 | "# make a function that returns a sklearn model for later use in k-folding\n", 284 | "def make_Q_model():\n", 285 | " return RandomForestRegressor(random_state=RANDOM_SEED, n_estimators=500, max_depth=None)\n", 286 | "Q_model = make_Q_model()\n", 287 | "\n", 288 | "# Sanity check that chosen model actually improves test error\n", 289 | "# A real analysis should give substantial attention to model selection and validation \n", 290 | "\n", 291 | "X_w_treatment = confounders.copy()\n", 292 | "X_w_treatment[\"treatment\"] = treatment\n", 293 | "\n", 294 | "X_train, X_test, y_train, y_test = train_test_split(X_w_treatment, outcome, test_size=0.2)\n", 295 | "Q_model.fit(X_train, y_train)\n", 296 | "y_pred = Q_model.predict(X_test)\n", 297 | "\n", 298 | "test_mse=mean_squared_error(y_pred, y_test)\n", 299 | "print(f\"Test MSE of fit model {test_mse}\") \n", 300 | "baseline_mse=mean_squared_error(y_train.mean()*np.ones_like(y_test), y_test)\n", 301 | "print(f\"Test MSE of no-covariate model {baseline_mse}\")" 302 | ], 303 | "execution_count": 11, 304 | "outputs": [ 305 | { 306 | "output_type": "stream", 307 | "name": "stdout", 308 | "text": [ 309 | "Test MSE of fit model 105637760.68507269\n", 310 | "Test MSE of no-covariate model 246319790.55062827\n" 311 | ] 312 | } 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "metadata": { 318 | "colab": { 319 | "base_uri": "https://localhost:8080/" 320 | }, 321 | "id": "uq6eZEBXbsaI", 322 | "outputId": "974c356c-07f3-4573-f8c3-b83400c82169" 323 | }, 324 | "source": [ 325 | "# specify a model for the propensity score\n", 326 | "\n", 327 | "def make_g_model():\n", 328 | "# return LogisticRegression(max_iter=1000)\n", 329 | " return RandomForestClassifier(n_estimators=100, max_depth=5)\n", 330 | "\n", 331 | "g_model = make_g_model()\n", 332 | "# Sanity check that chosen model actually improves test error\n", 333 | "# A real analysis should give substantial attention to model selection and validation \n", 334 | "\n", 335 | "X_train, X_test, a_train, a_test = train_test_split(confounders, treatment, test_size=0.2)\n", 336 | 
"g_model.fit(X_train, a_train)\n", 337 | "a_pred = g_model.predict_proba(X_test)[:,1]\n", 338 | "\n", 339 | "test_ce=log_loss(a_test, a_pred)\n", 340 | "print(f\"Test CE of fit model {test_ce}\") \n", 341 | "baseline_ce=log_loss(a_test, a_train.mean()*np.ones_like(a_test))\n", 342 | "print(f\"Test CE of no-covariate model {baseline_ce}\")" 343 | ], 344 | "execution_count": 12, 345 | "outputs": [ 346 | { 347 | "output_type": "stream", 348 | "name": "stdout", 349 | "text": [ 350 | "Test CE of fit model 0.07789407933364972\n", 351 | "Test CE of no-covariate model 0.21817471356014154\n" 352 | ] 353 | } 354 | ] 355 | }, 356 | { 357 | "cell_type": "markdown", 358 | "metadata": { 359 | "id": "2RkvV_4_dFWo" 360 | }, 361 | "source": [ 362 | "## Use cross fitting to get get predicted outcomes and propensity scores for each unit" 363 | ] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "metadata": { 368 | "id": "KA0AsEGJ_X3b" 369 | }, 370 | "source": [ 371 | "# helper functions to implement the cross fitting\n", 372 | "\n", 373 | "def treatment_k_fold_fit_and_predict(make_model, X:pd.DataFrame, A:np.array, n_splits:int):\n", 374 | " \"\"\"\n", 375 | " Implements K fold cross-fitting for the model predicting the treatment A. \n", 376 | " That is, \n", 377 | " 1. Split data into K folds\n", 378 | " 2. For each fold j, the model is fit on the other K-1 folds\n", 379 | " 3. The fitted model is used to make predictions for each data point in fold j\n", 380 | " Returns an array containing the predictions \n", 381 | "\n", 382 | " Args:\n", 383 | " model: function that returns sklearn model (which implements fit and predict_prob)\n", 384 | " X: dataframe of variables to adjust for\n", 385 | " A: array of treatments\n", 386 | " n_splits: number of splits to use\n", 387 | " \"\"\"\n", 388 | " predictions = np.full_like(A, np.nan, dtype=float)\n", 389 | " kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)\n", 390 | " \n", 391 | " for train_index, test_index in kf.split(X, A):\n", 392 | " X_train = X.loc[train_index]\n", 393 | " A_train = A.loc[train_index]\n", 394 | " g = make_model()\n", 395 | " g.fit(X_train, A_train)\n", 396 | "\n", 397 | " # get predictions for split\n", 398 | " predictions[test_index] = g.predict_proba(X.loc[test_index])[:, 1]\n", 399 | "\n", 400 | " assert np.isnan(predictions).sum() == 0\n", 401 | " return predictions\n", 402 | "\n", 403 | "\n", 404 | "def outcome_k_fold_fit_and_predict(make_model, X:pd.DataFrame, y:np.array, A:np.array, n_splits:int, output_type:str):\n", 405 | " \"\"\"\n", 406 | " Implements K fold cross-fitting for the model predicting the outcome Y. \n", 407 | " That is, \n", 408 | " 1. Split data into K folds\n", 409 | " 2. For each fold j, the model is fit on the other K-1 folds\n", 410 | " 3. 
The fitted model is used to make predictions for each data point in fold j\n", 411 | " Returns two arrays containing the predictions for all units untreated, all units treated \n", 412 | "\n", 413 | " Args:\n", 414 | " model: function that returns sklearn model (that implements fit and either predict_prob or predict)\n", 415 | " X: dataframe of variables to adjust for\n", 416 | " y: array of outcomes\n", 417 | " A: array of treatments\n", 418 | " n_splits: number of splits to use\n", 419 | " output_type: type of outcome, \"binary\" or \"continuous\"\n", 420 | "\n", 421 | " \"\"\"\n", 422 | " predictions0 = np.full_like(A, np.nan, dtype=float)\n", 423 | " predictions1 = np.full_like(y, np.nan, dtype=float)\n", 424 | " if output_type == 'binary':\n", 425 | " kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)\n", 426 | " elif output_type == 'continuous':\n", 427 | " kf = KFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)\n", 428 | "\n", 429 | " # include the treatment as input feature\n", 430 | " X_w_treatment = X.copy()\n", 431 | " X_w_treatment[\"A\"] = A\n", 432 | "\n", 433 | " # for predicting effect under treatment / control status for each data point \n", 434 | " X0 = X_w_treatment.copy()\n", 435 | " X0[\"A\"] = 0\n", 436 | " X1 = X_w_treatment.copy()\n", 437 | " X1[\"A\"] = 1\n", 438 | "\n", 439 | " \n", 440 | " for train_index, test_index in kf.split(X_w_treatment, y):\n", 441 | " X_train = X_w_treatment.loc[train_index]\n", 442 | " y_train = y.loc[train_index]\n", 443 | " q = make_model()\n", 444 | " q.fit(X_train, y_train)\n", 445 | "\n", 446 | " if output_type =='binary':\n", 447 | " predictions0[test_index] = q.predict_proba(X0.loc[test_index])[:, 1]\n", 448 | " predictions1[test_index] = q.predict_proba(X1.loc[test_index])[:, 1]\n", 449 | " elif output_type == 'continuous':\n", 450 | " predictions0[test_index] = q.predict(X0.loc[test_index])\n", 451 | " predictions1[test_index] = q.predict(X1.loc[test_index])\n", 452 | "\n", 453 | " assert np.isnan(predictions0).sum() == 0\n", 454 | " assert np.isnan(predictions1).sum() == 0\n", 455 | " return predictions0, predictions1" 456 | ], 457 | "execution_count": 13, 458 | "outputs": [] 459 | }, 460 | { 461 | "cell_type": "code", 462 | "metadata": { 463 | "id": "wVcE6pRQeMNf" 464 | }, 465 | "source": [ 466 | "g = treatment_k_fold_fit_and_predict(make_g_model, X=confounders, A=treatment, n_splits=10)" 467 | ], 468 | "execution_count": 14, 469 | "outputs": [] 470 | }, 471 | { 472 | "cell_type": "code", 473 | "metadata": { 474 | "id": "GLEHlLLdWSh9" 475 | }, 476 | "source": [ 477 | "Q0,Q1=outcome_k_fold_fit_and_predict(make_Q_model, X=confounders, y=outcome, A=treatment, n_splits=10, output_type=\"continuous\")" 478 | ], 479 | "execution_count": 15, 480 | "outputs": [] 481 | }, 482 | { 483 | "cell_type": "code", 484 | "metadata": { 485 | "colab": { 486 | "base_uri": "https://localhost:8080/", 487 | "height": 203 488 | }, 489 | "id": "_NVCV0q0g8wQ", 490 | "outputId": "b638a74f-1c3f-4860-bd3d-af9eb397832a" 491 | }, 492 | "source": [ 493 | "data_and_nuisance_estimates = pd.DataFrame({'g': g, 'Q0': Q0, 'Q1': Q1, 'A': treatment, 'Y': outcome})\n", 494 | "data_and_nuisance_estimates.head()" 495 | ], 496 | "execution_count": 16, 497 | "outputs": [ 498 | { 499 | "output_type": "execute_result", 500 | "data": { 501 | "text/html": [ 502 | "
\n", 503 | "\n", 516 | "\n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | "
gQ0Q1AY
00.31335095.5495361571.92251800.0
10.1919582032.0246473895.07048600.0
20.47078829.9404321731.49825900.0
30.51795711037.4872729030.77661000.0
40.0142460.0000002139.63096000.0
\n", 570 | "
" 571 | ], 572 | "text/plain": [ 573 | " g Q0 Q1 A Y\n", 574 | "0 0.313350 95.549536 1571.922518 0 0.0\n", 575 | "1 0.191958 2032.024647 3895.070486 0 0.0\n", 576 | "2 0.470788 29.940432 1731.498259 0 0.0\n", 577 | "3 0.517957 11037.487272 9030.776610 0 0.0\n", 578 | "4 0.014246 0.000000 2139.630960 0 0.0" 579 | ] 580 | }, 581 | "metadata": {}, 582 | "execution_count": 16 583 | } 584 | ] 585 | }, 586 | { 587 | "cell_type": "markdown", 588 | "metadata": { 589 | "id": "VNhM7URdgzQB" 590 | }, 591 | "source": [ 592 | "## Combine predicted values and data into estimate of ATT" 593 | ] 594 | }, 595 | { 596 | "cell_type": "code", 597 | "metadata": { 598 | "id": "J-vONC5ejwh2" 599 | }, 600 | "source": [ 601 | "def att_aiptw(Q0, Q1, g, A, Y, prob_t=None):\n", 602 | " \"\"\"\n", 603 | " # Double ML estimator for the ATT\n", 604 | " This uses the ATT specific scores, see equation 3.9 of https://www.econstor.eu/bitstream/10419/149795/1/869216953.pdf\n", 605 | " \"\"\"\n", 606 | "\n", 607 | " if prob_t is None:\n", 608 | " prob_t = A.mean() # estimate marginal probability of treatment\n", 609 | "\n", 610 | " tau_hat = (A*(Y-Q0) - (1-A)*(g/(1-g))*(Y-Q0)).mean()/ prob_t\n", 611 | " \n", 612 | " scores = (A*(Y-Q0) - (1-A)*(g/(1-g))*(Y-Q0) - tau_hat*A) / prob_t\n", 613 | " n = Y.shape[0] # number of observations\n", 614 | " std_hat = np.std(scores) / np.sqrt(n)\n", 615 | "\n", 616 | " return tau_hat, std_hat\n" 617 | ], 618 | "execution_count": 17, 619 | "outputs": [] 620 | }, 621 | { 622 | "cell_type": "code", 623 | "metadata": { 624 | "id": "O_F5r0SSkzzK" 625 | }, 626 | "source": [ 627 | "def ate_aiptw(Q0, Q1, g, A, Y, prob_t=None):\n", 628 | " \"\"\"\n", 629 | " # Double ML estimator for the ATE\n", 630 | " \"\"\"\n", 631 | "\n", 632 | " tau_hat = (Q1 - Q0 + A*(Y-Q1)/g - (1-A)*(Y-Q0)/(1-g)).mean()\n", 633 | " \n", 634 | " scores = Q1 - Q0 + A*(Y-Q1)/g - (1-A)*(Y-Q0)/(1-g) - tau_hat\n", 635 | " n = Y.shape[0] # number of observations\n", 636 | " std_hat = np.std(scores) / np.sqrt(n)\n", 637 | "\n", 638 | " return tau_hat, std_hat\n" 639 | ], 640 | "execution_count": 18, 641 | "outputs": [] 642 | }, 643 | { 644 | "cell_type": "code", 645 | "metadata": { 646 | "colab": { 647 | "base_uri": "https://localhost:8080/" 648 | }, 649 | "id": "SjDj0F9Bm9uq", 650 | "outputId": "fdef5c08-3829-400b-ea0e-1cb5dd01bc25" 651 | }, 652 | "source": [ 653 | "tau_hat, std_hat = att_aiptw(**data_and_nuisance_estimates)\n", 654 | "print(f\"The estimate is {tau_hat} pm {1.96*std_hat}\")" 655 | ], 656 | "execution_count": 19, 657 | "outputs": [ 658 | { 659 | "output_type": "stream", 660 | "name": "stdout", 661 | "text": [ 662 | "The estimate is 1300.9807431649592 pm 1622.6924287596182\n" 663 | ] 664 | } 665 | ] 666 | }, 667 | { 668 | "cell_type": "code", 669 | "metadata": { 670 | "colab": { 671 | "base_uri": "https://localhost:8080/" 672 | }, 673 | "id": "vSaOp1HwlQ4i", 674 | "outputId": "874e2ea0-dfc1-4594-9d45-b6663f69f163" 675 | }, 676 | "source": [ 677 | "in_treated = data_and_nuisance_estimates['A']==1\n", 678 | "treated_estimates = data_and_nuisance_estimates[in_treated]\n", 679 | "tau_hat, std_hat = ate_aiptw(**treated_estimates)\n", 680 | "print(f\"The estimate is {tau_hat} pm {1.96*std_hat}\")" 681 | ], 682 | "execution_count": 20, 683 | "outputs": [ 684 | { 685 | "output_type": "stream", 686 | "name": "stdout", 687 | "text": [ 688 | "The estimate is -33439.05484914103 pm 50637.28008886066\n" 689 | ] 690 | } 691 | ] 692 | }, 693 | { 694 | "cell_type": "code", 695 | "metadata": { 696 | "colab": { 697 | "base_uri": 
"https://localhost:8080/" 698 | }, 699 | "id": "IOuJnlbEo8j_", 700 | "outputId": "74678791-7163-41e6-f7a9-a04a7b669e81" 701 | }, 702 | "source": [ 703 | "# The LaLonde data has severe overlap issues. Lets try computing the estimate restricted to a population with only reasonable propensity scores\n", 704 | "g = data_and_nuisance_estimates['g']\n", 705 | "in_overlap_popluation = ( g < 0.90)\n", 706 | "overlap_data_and_nuisance = data_and_nuisance_estimates[in_overlap_popluation]\n", 707 | "tau_hat, std_hat = att_aiptw(**overlap_data_and_nuisance)\n", 708 | "print(f\"The estimate is {tau_hat} pm {1.96*std_hat}\")" 709 | ], 710 | "execution_count": 21, 711 | "outputs": [ 712 | { 713 | "output_type": "stream", 714 | "name": "stdout", 715 | "text": [ 716 | "The estimate is 572.1572812652179 pm 1501.516696945994\n" 717 | ] 718 | } 719 | ] 720 | }, 721 | { 722 | "cell_type": "code", 723 | "metadata": { 724 | "id": "LnJppbQdjwVI" 725 | }, 726 | "source": [ 727 | "" 728 | ], 729 | "execution_count": 22, 730 | "outputs": [] 731 | } 732 | ] 733 | } -------------------------------------------------------------------------------- /IV_Strategies.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "view-in-github", 7 | "colab_type": "text" 8 | }, 9 | "source": [ 10 | "\"Open" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": { 16 | "id": "3Pr7ijIYeO--" 17 | }, 18 | "source": [ 19 | "# LATE Estimation Tutorial" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": { 25 | "id": "kVIsqn30gqCx" 26 | }, 27 | "source": [ 28 | "This tutorial gives a short example for how to use instrument variable to estimate local average treatment effect using machine learning methods" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 1, 34 | "metadata": { 35 | "id": "Nv0YbKbGea6U" 36 | }, 37 | "outputs": [], 38 | "source": [ 39 | "import numpy as np\n", 40 | "import pandas as pd\n", 41 | "import sklearn\n", 42 | "from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor\n", 43 | "from sklearn.linear_model import LogisticRegression\n", 44 | "from sklearn.model_selection import KFold, StratifiedKFold, train_test_split\n", 45 | "from sklearn.metrics import mean_squared_error, log_loss\n", 46 | "import math" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 2, 52 | "metadata": { 53 | "id": "ONvBs_yvia3a" 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "RANDOM_SEED=0\n", 58 | "np.random.seed(RANDOM_SEED)" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": { 64 | "id": "yPbJeayiEs3u" 65 | }, 66 | "source": [ 67 | "##Load Outvote 2020 Observational Data" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": { 73 | "id": "2t60_xU_qORv" 74 | }, 75 | "source": [ 76 | "First, load the observational data." 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 3, 82 | "metadata": { 83 | "colab": { 84 | "base_uri": "https://localhost:8080/", 85 | "height": 488 86 | }, 87 | "id": "iCuOMjvsoWpm", 88 | "outputId": "5eac723f-847b-41f0-a01b-377ec7ec1261" 89 | }, 90 | "outputs": [ 91 | { 92 | "output_type": "execute_result", 93 | "data": { 94 | "text/plain": [ 95 | " voted_2020 messaged queue_position queue_length \\\n", 96 | "0 1 1 1.0 19.0 \n", 97 | "1 1 1 6.0 19.0 \n", 98 | "2 0 1 8.0 19.0 \n", 99 | "3 1 1 10.0 19.0 \n", 100 | "4 1 1 18.0 19.0 \n", 101 | "... ... ... ... ... 
\n", 102 | "81199 1 0 5.0 5.0 \n", 103 | "81200 1 0 2.0 3.0 \n", 104 | "81201 0 0 1.0 2.0 \n", 105 | "81202 1 1 2.0 9.0 \n", 106 | "81203 0 1 3.0 3.0 \n", 107 | "\n", 108 | " queue_id voted_2018 voted_2016 is_Democrat \\\n", 109 | "0 xgcwm279xcwkxjq4zxabo 0 0 0 \n", 110 | "1 xgcwm279xcwkxjq4zxabo 1 1 1 \n", 111 | "2 xgcwm279xcwkxjq4zxabo 1 0 1 \n", 112 | "3 xgcwm279xcwkxjq4zxabo 1 1 0 \n", 113 | "4 xgcwm279xcwkxjq4zxabo 0 1 1 \n", 114 | "... ... ... ... ... \n", 115 | "81199 0j3f3gyc3qspaukludfcpy4 1 1 0 \n", 116 | "81200 ixyu0l548p9dextzgjspdr 0 1 0 \n", 117 | "81201 ghclqszfr6qvz2i5yesrwo 0 1 0 \n", 118 | "81202 sscqhe1ttlh7fmkmuk231f 1 1 0 \n", 119 | "81203 41tse6pfdjlw0kwcm7rbo 0 0 0 \n", 120 | "\n", 121 | " is_Republican is_Male is_Female is_Married is_Urban is_Rural \\\n", 122 | "0 0 0 1 0 0 0 \n", 123 | "1 0 1 0 0 0 0 \n", 124 | "2 0 0 1 0 0 0 \n", 125 | "3 0 1 0 0 0 0 \n", 126 | "4 0 0 1 0 0 0 \n", 127 | "... ... ... ... ... ... ... \n", 128 | "81199 0 0 1 1 0 0 \n", 129 | "81200 0 0 1 0 0 0 \n", 130 | "81201 0 0 1 0 0 0 \n", 131 | "81202 0 1 0 1 0 0 \n", 132 | "81203 0 1 0 0 0 0 \n", 133 | "\n", 134 | " is_Battleground age \n", 135 | "0 1 68.0 \n", 136 | "1 0 52.0 \n", 137 | "2 0 26.0 \n", 138 | "3 1 39.0 \n", 139 | "4 1 23.0 \n", 140 | "... ... ... \n", 141 | "81199 1 54.0 \n", 142 | "81200 0 36.0 \n", 143 | "81201 1 90.0 \n", 144 | "81202 1 72.0 \n", 145 | "81203 1 72.0 \n", 146 | "\n", 147 | "[81204 rows x 16 columns]" 148 | ], 149 | "text/html": [ 150 | "\n", 151 | "
\n", 152 | "
\n", 153 | "
\n", 154 | "\n", 167 | "\n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | "
voted_2020messagedqueue_positionqueue_lengthqueue_idvoted_2018voted_2016is_Democratis_Republicanis_Maleis_Femaleis_Marriedis_Urbanis_Ruralis_Battlegroundage
0111.019.0xgcwm279xcwkxjq4zxabo000001000168.0
1116.019.0xgcwm279xcwkxjq4zxabo111010000052.0
2018.019.0xgcwm279xcwkxjq4zxabo101001000026.0
31110.019.0xgcwm279xcwkxjq4zxabo110010000139.0
41118.019.0xgcwm279xcwkxjq4zxabo011001000123.0
...................................................
81199105.05.00j3f3gyc3qspaukludfcpy4110001100154.0
81200102.03.0ixyu0l548p9dextzgjspdr010001000036.0
81201001.02.0ghclqszfr6qvz2i5yesrwo010001000190.0
81202112.09.0sscqhe1ttlh7fmkmuk231f110010100172.0
81203013.03.041tse6pfdjlw0kwcm7rbo000010000172.0
\n", 401 | "

81204 rows × 16 columns

\n", 402 | "
\n", 403 | " \n", 413 | " \n", 414 | " \n", 451 | "\n", 452 | " \n", 476 | "
\n", 477 | "
\n", 478 | " " 479 | ] 480 | }, 481 | "metadata": {}, 482 | "execution_count": 3 483 | } 484 | ], 485 | "source": [ 486 | "outvote = pd.read_csv('https://raw.githubusercontent.com/vveitch/causality-tutorials/main/data/outvote_2020_data.csv')\n", 487 | "outvote" 488 | ] 489 | }, 490 | { 491 | "cell_type": "markdown", 492 | "metadata": { 493 | "id": "wksy28PnqSGU" 494 | }, 495 | "source": [ 496 | "**Data description:**\n", 497 | "* Core variables:\n", 498 | " * `voted_2020`: (binary) the outcome; whether subject voted in 2020 election\n", 499 | " * `messaged`: (binary) the treatment; whether subject was messaged by an Outvote user\n", 500 | " * `queue_position`: (ordinal / pos int / 1+) the instrument; the subject's position/ranking in the user's queue; this is randomized\n", 501 | " * `queue_length`: (ordinal / pos int / 2+) the block; how long the user's queue was; this is endogenous / non-randomized and must be blocked on\n", 502 | "\n", 503 | "* Optional / additional variables: \n", 504 | " * `queue_id`: (int) a unique identifier for the queue the subject was in\n", 505 | " * `voted_2018`: (binary) whether subject voted in 2018 election\n", 506 | " * `voted_2016`: (binary) whether subject voted in 2016 election\n", 507 | " * `is_Democrat`: (binary) whether subject is registered Democrat\n", 508 | " * `is_Republican`: (binary) whether subject is registered Republican\n", 509 | " * `is_Male`: (binary) whether subject identifies as Male\n", 510 | " * `is_Female`: (binary) whether subject identifies as Female\n", 511 | " * `is_Married`: (binary) whether subject is married\n", 512 | " * `is_Urban`: (binary) whether subject lives in a city\n", 513 | " * `is_Rural`: (binary) whether subject lives in a rural area\n", 514 | " * `is_Battleground`: (binary) whether subject is registered in a battleground state\n", 515 | " * `age`: (pos int, 18+), subject's age" 516 | ] 517 | }, 518 | { 519 | "cell_type": "code", 520 | "execution_count": 4, 521 | "metadata": { 522 | "colab": { 523 | "base_uri": "https://localhost:8080/" 524 | }, 525 | "id": "6CgdtoZr7uvL", 526 | "outputId": "39c90551-18ee-4dcd-b9d1-a8970900cf62" 527 | }, 528 | "outputs": [ 529 | { 530 | "output_type": "execute_result", 531 | "data": { 532 | "text/plain": [ 533 | "43357" 534 | ] 535 | }, 536 | "metadata": {}, 537 | "execution_count": 4 538 | } 539 | ], 540 | "source": [ 541 | "# select a cutoff K = 37 for the instrument\n", 542 | "# we only consider those queues with length > K\n", 543 | "outvote = outvote.loc[outvote.queue_length>37].reset_index()\n", 544 | "outvote['queue_position'] = (outvote['queue_position'] <= 37.0).astype(int)\n", 545 | "len(outvote)" 546 | ] 547 | }, 548 | { 549 | "cell_type": "code", 550 | "execution_count": 5, 551 | "metadata": { 552 | "id": "2AC9TPko-hbt" 553 | }, 554 | "outputs": [], 555 | "source": [ 556 | "outcome = outvote['voted_2020']\n", 557 | "treatment = outvote['messaged']\n", 558 | "instrument = outvote['queue_position']\n", 559 | "block = outvote['queue_length']" 560 | ] 561 | }, 562 | { 563 | "cell_type": "markdown", 564 | "metadata": { 565 | "id": "C576dWRsa3ad" 566 | }, 567 | "source": [ 568 | "## Specify Nuisance Function Models\n", 569 | "\n", 570 | "The next step is to specify models for \n", 571 | "\n", 572 | "* $\\mu(z,x)=\\mathbb{E}(Y|z,x)$\n", 573 | "* $m(z,x) = P(A=1|z,x)$\n", 574 | "* $p(x) = P(Z=1|x)$\n", 575 | "\n" 576 | ] 577 | }, 578 | { 579 | "cell_type": "code", 580 | "execution_count": 6, 581 | "metadata": { 582 | "colab": { 583 | "base_uri": "https://localhost:8080/" 584 | 
}, 585 | "id": "qyOhSZRQRb8W", 586 | "outputId": "4819a4f0-8268-45e2-c667-7b0c6055d4d5" 587 | }, 588 | "outputs": [ 589 | { 590 | "output_type": "stream", 591 | "name": "stdout", 592 | "text": [ 593 | "Test Cross Entropy of fit model 0.5250216548208155\n", 594 | "Test Cross Entropy of no-covariate model 0.5270933091701285\n" 595 | ] 596 | } 597 | ], 598 | "source": [ 599 | "from sklearn.neighbors import KNeighborsClassifier\n", 600 | "# specify a model for mu(z,x)\n", 601 | "\n", 602 | "# make a function that returns a sklearn model for later use in k-folding\n", 603 | "def make_mu_model():\n", 604 | " return KNeighborsClassifier(n_neighbors=300)\n", 605 | "mu_model = make_mu_model()\n", 606 | "\n", 607 | "# Sanity check that chosen model actually improves test error\n", 608 | "# A real analysis should give substantial attention to model selection and validation \n", 609 | "\n", 610 | "X_zx = outvote[['queue_position','queue_length']].copy()\n", 611 | "\n", 612 | "X_train, X_test, y_train, y_test = train_test_split(X_zx, outcome, test_size=0.2)\n", 613 | "mu_model.fit(X_train, y_train)\n", 614 | "y_pred = mu_model.predict_proba(X_test)[:,1]\n", 615 | "\n", 616 | "test_ce=log_loss(y_test, y_pred)\n", 617 | "print(f\"Test Cross Entropy of fit model {test_ce}\") \n", 618 | "baseline_ce=log_loss(y_test, y_train.mean()*np.ones_like(y_test))\n", 619 | "print(f\"Test Cross Entropy of no-covariate model {baseline_ce}\")" 620 | ] 621 | }, 622 | { 623 | "cell_type": "code", 624 | "execution_count": 7, 625 | "metadata": { 626 | "colab": { 627 | "base_uri": "https://localhost:8080/" 628 | }, 629 | "id": "uq6eZEBXbsaI", 630 | "outputId": "c87bc82f-22be-49f6-af8b-058eec588403" 631 | }, 632 | "outputs": [ 633 | { 634 | "output_type": "stream", 635 | "name": "stdout", 636 | "text": [ 637 | "Test CE of fit model 0.6398686068177049\n", 638 | "Test CE of no-covariate model 0.6714289850271291\n" 639 | ] 640 | } 641 | ], 642 | "source": [ 643 | "# specify a model for m(z,x)\n", 644 | "\n", 645 | "def make_m_model():\n", 646 | " return LogisticRegression(max_iter=1000, warm_start=True, random_state=RANDOM_SEED)\n", 647 | "\n", 648 | "m_model = make_m_model()\n", 649 | "# Sanity check that chosen model actually improves test error\n", 650 | "# A real analysis should give substantial attention to model selection and validation \n", 651 | "\n", 652 | "X_train, X_test, a_train, a_test = train_test_split(X_zx, treatment, test_size=0.2)\n", 653 | "m_model.fit(X_train, a_train)\n", 654 | "a_pred = m_model.predict_proba(X_test)[:,1]\n", 655 | "\n", 656 | "test_ce=log_loss(a_test, a_pred)\n", 657 | "print(f\"Test CE of fit model {test_ce}\") \n", 658 | "baseline_ce=log_loss(a_test, a_train.mean()*np.ones_like(a_test))\n", 659 | "print(f\"Test CE of no-covariate model {baseline_ce}\")" 660 | ] 661 | }, 662 | { 663 | "cell_type": "code", 664 | "execution_count": 8, 665 | "metadata": { 666 | "colab": { 667 | "base_uri": "https://localhost:8080/" 668 | }, 669 | "id": "pg-7pFAYxRQ5", 670 | "outputId": "cf0a1fb1-31dd-4c9f-c5df-b8987d66a7c4" 671 | }, 672 | "outputs": [ 673 | { 674 | "output_type": "stream", 675 | "name": "stdout", 676 | "text": [ 677 | "Test CE of fit model 0.5303714147556886\n", 678 | "Test CE of no-covariate model 0.6810169611354872\n" 679 | ] 680 | } 681 | ], 682 | "source": [ 683 | "def make_p_model():\n", 684 | " return RandomForestClassifier(n_estimators=200, max_depth=5)\n", 685 | "\n", 686 | "p_model = make_p_model()\n", 687 | "# Sanity check that chosen model actually improves test error\n", 688 | "# 
A real analysis should give substantial attention to model selection and validation \n", 689 | "\n", 690 | "X_train, X_test, Z_train, Z_test = train_test_split(block.to_frame(), instrument, test_size=0.2)\n", 691 | "p_model.fit(X_train, Z_train)\n", 692 | "Z_pred = p_model.predict_proba(X_test)[:,1]\n", 693 | "\n", 694 | "test_ce=log_loss(Z_test, Z_pred)\n", 695 | "print(f\"Test CE of fit model {test_ce}\") \n", 696 | "baseline_ce=log_loss(Z_test, Z_train.mean()*np.ones_like(Z_test))\n", 697 | "print(f\"Test CE of no-covariate model {baseline_ce}\")" 698 | ] 699 | }, 700 | { 701 | "cell_type": "markdown", 702 | "metadata": { 703 | "id": "2RkvV_4_dFWo" 704 | }, 705 | "source": [ 706 | "## Use cross fitting to get predicted $\\hat{\\mu}$, $\\hat{m}$, $\\hat{p}$ for each unit" 707 | ] 708 | }, 709 | { 710 | "cell_type": "code", 711 | "execution_count": 9, 712 | "metadata": { 713 | "id": "KA0AsEGJ_X3b" 714 | }, 715 | "outputs": [], 716 | "source": [ 717 | "# helper functions to implement the cross fitting\n", 718 | "\n", 719 | "def p_k_fold_fit_and_predict(make_model, X:pd.DataFrame, Z:np.array, n_splits:int):\n", 720 | " \"\"\"\n", 721 | " Implements K fold cross-fitting for the model predicting the instrument Z. \n", 722 | " That is, \n", 723 | " 1. Split data into K folds\n", 724 | " 2. For each fold j, the model is fit on the other K-1 folds\n", 725 | " 3. The fitted model is used to make predictions for each data point in fold j\n", 726 | " Returns an array containing the predictions \n", 727 | "\n", 728 | " Args:\n", 729 | " model: function that returns sklearn model (which implements fit and predict_prob)\n", 730 | " X: dataframe of variables to adjust for\n", 731 | " Z: array of instruments\n", 732 | " n_splits: number of splits to use\n", 733 | " \"\"\"\n", 734 | " predictions = np.full_like(Z, np.nan, dtype=float)\n", 735 | " kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)\n", 736 | " \n", 737 | " for train_index, test_index in kf.split(X, Z):\n", 738 | " X_train = X.loc[train_index]\n", 739 | " Z_train = Z.loc[train_index]\n", 740 | " g = make_model()\n", 741 | " g.fit(X_train, Z_train)\n", 742 | "\n", 743 | " # get predictions for split\n", 744 | " predictions[test_index] = g.predict_proba(X.loc[test_index])[:, 1]\n", 745 | "\n", 746 | " assert np.isnan(predictions).sum() == 0\n", 747 | " return predictions\n", 748 | "\n", 749 | "\n", 750 | "def m_k_fold_fit_and_predict(make_model, X:pd.DataFrame, Z:np.array, A:np.array, n_splits:int):\n", 751 | " \"\"\"\n", 752 | " Implements K fold cross-fitting for the model predicting the outcome Y. \n", 753 | " That is, \n", 754 | " 1. Split data into K folds\n", 755 | " 2. For each fold j, the model is fit on the other K-1 folds\n", 756 | " 3. 
The fitted model is used to make predictions for each data point in fold j\n", 757 | " Returns two arrays containing the predictions for all units untreated, all units treated \n", 758 | "\n", 759 | " Args:\n", 760 | " model: function that returns sklearn model (that implements fit and either predict_prob or predict)\n", 761 | " X: dataframe of variables to adjust for\n", 762 | " Z: array of instruments\n", 763 | " A: array of treatments\n", 764 | " n_splits: number of splits to use\n", 765 | " \"\"\"\n", 766 | " predictions0 = np.full_like(A, np.nan, dtype=float)\n", 767 | " predictions1 = np.full_like(A, np.nan, dtype=float)\n", 768 | " kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)\n", 769 | "\n", 770 | " # include the treatment as input feature\n", 771 | " X_zx = X.copy()\n", 772 | " X_zx[\"Z\"] = Z\n", 773 | "\n", 774 | " # for predicting A under Z=1 / Z=0 status for each data point \n", 775 | " X0 = X_zx.copy()\n", 776 | " X0[\"Z\"] = 0\n", 777 | " X1 = X_zx.copy()\n", 778 | " X1[\"Z\"] = 1\n", 779 | " \n", 780 | " for train_index, test_index in kf.split(X_zx, A):\n", 781 | " X_train = X_zx.loc[train_index]\n", 782 | " A_train = A.loc[train_index]\n", 783 | " m = make_model()\n", 784 | " m.fit(X_train, A_train)\n", 785 | " predictions0[test_index] = m.predict_proba(X0.loc[test_index])[:,1]\n", 786 | " predictions1[test_index] = m.predict_proba(X1.loc[test_index])[:,1]\n", 787 | "\n", 788 | " assert np.isnan(predictions0).sum() == 0\n", 789 | " assert np.isnan(predictions1).sum() == 0\n", 790 | " return predictions0, predictions1\n", 791 | "\n", 792 | "def mu_k_fold_fit_and_predict(make_model, X:pd.DataFrame, Z:np.array, y:np.array, n_splits:int, output_type:str):\n", 793 | " \"\"\"\n", 794 | " Implements K fold cross-fitting for the model predicting the outcome Y. \n", 795 | " That is, \n", 796 | " 1. Split data into K folds\n", 797 | " 2. For each fold j, the model is fit on the other K-1 folds\n", 798 | " 3. 
The fitted model is used to make predictions for each data point in fold j\n", 799 | " Returns two arrays containing the predictions for all units untreated, all units treated \n", 800 | "\n", 801 | " Args:\n", 802 | " model: function that returns sklearn model (that implements fit and either predict_prob or predict)\n", 803 | " X: dataframe of variables to adjust for\n", 804 | " Z: array of instruments\n", 805 | " y: array of outcomes\n", 806 | " n_splits: number of splits to use\n", 807 | " output_type: type of outcome, \"binary\" or \"continuous\"\n", 808 | "\n", 809 | " \"\"\"\n", 810 | " predictions0 = np.full_like(y, np.nan, dtype=float)\n", 811 | " predictions1 = np.full_like(y, np.nan, dtype=float)\n", 812 | " if output_type == 'binary':\n", 813 | " kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)\n", 814 | " elif output_type == 'continuous':\n", 815 | " kf = KFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)\n", 816 | "\n", 817 | " # include the treatment as input feature\n", 818 | " X_zx = X.copy()\n", 819 | " X_zx[\"Z\"] = Z\n", 820 | "\n", 821 | " # for predicting effect under treatment / control status for each data point \n", 822 | " X0 = X_zx.copy()\n", 823 | " X0[\"Z\"] = 0\n", 824 | " X1 = X_zx.copy()\n", 825 | " X1[\"Z\"] = 1\n", 826 | "\n", 827 | " \n", 828 | " for train_index, test_index in kf.split(X_zx, y):\n", 829 | " X_train = X_zx.loc[train_index]\n", 830 | " y_train = y.loc[train_index]\n", 831 | " mu = make_model()\n", 832 | " mu.fit(X_train, y_train)\n", 833 | "\n", 834 | " if output_type =='binary':\n", 835 | " predictions0[test_index] = mu.predict_proba(X0.loc[test_index])[:, 1]\n", 836 | " predictions1[test_index] = mu.predict_proba(X1.loc[test_index])[:, 1]\n", 837 | " elif output_type == 'continuous':\n", 838 | " predictions0[test_index] = mu.predict(X0.loc[test_index])\n", 839 | " predictions1[test_index] = mu.predict(X1.loc[test_index])\n", 840 | "\n", 841 | " assert np.isnan(predictions0).sum() == 0\n", 842 | " assert np.isnan(predictions1).sum() == 0\n", 843 | " return predictions0, predictions1" 844 | ] 845 | }, 846 | { 847 | "cell_type": "code", 848 | "execution_count": 10, 849 | "metadata": { 850 | "id": "wVcE6pRQeMNf" 851 | }, 852 | "outputs": [], 853 | "source": [ 854 | "p = p_k_fold_fit_and_predict(make_p_model, X=block.to_frame(), Z=instrument, n_splits=10)" 855 | ] 856 | }, 857 | { 858 | "cell_type": "code", 859 | "execution_count": 11, 860 | "metadata": { 861 | "id": "GLEHlLLdWSh9" 862 | }, 863 | "outputs": [], 864 | "source": [ 865 | "m0,m1= m_k_fold_fit_and_predict(make_m_model, X=block.to_frame(), Z=instrument, A=treatment, n_splits=10)" 866 | ] 867 | }, 868 | { 869 | "cell_type": "code", 870 | "source": [ 871 | "# check relevance\n", 872 | "from matplotlib.pyplot import hist\n", 873 | "hist(m1-m0, density=True)" 874 | ], 875 | "metadata": { 876 | "id": "jpXU9DK26d6c", 877 | "outputId": "72e0d30c-46e6-48b0-fd8a-318b8b88c26d", 878 | "colab": { 879 | "base_uri": "https://localhost:8080/", 880 | "height": 390 881 | } 882 | }, 883 | "execution_count": 18, 884 | "outputs": [ 885 | { 886 | "output_type": "execute_result", 887 | "data": { 888 | "text/plain": [ 889 | "(array([3.97620728e-02, 3.77739692e-02, 9.34408711e-02, 2.04774675e-01,\n", 890 | " 3.37977619e-02, 0.00000000e+00, 0.00000000e+00, 3.61834863e-01,\n", 891 | " 5.95297873e+01, 2.58970380e+01]),\n", 892 | " array([0.04620174, 0.05780291, 0.06940408, 0.08100525, 0.09260642,\n", 893 | " 0.10420759, 0.11580876, 0.12740993, 0.13901109, 
0.15061226,\n", 894 | " 0.16221343]),\n", 895 | " )" 896 | ] 897 | }, 898 | "metadata": {}, 899 | "execution_count": 18 900 | }, 901 | { 902 | "output_type": "display_data", 903 | "data": { 904 | "text/plain": [ 905 | "
" 906 | ], 907 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXAAAAD4CAYAAAD1jb0+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAOiUlEQVR4nO3dfYxldX3H8fdHBopglUWmmy1oByJoSCpgRwrR2pQtlJZGaEIoStvVkuwf1samtnatf7XpH9DaWhKbNhuxDo0WcCuFiAE2K8S0scggzyzKQhZdurDDUxWaVBe//eOelcns7M7duQ/Db3y/kpt7Hn7nnu839/LZM+fec0hVIUlqz2tWugBJ0vIY4JLUKANckhplgEtSowxwSWrUxDh3dtxxx9XU1NQ4dylJzbv77rufqarJhcvHGuBTU1PMzs6Oc5eS1LwkTyy23FMoktQoA1ySGmWAS1KjDHBJalRfAZ7kmCRbkjySZHuSs5Mcm2Rrkke75zWjLlaS9Ip+j8CvAm6pqrcBpwHbgU3Atqo6GdjWzUuSxmTJAE/yBuA9wNUAVfWDqnoBuBCY6YbNABeNqkhJ0v76OQI/EZgD/jnJPUk+k+RoYG1V7e7GPAWsXWzjJBuTzCaZnZubG07VkqS+AnwCeAfwj1V1BvASC06XVO+m4oveWLyqNlfVdFVNT07udyGRJGmZ+rkScxewq6ru7Oa30Avwp5Osq6rdSdYBe0ZVpKTVZWrTzSuy351XXLAi+x2VJY/Aq+op4LtJ3totWg88DNwEbOiWbQBuHEmFkqRF9XsvlD8EPp/kCOBx4IP0wv/6JJcDTwCXjKZESdJi+grwqroXmF5k1frhliNJ6pdXYkpSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWrURD+DkuwEvg+8DOytqukkxwLXAVPATuCSqnp+NGVKkhY6lCPwX6mq06tqupvfBGyrqpOBbd28JGlMBjmFciEw003PABcNXo4kqV/9BngBtyW5O8nGbtnaqtrdTT8FrF1swyQbk8wmmZ2bmxuwXEnSPn2dAwfeXVVPJvkZYGuSR+avrKpKUottWFWbgc0A09PTi46RJB26vo7Aq+rJ7nkPcANwJvB0knUA3fOeURUpSdrfkgGe5OgkP71vGjgPeBC4CdjQDdsA3DiqIiVJ++vnFMpa4IYk+8Z/oapuSXIXcH2Sy4EngEtGV6YkaaElA7yqHgdOW2T5s8D6URQlSVqaV2JKUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqP6DvAkhyW5J8mXu/kTk9yZZEeS65IcMboyJUkLHcoR+EeA7fPmrwQ+VVVvAZ4HLh9mYZKkg+srwJOcAFwAfKabD3AOsKUbMgNcNIoCJUmL6/cI/O+BjwE/6ubfCLxQVXu7+V3A8UOuTZJ0EEsGeJLfBPZU1d3L2UGSjUlmk8zOzc0t5yUkSYvo5wj8XcB7k+wErqV36uQq4JgkE92YE4AnF9u4qjZX1XRVTU9OTg6hZEkS9BHgVfXxqjqhqqaAS4GvVtVlwO3Axd2wDcCNI6tSkrSfQX4H/mfAHyfZQe+c+NXDKUmS1I+JpYe8oqruAO7oph8Hzhx+SZKkfnglpiQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYtGeBJjkzyjST3JXkoyV90y09McmeSHUmuS3LE6MuVJO3TzxH4/wHnVNVpwOnA+UnOAq4EPlVVbwGeBy4fXZmSpIWWDPDqebGbPbx7FHAOsKVbPgNcNJIKJUmL6usceJLDktwL7AG2Ao8BL1TV3m7ILuD4A2y7Mclsktm5ublh1CxJos8Ar6qXq+p04ATgTOBt/e6gqjZX1XRVTU9OTi6zTEnSQof0K5SqegG4HTgbOCbJRLfqBODJIdcmSTqIfn6FMpnkmG76tcC5wHZ6QX5xN2wDcOOoipQk7W9i6SGsA2aSHEYv8K+vqi8neRi4NslfAfcAV4+wTknSAksGeFXdD5yxyPLH6Z0PlyStAK/ElKRGGeCS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRvVzKb0krQpTm25ekf3uvOKCkbyuR+CS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYtGeBJ3pTk9iQPJ3koyUe65ccm2Zrk0e55zejLlSTt088R+F7go1V1KnAW8AdJTgU2Aduq6mRgWzcvSRqTJQO8qnZX1Te76e8D24HjgQuBmW7YDHDRqIqUJO3vkM6BJ5kCzgDuBNZW1e5u1VPA2gNsszHJbJLZubm5AUqVJM3Xd4AneR3wb8AfVdX35q+rqgJqse2qanNVTVfV9OTk5EDFSpJe0VeAJzmcXnh/vqq+1C1+Osm6bv06YM9oSpQkLaafX6EEuBrYXlV/N2/VTcCGbnoDcOPwy5MkHchEH2PeBfwu8ECSe7tlfw5cAVyf5HLgCeCS0ZQoSVrMkgFeVf8B5ACr1w+3HElSv7wSU5IaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqOWDPAkn02yJ8mD85Ydm2Rrkke75zWjLVOStFA/R+CfA85fsGwTsK2qTga2dfOSpDFaMsCr6mvAcwsWXwjMdNMzwEVDrkuStITlngNfW1W7u+mngLUHGphkY5LZJLNzc3PL3J0kaaGBv8SsqgLqIOs3V9V0VU1PTk4OujtJUme5Af50knUA3fOe4ZUkSerHcgP8JmBDN70BuHE45UiS+tXPzwj/Ffg68NYku5JcDlwBnJvkUeBXu3lJ0hhNLDWgqt53gFXrh1yLJOkQeCWmJDXKAJekRhngktQoA1ySGmWAS1KjDHBJapQBLkmNMsAlqVEGuCQ1ygCXpEYZ4JLUKANckhplgEtSowxwSWqUAS5JjTLAJalRBrgkNcoAl6RGGeCS1CgDXJIaZYBLUqMMcElqlAEuSY0ywCWpUQa4JDXKAJekRg0U4EnO
T/KtJDuSbBpWUZKkpS07wJMcBvwD8OvAqcD7kpw6rMIkSQc3McC2ZwI7qupxgCTXAhcCDw+jMMHUpptXuoSx23nFBStdwk+Mn8TP12ozSIAfD3x33vwu4BcXDkqyEdjYzb6Y5FsD7PPV5DjgmZUuYshWvKdcOdSXW/F+RmC19bTa+oFFehrC5/rnFls4SID3pao2A5tHvZ9xSzJbVdMrXccwrbaeVls/sPp6Wm39wHh7GuRLzCeBN82bP6FbJkkag0EC/C7g5CQnJjkCuBS4aThlSZKWsuxTKFW1N8mHgVuBw4DPVtVDQ6vs1W/VnRZi9fW02vqB1dfTausHxthTqmpc+5IkDZFXYkpSowxwSWqUAb6IpW4RkOSnklzXrb8zydS8dW9P8vUkDyV5IMmR46x9McvtJ8nhSWa6PrYn+fi4az+QPnp6T5JvJtmb5OIF6zYkebR7bBhf1Qe23H6SnD7v83Z/kt8eb+UHNsh71K1/fZJdST49nooPbsDP3JuT3Nb9d/Tw/MwYSFX5mPeg94XsY8BJwBHAfcCpC8Z8CPinbvpS4LpuegK4Hzitm38jcFjD/bwfuLabPgrYCUw18h5NAW8HrgEunrf8WODx7nlNN72m4X5OAU7upn8W2A0c0/J7NG/9VcAXgE+33g9wB3BuN/064Khh1OUR+P5+fIuAqvoBsO8WAfNdCMx001uA9UkCnAfcX1X3AVTVs1X18pjqPpBB+ing6CQTwGuBHwDfG0/ZB7VkT1W1s6ruB360YNtfA7ZW1XNV9TywFTh/HEUfxLL7qapvV9Wj3fR/A3uAyfGUfVCDvEck+QVgLXDbOIrtw7L76e4RNVFVW7txL1bV/w6jKAN8f4vdIuD4A42pqr3A/9A72j4FqCS3dn9KfWwM9S5lkH62AC/RO6r7DvDJqnpu1AX3oZ+eRrHtqAylpiRn0js6fGxIdQ1i2T0leQ3wt8CfjKCu5RrkPToFeCHJl5Lck+RvupsBDswAH64J4N3AZd3zbyVZv7IlDeRM4GV6f5qfCHw0yUkrW5IWk2Qd8C/AB6tqvyPaxnwI+EpV7VrpQoZkAvglev8gvZPeaZgPDOOFDfD99XOLgB+P6U4vvAF4lt6/yl+rqme6P5G+Arxj5BUf3CD9vB+4pap+WFV7gP8EXg33rRjkNg6vxltADFRTktcDNwOfqKr/GnJtyzVIT2cDH06yE/gk8HtJrhhueYdskH52Afd2p1/2Av/OkHLBAN9fP7cIuAnY9+uFi4GvVu/biVuBn09yVBeEv8zK3153kH6+A5wDkORo4CzgkbFUfXCD3MbhVuC8JGuSrKH3vcWtI6qzX8vupxt/A3BNVW0ZYY2Hatk9VdVlVfXmqpqid9R6TVWt9P8wZpDP3F3AMUn2fTdxDsPKhZX+dvfV+AB+A/g2vXOJn+iW/SXw3m76SOCLwA7gG8BJ87b9HeAh4EHgr1e6l0H6ofdt+Re7fh4G/nSlezmEnt5J78jnJXp/TTw0b9vf73rdQe+UQ7P9dJ+3HwL3znucvtL9DPoezXuND/Aq+BXKED5z59L7hdoDwOeAI4ZRk5fSS1KjPIUiSY0ywCWpUQa4JDXKAJekRhngktQoA1ySGmWAS1Kj/h9VXn6+L6tiOAAAAABJRU5ErkJggg==\n" 908 | }, 909 | "metadata": { 910 | "needs_background": "light" 911 | } 912 | } 913 | ] 914 | }, 915 | { 916 | "cell_type": "code", 917 | "execution_count": 13, 918 | "metadata": { 919 | "id": "kQ7s8zWV7IgR" 920 | }, 921 | "outputs": [], 922 | "source": [ 923 | "mu0,mu1= mu_k_fold_fit_and_predict(make_mu_model, X=block.to_frame(), Z=instrument, y=outcome, n_splits=10, output_type=\"binary\")" 924 | ] 925 | }, 926 | { 927 | "cell_type": "code", 928 | "execution_count": 14, 929 | "metadata": { 930 | "colab": { 931 | "base_uri": "https://localhost:8080/", 932 | "height": 206 933 | }, 934 | "id": "_NVCV0q0g8wQ", 935 | "outputId": "d0a90b3d-966c-4830-81c8-f73c71262552" 936 | }, 937 | "outputs": [ 938 | { 939 | "output_type": "execute_result", 940 | "data": { 941 | "text/plain": [ 942 | " p mu0 mu1 m1 m0 Z A Y\n", 943 | "0 0.147581 0.833333 0.843333 0.668028 0.513991 0 0 0\n", 944 | "1 0.143757 0.826667 0.830000 0.667640 0.513672 0 0 1\n", 945 | "2 0.143383 0.823333 0.836667 0.668948 0.512616 0 0 1\n", 946 | "3 0.143757 0.826667 0.836667 0.667598 0.514286 0 0 1\n", 947 | "4 0.143383 0.826667 0.830000 0.667870 0.513021 0 0 0" 948 | ], 949 | "text/html": [ 950 | "\n", 951 | "
\n", 952 | "
\n", 953 | "
\n", 954 | "\n", 967 | "\n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | "
pmu0mu1m1m0ZAY
00.1475810.8333330.8433330.6680280.513991000
10.1437570.8266670.8300000.6676400.513672001
20.1433830.8233330.8366670.6689480.512616001
30.1437570.8266670.8366670.6675980.514286001
40.1433830.8266670.8300000.6678700.513021000
\n", 1039 | "
\n", 1040 | " \n", 1050 | " \n", 1051 | " \n", 1088 | "\n", 1089 | " \n", 1113 | "
\n", 1114 | "
\n", 1115 | " " 1116 | ] 1117 | }, 1118 | "metadata": {}, 1119 | "execution_count": 14 1120 | } 1121 | ], 1122 | "source": [ 1123 | "data_and_nuisance_estimates = pd.DataFrame({'p': p, 'mu0': mu0, 'mu1': mu1, 'm1': m1, 'm0': m0,\n", 1124 | " 'Z': instrument, 'A': treatment, 'Y': outcome})\n", 1125 | "data_and_nuisance_estimates.head()" 1126 | ] 1127 | }, 1128 | { 1129 | "cell_type": "markdown", 1130 | "metadata": { 1131 | "id": "VNhM7URdgzQB" 1132 | }, 1133 | "source": [ 1134 | "## Combine predicted values and data into estimate of LATE" 1135 | ] 1136 | }, 1137 | { 1138 | "cell_type": "code", 1139 | "execution_count": 15, 1140 | "metadata": { 1141 | "id": "Nj0veiaW4RRm" 1142 | }, 1143 | "outputs": [], 1144 | "source": [ 1145 | "def late_estimator(mu1, mu0, m1, m0, p, Z, A, Y, prob = None):\n", 1146 | " '''\n", 1147 | " Estimator for LATE\n", 1148 | " '''\n", 1149 | " n = len(Y)\n", 1150 | " phi_zy = mu1 - mu0 + Z*(Y-mu1)/p - (1-Z)*(Y-mu0)/(1-p)\n", 1151 | " phi_za = m1 - m0 + Z*(A-m1)/p - (1-Z)*(A-m0)/(1-p)\n", 1152 | "\n", 1153 | " tau_za = phi_za.mean()\n", 1154 | " tau_hat = phi_zy.mean()/tau_za\n", 1155 | " phi = phi_zy - phi_za * tau_hat\n", 1156 | " \n", 1157 | " std_hat = math.sqrt((phi**2).mean()/tau_za**2/n)\n", 1158 | "\n", 1159 | " return tau_hat, std_hat\n" 1160 | ] 1161 | }, 1162 | { 1163 | "cell_type": "code", 1164 | "execution_count": 16, 1165 | "metadata": { 1166 | "colab": { 1167 | "base_uri": "https://localhost:8080/" 1168 | }, 1169 | "id": "SjDj0F9Bm9uq", 1170 | "outputId": "262886b4-da24-4b7c-eb75-c1a609e70e6c" 1171 | }, 1172 | "outputs": [ 1173 | { 1174 | "output_type": "stream", 1175 | "name": "stdout", 1176 | "text": [ 1177 | "The estimate is 0.047826812983712996 pm 0.07239241460155232\n" 1178 | ] 1179 | } 1180 | ], 1181 | "source": [ 1182 | "tau_hat, std_hat = late_estimator(**data_and_nuisance_estimates)\n", 1183 | "print(f\"The estimate is {tau_hat} pm {1.96*std_hat}\")" 1184 | ] 1185 | } 1186 | ], 1187 | "metadata": { 1188 | "colab": { 1189 | "provenance": [], 1190 | "authorship_tag": "ABX9TyMBLYRZL7Nk//toT2OnEWO8", 1191 | "include_colab_link": true 1192 | }, 1193 | "kernelspec": { 1194 | "display_name": "Python 3", 1195 | "name": "python3" 1196 | }, 1197 | "language_info": { 1198 | "name": "python" 1199 | } 1200 | }, 1201 | "nbformat": 4, 1202 | "nbformat_minor": 0 1203 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Victor Veitch 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # causality-tutorials 2 | Short tutorials on the use of machine learning methods for causal inference 3 | -------------------------------------------------------------------------------- /Sensitivity_Analysis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Sensitivity_Analysis.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [], 9 | "include_colab_link": true 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | }, 15 | "language_info": { 16 | "name": "python" 17 | } 18 | }, 19 | "cells": [ 20 | { 21 | "cell_type": "markdown", 22 | "metadata": { 23 | "id": "view-in-github", 24 | "colab_type": "text" 25 | }, 26 | "source": [ 27 | "
\"Open" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": { 33 | "id": "QfZkNLUb4B-p" 34 | }, 35 | "source": [ 36 | "# Sensitivity Analysis Tutorial\n", 37 | "\n", 38 | "This tutorial gives a short example for how to assess sensitivity to unobserved confounding in causal estimation. We use the Austen plot method (https://arxiv.org/abs/2003.01747). " 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "metadata": { 44 | "colab": { 45 | "base_uri": "https://localhost:8080/" 46 | }, 47 | "id": "1RrdIBTLQ9Ac", 48 | "outputId": "62c039df-51d7-4f31-a150-4b6a0e0f0b26" 49 | }, 50 | "source": [ 51 | "!pip install austen-plots" 52 | ], 53 | "execution_count": 218, 54 | "outputs": [ 55 | { 56 | "output_type": "stream", 57 | "name": "stdout", 58 | "text": [ 59 | "Requirement already satisfied: austen-plots in /usr/local/lib/python3.7/dist-packages (0.1.0)\n", 60 | "Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from austen-plots) (1.19.5)\n", 61 | "Requirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from austen-plots) (1.1.5)\n", 62 | "Requirement already satisfied: plotnine in /usr/local/lib/python3.7/dist-packages (from austen-plots) (0.6.0)\n", 63 | "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.7/dist-packages (from austen-plots) (1.0.1)\n", 64 | "Requirement already satisfied: scipy in /usr/local/lib/python3.7/dist-packages (from austen-plots) (1.4.1)\n", 65 | "Requirement already satisfied: tqdm in /usr/local/lib/python3.7/dist-packages (from austen-plots) (4.62.3)\n", 66 | "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas->austen-plots) (2.8.2)\n", 67 | "Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas->austen-plots) (2018.9)\n", 68 | "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas->austen-plots) (1.15.0)\n", 69 | "Requirement already satisfied: statsmodels>=0.9.0 in /usr/local/lib/python3.7/dist-packages (from plotnine->austen-plots) (0.10.2)\n", 70 | "Requirement already satisfied: descartes>=1.1.0 in /usr/local/lib/python3.7/dist-packages (from plotnine->austen-plots) (1.1.0)\n", 71 | "Requirement already satisfied: patsy>=0.4.1 in /usr/local/lib/python3.7/dist-packages (from plotnine->austen-plots) (0.5.2)\n", 72 | "Requirement already satisfied: matplotlib>=3.1.1 in /usr/local/lib/python3.7/dist-packages (from plotnine->austen-plots) (3.2.2)\n", 73 | "Requirement already satisfied: mizani>=0.6.0 in /usr/local/lib/python3.7/dist-packages (from plotnine->austen-plots) (0.6.0)\n", 74 | "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=3.1.1->plotnine->austen-plots) (3.0.6)\n", 75 | "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=3.1.1->plotnine->austen-plots) (1.3.2)\n", 76 | "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=3.1.1->plotnine->austen-plots) (0.11.0)\n", 77 | "Requirement already satisfied: palettable in /usr/local/lib/python3.7/dist-packages (from mizani>=0.6.0->plotnine->austen-plots) (3.3.0)\n", 78 | "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from scikit-learn->austen-plots) (3.0.0)\n", 79 | "Requirement already satisfied: joblib>=0.11 in 
/usr/local/lib/python3.7/dist-packages (from scikit-learn->austen-plots) (1.1.0)\n" 80 | ] 81 | } 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "metadata": { 87 | "id": "dS2X3Bq1-fxE" 88 | }, 89 | "source": [ 90 | "import numpy as np\n", 91 | "import pandas as pd\n", 92 | "import scipy as sp\n", 93 | "from sklearn import preprocessing\n", 94 | "from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, GradientBoostingRegressor, GradientBoostingClassifier\n", 95 | "from sklearn.linear_model import LogisticRegression\n", 96 | "from sklearn.model_selection import KFold, StratifiedKFold, train_test_split\n", 97 | "from sklearn.metrics import mean_squared_error, log_loss\n", 98 | "import sklearn\n", 99 | "import os\n", 100 | "import pathlib\n", 101 | "\n", 102 | "from austen_plots.AustenPlot import AustenPlot" 103 | ], 104 | "execution_count": 219, 105 | "outputs": [] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "metadata": { 110 | "id": "zNsGKVyLSRxn" 111 | }, 112 | "source": [ 113 | "RANDOM_SEED = 42\n", 114 | "np.random.seed(RANDOM_SEED)" 115 | ], 116 | "execution_count": 220, 117 | "outputs": [] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": { 122 | "id": "H0YG8hR8RRAP" 123 | }, 124 | "source": [ 125 | "# Data Loading and Initial Fit\n", 126 | "\n", 127 | "Load the diastolic blood pressure data and fit models for the propensity score and conditional expected outcome model, in the same way we'd do in a standard adjustment-based treatment effect estimation. For this tutorial, we'll use random forests for both models. \n", 128 | "\n", 129 | "This section doesn't contain anything special to sensitivity analysis, and can be safely skipped if you've already read the adjustment estimation tutorial." 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": { 135 | "id": "yPbJeayiEs3u" 136 | }, 137 | "source": [ 138 | "##Load and Format Observational Data" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "metadata": { 144 | "id": "2AC9TPko-hbt" 145 | }, 146 | "source": [ 147 | "nhanes = pd.read_csv('https://raw.githubusercontent.com/vveitch/causality-tutorials/main/data/hbp_dbp.csv')" 148 | ], 149 | "execution_count": 221, 150 | "outputs": [] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "metadata": { 155 | "colab": { 156 | "base_uri": "https://localhost:8080/", 157 | "height": 223 158 | }, 159 | "id": "-A1LX6-t-hZD", 160 | "outputId": "2f9def4f-1348-48d2-d518-b5b5235b6e42" 161 | }, 162 | "source": [ 163 | "nhanes.head()" 164 | ], 165 | "execution_count": 222, 166 | "outputs": [ 167 | { 168 | "output_type": "execute_result", 169 | "data": { 170 | "text/html": [ 171 | "
[HTML rendering of nhanes.head() garbled during extraction. Columns: white, black, hisp, female, age_mo, hhsize, edu, married, widowed, divorced, separated, income, packyr, bmi, pulse, sodium, potassium, r_sodipota, alcohol, insurance, together, ave_dbp, trt_dbp. A text/plain preview of the same five rows follows below.]
" 348 | ], 349 | "text/plain": [ 350 | " white black hisp female ... insurance together ave_dbp trt_dbp\n", 351 | "0 0 0 1 0 ... 0 24 60 0\n", 352 | "1 1 0 0 0 ... 1 104 94 0\n", 353 | "2 1 0 0 1 ... 1 156 70 1\n", 354 | "3 0 0 1 0 ... 0 52 84 0\n", 355 | "4 0 0 1 0 ... 1 0 80 0\n", 356 | "\n", 357 | "[5 rows x 23 columns]" 358 | ] 359 | }, 360 | "metadata": {}, 361 | "execution_count": 222 362 | } 363 | ] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "metadata": { 368 | "id": "fW3FdK8rJNHM" 369 | }, 370 | "source": [ 371 | "# scale continuous covariates\n", 372 | "cont_vars = ['age_mo', 'hhsize', 'edu', 'income', 'packyr', 'bmi',\n", 373 | " 'pulse', 'sodium', 'potassium', 'r_sodipota', 'alcohol', 'together']\n", 374 | "nhanes[cont_vars] = preprocessing.scale(nhanes[cont_vars])\n" 375 | ], 376 | "execution_count": 223, 377 | "outputs": [] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "metadata": { 382 | "id": "APOqpHmrOGzo" 383 | }, 384 | "source": [ 385 | "confounders = nhanes.drop(columns=['trt_dbp', 'ave_dbp'])\n", 386 | "outcome = nhanes['ave_dbp']\n", 387 | "treatment = nhanes['trt_dbp']" 388 | ], 389 | "execution_count": 224, 390 | "outputs": [] 391 | }, 392 | { 393 | "cell_type": "markdown", 394 | "metadata": { 395 | "id": "C576dWRsa3ad" 396 | }, 397 | "source": [ 398 | "## Specify Nuisance Function Models\n", 399 | "\n", 400 | "The next step is to specify models for the conditional expected outcome and propensity score" 401 | ] 402 | }, 403 | { 404 | "cell_type": "code", 405 | "metadata": { 406 | "colab": { 407 | "base_uri": "https://localhost:8080/" 408 | }, 409 | "id": "qyOhSZRQRb8W", 410 | "outputId": "905a03a6-e8c0-4572-e3e0-6f195b6409ad" 411 | }, 412 | "source": [ 413 | "# specify a model for the conditional expected outcome\n", 414 | "\n", 415 | "# make a function that returns a sklearn model for later use in k-folding\n", 416 | "def make_Q_model():\n", 417 | "# return RandomForestRegressor(random_state=RANDOM_SEED, n_estimators=200, max_depth=None)\n", 418 | " return GradientBoostingRegressor(random_state=RANDOM_SEED, n_estimators=200, max_depth=3)\n", 419 | "Q_model = make_Q_model()\n", 420 | "\n", 421 | "# Sanity check that chosen model actually improves test error\n", 422 | "# A real analysis should give substantial attention to model selection and validation \n", 423 | "\n", 424 | "X_w_treatment = confounders.copy()\n", 425 | "X_w_treatment[\"treatment\"] = treatment\n", 426 | "\n", 427 | "X_train, X_test, y_train, y_test = train_test_split(X_w_treatment, outcome, test_size=0.2)\n", 428 | "Q_model.fit(X_train, y_train)\n", 429 | "y_pred = Q_model.predict(X_test)\n", 430 | "\n", 431 | "test_mse=mean_squared_error(y_pred, y_test)\n", 432 | "print(f\"Test MSE of fit model {test_mse}\") \n", 433 | "baseline_mse=mean_squared_error(y_train.mean()*np.ones_like(y_test), y_test)\n", 434 | "print(f\"Test MSE of no-covariate model {baseline_mse}\")" 435 | ], 436 | "execution_count": 225, 437 | "outputs": [ 438 | { 439 | "output_type": "stream", 440 | "name": "stdout", 441 | "text": [ 442 | "Test MSE of fit model 188.37465077057507\n", 443 | "Test MSE of no-covariate model 196.715556166321\n" 444 | ] 445 | } 446 | ] 447 | }, 448 | { 449 | "cell_type": "code", 450 | "metadata": { 451 | "colab": { 452 | "base_uri": "https://localhost:8080/" 453 | }, 454 | "id": "uq6eZEBXbsaI", 455 | "outputId": "2a544732-9c9c-4ec6-be9c-8ef28610365c" 456 | }, 457 | "source": [ 458 | "# specify a model for the propensity score\n", 459 | "\n", 460 | "def make_g_model():\n", 461 | " return 
LogisticRegression(max_iter=1000)\n", 462 | " # return RandomForestClassifier(n_estimators=100, max_depth=5)\n", 463 | " # return GradientBoostingClassifier(n_estimators=200, max_depth=3)\n", 464 | "\n", 465 | "g_model = make_g_model()\n", 466 | "# Sanity check that chosen model actually improves test error\n", 467 | "# A real analysis should give substantial attention to model selection and validation \n", 468 | "\n", 469 | "X_train, X_test, a_train, a_test = train_test_split(confounders, treatment, test_size=0.2, stratify=treatment)\n", 470 | "g_model.fit(X_train, a_train)\n", 471 | "a_pred = g_model.predict_proba(X_test)[:,1]\n", 472 | "\n", 473 | "test_ce=log_loss(a_test, a_pred)\n", 474 | "print(f\"Test CE of fit model {test_ce}\") \n", 475 | "baseline_ce=log_loss(a_test, a_train.mean()*np.ones_like(a_test))\n", 476 | "print(f\"Test CE of no-covariate model {baseline_ce}\")" 477 | ], 478 | "execution_count": 226, 479 | "outputs": [ 480 | { 481 | "output_type": "stream", 482 | "name": "stdout", 483 | "text": [ 484 | "Test CE of fit model 0.4844169173325631\n", 485 | "Test CE of no-covariate model 0.6785695199678788\n" 486 | ] 487 | } 488 | ] 489 | }, 490 | { 491 | "cell_type": "markdown", 492 | "metadata": { 493 | "id": "2RkvV_4_dFWo" 494 | }, 495 | "source": [ 496 | "## Use cross fitting to get get predicted outcomes and propensity scores for each unit" 497 | ] 498 | }, 499 | { 500 | "cell_type": "code", 501 | "metadata": { 502 | "id": "KA0AsEGJ_X3b" 503 | }, 504 | "source": [ 505 | "# helper functions to implement the cross fitting\n", 506 | "\n", 507 | "def treatment_k_fold_fit_and_predict(make_model, X:pd.DataFrame, A:np.array, n_splits:int):\n", 508 | " \"\"\"\n", 509 | " Implements K fold cross-fitting for the model predicting the treatment A. \n", 510 | " That is, \n", 511 | " 1. Split data into K folds\n", 512 | " 2. For each fold j, the model is fit on the other K-1 folds\n", 513 | " 3. The fitted model is used to make predictions for each data point in fold j\n", 514 | " Returns an array containing the predictions \n", 515 | "\n", 516 | " Args:\n", 517 | " model: function that returns sklearn model (which implements fit and predict_prob)\n", 518 | " X: dataframe of variables to adjust for\n", 519 | " A: array of treatments\n", 520 | " n_splits: number of splits to use\n", 521 | " \"\"\"\n", 522 | " predictions = np.full_like(A, np.nan, dtype=float)\n", 523 | " kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)\n", 524 | " \n", 525 | " for train_index, test_index in kf.split(X, A):\n", 526 | " X_train = X.loc[train_index]\n", 527 | " A_train = A.loc[train_index]\n", 528 | " g = make_model()\n", 529 | " g.fit(X_train, A_train)\n", 530 | "\n", 531 | " # get predictions for split\n", 532 | " predictions[test_index] = g.predict_proba(X.loc[test_index])[:, 1]\n", 533 | "\n", 534 | " assert np.isnan(predictions).sum() == 0\n", 535 | " return predictions\n", 536 | "\n", 537 | "\n", 538 | "def outcome_k_fold_fit_and_predict(make_model, X:pd.DataFrame, y:np.array, A:np.array, n_splits:int, output_type:str):\n", 539 | " \"\"\"\n", 540 | " Implements K fold cross-fitting for the model predicting the outcome Y. \n", 541 | " That is, \n", 542 | " 1. Split data into K folds\n", 543 | " 2. For each fold j, the model is fit on the other K-1 folds\n", 544 | " 3. 
The fitted model is used to make predictions for each data point in fold j\n", 545 | " Returns two arrays containing the predictions for all units untreated, all units treated \n", 546 | "\n", 547 | " Args:\n", 548 | " model: function that returns sklearn model (that implements fit and either predict_prob or predict)\n", 549 | " X: dataframe of variables to adjust for\n", 550 | " y: array of outcomes\n", 551 | " A: array of treatments\n", 552 | " n_splits: number of splits to use\n", 553 | " output_type: type of outcome, \"binary\" or \"continuous\"\n", 554 | "\n", 555 | " \"\"\"\n", 556 | " predictions0 = np.full_like(A, np.nan, dtype=float)\n", 557 | " predictions1 = np.full_like(y, np.nan, dtype=float)\n", 558 | " if output_type == 'binary':\n", 559 | " kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)\n", 560 | " elif output_type == 'continuous':\n", 561 | " kf = KFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)\n", 562 | "\n", 563 | " # include the treatment as input feature\n", 564 | " X_w_treatment = X.copy()\n", 565 | " X_w_treatment[\"A\"] = A\n", 566 | "\n", 567 | " # for predicting effect under treatment / control status for each data point \n", 568 | " X0 = X_w_treatment.copy()\n", 569 | " X0[\"A\"] = 0\n", 570 | " X1 = X_w_treatment.copy()\n", 571 | " X1[\"A\"] = 1\n", 572 | "\n", 573 | " \n", 574 | " for train_index, test_index in kf.split(X_w_treatment, y):\n", 575 | " X_train = X_w_treatment.loc[train_index]\n", 576 | " y_train = y.loc[train_index]\n", 577 | " q = make_model()\n", 578 | " q.fit(X_train, y_train)\n", 579 | "\n", 580 | " if output_type =='binary':\n", 581 | " predictions0[test_index] = q.predict_proba(X0.loc[test_index])[:, 1]\n", 582 | " predictions1[test_index] = q.predict_proba(X1.loc[test_index])[:, 1]\n", 583 | " elif output_type == 'continuous':\n", 584 | " predictions0[test_index] = q.predict(X0.loc[test_index])\n", 585 | " predictions1[test_index] = q.predict(X1.loc[test_index])\n", 586 | "\n", 587 | " assert np.isnan(predictions0).sum() == 0\n", 588 | " assert np.isnan(predictions1).sum() == 0\n", 589 | " return predictions0, predictions1" 590 | ], 591 | "execution_count": 227, 592 | "outputs": [] 593 | }, 594 | { 595 | "cell_type": "code", 596 | "metadata": { 597 | "id": "wVcE6pRQeMNf" 598 | }, 599 | "source": [ 600 | "g = treatment_k_fold_fit_and_predict(make_g_model, X=confounders, A=treatment, n_splits=10)" 601 | ], 602 | "execution_count": 228, 603 | "outputs": [] 604 | }, 605 | { 606 | "cell_type": "code", 607 | "metadata": { 608 | "id": "GLEHlLLdWSh9" 609 | }, 610 | "source": [ 611 | "Q0,Q1=outcome_k_fold_fit_and_predict(make_Q_model, X=confounders, y=outcome, A=treatment, n_splits=10, output_type=\"continuous\")" 612 | ], 613 | "execution_count": 229, 614 | "outputs": [] 615 | }, 616 | { 617 | "cell_type": "code", 618 | "metadata": { 619 | "colab": { 620 | "base_uri": "https://localhost:8080/", 621 | "height": 203 622 | }, 623 | "id": "_NVCV0q0g8wQ", 624 | "outputId": "9f4fb865-3b5f-4e46-f7a3-c343b1924e9e" 625 | }, 626 | "source": [ 627 | "data_and_nuisance_estimates = pd.DataFrame({'g': g, 'Q0': Q0, 'Q1': Q1, 'A': treatment, 'Y': outcome})\n", 628 | "data_and_nuisance_estimates.head()" 629 | ], 630 | "execution_count": 230, 631 | "outputs": [ 632 | { 633 | "output_type": "execute_result", 634 | "data": { 635 | "text/html": [ 636 | "
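For orientation, the nuisance functions being cross-fit here are, in the notation of the cells above, the propensity score and the conditional expected outcome:

$$\hat g(x) \approx P(A=1 \mid X=x), \qquad \hat Q(a, x) \approx E[Y \mid A=a, X=x],$$

and the helpers return out-of-fold predictions $\hat g(X_i)$, $\hat Q(0, X_i)$, $\hat Q(1, X_i)$ for every unit, so that no unit's prediction comes from a model that was trained on it. (This reading of `g`, `Q0`, `Q1` follows the column names in the table below.)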
[HTML rendering of data_and_nuisance_estimates.head() garbled during extraction; the same five rows (columns g, Q0, Q1, A, Y) appear as text/plain immediately below.]
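The `ate_aiptw` cell further below turns these cross-fit nuisance estimates into the ATE. As a sketch written out from that code (with `g` the propensity score and `Q0`/`Q1` the outcome model evaluated under control and treatment):

$$\hat\tau = \frac{1}{n}\sum_{i=1}^n \Big[\,\hat Q_1(X_i) - \hat Q_0(X_i) + \frac{A_i\,(Y_i - \hat Q_1(X_i))}{\hat g(X_i)} - \frac{(1-A_i)\,(Y_i - \hat Q_0(X_i))}{1 - \hat g(X_i)}\Big]$$

with the standard error taken as the standard deviation of the bracketed per-unit scores divided by $\sqrt{n}$, and the reported interval $\hat\tau \pm 1.96\,\widehat{\mathrm{se}}$.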
" 705 | ], 706 | "text/plain": [ 707 | " g Q0 Q1 A Y\n", 708 | "0 0.009122 69.928410 72.999765 0 60\n", 709 | "1 0.153910 84.867654 90.188475 0 94\n", 710 | "2 0.777998 73.341629 67.400933 1 70\n", 711 | "3 0.113439 83.498759 82.388082 0 84\n", 712 | "4 0.221874 87.987430 86.811025 0 80" 713 | ] 714 | }, 715 | "metadata": {}, 716 | "execution_count": 230 717 | } 718 | ] 719 | }, 720 | { 721 | "cell_type": "markdown", 722 | "metadata": { 723 | "id": "VNhM7URdgzQB" 724 | }, 725 | "source": [ 726 | "## Combine predicted values and data into estimate of ATE" 727 | ] 728 | }, 729 | { 730 | "cell_type": "code", 731 | "metadata": { 732 | "id": "O_F5r0SSkzzK" 733 | }, 734 | "source": [ 735 | "def ate_aiptw(Q0, Q1, g, A, Y, prob_t=None):\n", 736 | " \"\"\"\n", 737 | " # Double ML estimator for the ATE\n", 738 | " \"\"\"\n", 739 | "\n", 740 | " tau_hat = (Q1 - Q0 + A*(Y-Q1)/g - (1-A)*(Y-Q0)/(1-g)).mean()\n", 741 | " \n", 742 | " scores = Q1 - Q0 + A*(Y-Q1)/g - (1-A)*(Y-Q0)/(1-g) - tau_hat\n", 743 | " n = Y.shape[0] # number of observations\n", 744 | " std_hat = np.std(scores) / np.sqrt(n)\n", 745 | "\n", 746 | " return tau_hat, std_hat\n" 747 | ], 748 | "execution_count": 231, 749 | "outputs": [] 750 | }, 751 | { 752 | "cell_type": "code", 753 | "metadata": { 754 | "colab": { 755 | "base_uri": "https://localhost:8080/" 756 | }, 757 | "id": "SjDj0F9Bm9uq", 758 | "outputId": "e3a2f168-1bf8-47fd-f046-dd05390bf0d5" 759 | }, 760 | "source": [ 761 | "tau_hat, std_hat = ate_aiptw(**data_and_nuisance_estimates)\n", 762 | "print(f\"The estimate is {tau_hat} pm {1.96*std_hat}\")" 763 | ], 764 | "execution_count": 232, 765 | "outputs": [ 766 | { 767 | "output_type": "stream", 768 | "name": "stdout", 769 | "text": [ 770 | "The estimate is -2.6682535341413107 pm 1.52254875417939\n" 771 | ] 772 | } 773 | ] 774 | }, 775 | { 776 | "cell_type": "markdown", 777 | "metadata": { 778 | "id": "L_GUa-5vMmL4" 779 | }, 780 | "source": [ 781 | "#Sensitivity Analysis\n", 782 | "\n", 783 | "We found an average treatment effect of diastolic blood pressure medication of about 2, significant at the 0.95 level. We'd now conduct some analysis to decide how sensitive this conclusions is to possible unobserved confounding." 784 | ] 785 | }, 786 | { 787 | "cell_type": "code", 788 | "metadata": { 789 | "id": "zEv_RlkUNQZ9" 790 | }, 791 | "source": [ 792 | "# the first step is to choose a level of bias that would undermine the qualitative conclusion of the study\n", 793 | "# we'll go with the nominal effect\n", 794 | "target_bias = 2.00 # note: bias is specified as an absolute number" 795 | ], 796 | "execution_count": 233, 797 | "outputs": [] 798 | }, 799 | { 800 | "cell_type": "markdown", 801 | "metadata": { 802 | "id": "bDeSqyvvN3mg" 803 | }, 804 | "source": [ 805 | "## Compute influence strength of covariates\n", 806 | "Our task is to assess whether it's plausible that an unobserved confounder could be strong enough to induce a bias of 2 or more. To make that easier, we'd like to know how strong the observed confounders are. Austen plots computes these reference strengths by seeing how much model performance degrades when the covariates are removed. Accordingly, we refit the models with each (group of) reference covariate removed. 
" 807 | ] 808 | }, 809 | { 810 | "cell_type": "code", 811 | "metadata": { 812 | "id": "b_51ei-JOIrn" 813 | }, 814 | "source": [ 815 | "# First, lets define the groups of covariates we'll measure the strength of.\n", 816 | "# Note: it's important to group the covariates into meaningful groups, because strength is measured conditional on all remaining covariates\n", 817 | "# E.g., if we remove only \"black\" (and not \"hispanic, white\") then we will measure no effect, because we can infer the removed variable from the remaining ones\n", 818 | "\n", 819 | "covariate_groups = {\n", 820 | " 'socioeconomic': ['white', 'black', 'hisp' , 'hhsize', 'edu',\n", 821 | " 'married', 'widowed', 'divorced', 'separated', 'income', 'packyr', 'alcohol',\n", 822 | " 'insurance', 'together'],\n", 823 | " 'sex': ['female'],\n", 824 | " 'age': ['age_mo'],\n", 825 | " 'health': ['bmi', 'pulse', 'sodium', 'potassium', 'r_sodipota']}" 826 | ], 827 | "execution_count": 234, 828 | "outputs": [] 829 | }, 830 | { 831 | "cell_type": "code", 832 | "metadata": { 833 | "id": "eLskfBqCQlwZ" 834 | }, 835 | "source": [ 836 | "# For each covariate group, refit the models without using that group\n", 837 | "nuisance_estimates = {}\n", 838 | "for group, covs in covariate_groups.items():\n", 839 | " remaining_confounders = confounders.drop(columns=covs)\n", 840 | "\n", 841 | " g = treatment_k_fold_fit_and_predict(make_g_model, X=remaining_confounders, A=treatment, n_splits=5)\n", 842 | " Q0, Q1 = outcome_k_fold_fit_and_predict(make_Q_model, X=remaining_confounders, y=outcome, A=treatment, n_splits=5, output_type=\"continuous\")\n", 843 | "\n", 844 | " data_and_nuisance_estimates = pd.DataFrame({'g': g, 'Q0': Q0, 'Q1': Q1, 'A': treatment, 'Y': outcome})\n", 845 | " nuisance_estimates[group] = data_and_nuisance_estimates\n" 846 | ], 847 | "execution_count": 235, 848 | "outputs": [] 849 | }, 850 | { 851 | "cell_type": "markdown", 852 | "metadata": { 853 | "id": "73aTyanbTihM" 854 | }, 855 | "source": [ 856 | "## Save computed estimates as CSVs\n", 857 | "The Austen plot code expects the nuisance function estimates to be provided as csvs with columns 'g', 'Q', 't', 'y'" 858 | ] 859 | }, 860 | { 861 | "cell_type": "code", 862 | "metadata": { 863 | "id": "Am4bdBMGXhqr" 864 | }, 865 | "source": [ 866 | "data_and_nuisance_path = 'data_and_nuisance_estimates.csv'\n", 867 | "covariate_dir_path = 'covariates/'" 868 | ], 869 | "execution_count": 236, 870 | "outputs": [] 871 | }, 872 | { 873 | "cell_type": "code", 874 | "metadata": { 875 | "id": "YJ-QU3gXSqiz" 876 | }, 877 | "source": [ 878 | "def _convert_to_austen_format(nuisance_estimate_df: pd.DataFrame):\n", 879 | " austen_df = pd.DataFrame()\n", 880 | " austen_df['y']=nuisance_estimate_df['Y']\n", 881 | " austen_df['t']=nuisance_estimate_df['A']\n", 882 | " austen_df['g']=nuisance_estimate_df['g']\n", 883 | " A = nuisance_estimate_df['A']\n", 884 | " austen_df['Q']=A*nuisance_estimate_df['Q1'] + (1-A)*nuisance_estimate_df['Q0'] # use Q1 when A=1, and Q0 when A=0\n", 885 | "\n", 886 | " return austen_df" 887 | ], 888 | "execution_count": 237, 889 | "outputs": [] 890 | }, 891 | { 892 | "cell_type": "code", 893 | "metadata": { 894 | "id": "31BWqKXmVAQr" 895 | }, 896 | "source": [ 897 | "austen_data_and_nuisance = _convert_to_austen_format(data_and_nuisance_estimates)\n", 898 | "austen_data_and_nuisance.to_csv(data_and_nuisance_path, index=False)\n", 899 | "\n", 900 | "pathlib.Path(covariate_dir_path).mkdir(exist_ok=True)\n", 901 | "for group, nuisance_estimate in 
nuisance_estimates.items():\n", 902 | " austen_nuisance_estimate = _convert_to_austen_format(nuisance_estimate)\n", 903 | " austen_nuisance_estimate.to_csv(os.path.join(covariate_dir_path,group+\".csv\"), index=False)" 904 | ], 905 | "execution_count": 238, 906 | "outputs": [] 907 | }, 908 | { 909 | "cell_type": "markdown", 910 | "metadata": { 911 | "id": "C84zSBeIVe0L" 912 | }, 913 | "source": [ 914 | "## Make plots" 915 | ] 916 | }, 917 | { 918 | "cell_type": "code", 919 | "metadata": { 920 | "id": "2C0cixtvVhmD" 921 | }, 922 | "source": [ 923 | "ap = AustenPlot(data_and_nuisance_path, covariate_dir_path)" 924 | ], 925 | "execution_count": 239, 926 | "outputs": [] 927 | }, 928 | { 929 | "cell_type": "code", 930 | "metadata": { 931 | "colab": { 932 | "base_uri": "https://localhost:8080/" 933 | }, 934 | "id": "JFgN5L6YW0oF", 935 | "outputId": "9bb60caf-daef-4ba3-8ab2-8a4f6d3b43eb" 936 | }, 937 | "source": [ 938 | "p, plot_coords, variable_coords = ap.fit(bias=target_bias) # recall we set target_bias=2.0" 939 | ], 940 | "execution_count": 240, 941 | "outputs": [ 942 | { 943 | "output_type": "stream", 944 | "name": "stdout", 945 | "text": [ 946 | "Fitting main dataset\n" 947 | ] 948 | } 949 | ] 950 | }, 951 | { 952 | "cell_type": "code", 953 | "metadata": { 954 | "colab": { 955 | "base_uri": "https://localhost:8080/", 956 | "height": 396 957 | }, 958 | "id": "lfPaV4IDckdS", 959 | "outputId": "56a1085c-cc7d-45e7-d61b-832125c05857" 960 | }, 961 | "source": [ 962 | "p" 963 | ], 964 | "execution_count": 241, 965 | "outputs": [ 966 | { 967 | "output_type": "display_data", 968 | "data": { 969 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAikAAAFqCAYAAADft8pBAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAPYQAAD2EBqD+naQAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nOzdeVhV1f4/8PcGZZZ5EEgFQUHERCC8OCKpgEMO5Zii+FVMMiPLORUnFKfKuUKcS8vZSjMVblpKCloaRFgMiSMIB5kR9u8Pfpzr6YAC+yAHeb+eh+dy9l57rc9ZN8/5sNbaawuiKIogIiIiUjMaDR0AERERUVWYpBAREZFaYpJCREREaolJChEREaklJilERESklpikEBERkVpikkJERERqiUkKERERqSUmKURERKSWmKTQC0MQBISFhTV0GEREpCJMUkht7dy5E4IgKPxYWlqiT58+OHnyZEOHp1L//PMPlixZAi8vL5iYmMDc3Bw+Pj44c+ZMjesoLy/H6tWrYW9vDx0dHbz88sv48ssv6zFqIqL61ayhAyB6lqVLl8Le3h6iKOLevXvYuXMnBgwYgBMnTmDQoEHycoWFhWjWrHH+J33s2DFERERg6NChmDBhAh4/fozdu3ejX79+iIqKQlBQ0DPrWLBgAVatWoUpU6bglVdewbFjxzB27FgIgoDRo0c/h3dBRKRaAh8wSOpq586dCAoKwuXLl+Hp6Sk/np2dDSsrK4wYMQL79u1rwAhV5/fff4eVlRXMzc3lx4qLi+Hm5oa8vDz8888/T70+IyMD9vb2CA4OxqZNmwAAoiiid+/eSElJQWpqKjQ1Nev1PRARqRqne6jRMTY2hq6urtKoyb/XpKSlpSEkJAROTk7Q1dWFmZkZRowYgdTUVIXrSktLsWTJErRr1w46OjowMzNDjx498MMPPzyHd1OhY8eOCgkKAGhra2PAgAG4desWHj169NTrjx07htLSUoSEhMiPCYKAadOm4datW7h48WK9xE1EVJ8a59g4NSkymQyZmZkQRRH379/Hxo0bkZeXh3Hjxj31usuXL+Pnn3/G6NGj8dJLLyE1NRVbt26Fj48PEhISoKenBwAICwvDypUrMXnyZHh5eSE3NxdXrlxBfHw8+vXrV2395eXlePjwYY3eg5GREZo3b17zN/3/3b17F3p6evJYq3P16lXo6+ujQ4cOCse9vLzk53v06FHr9omIGhKTFFJ7ffv2VXitra2NqKiopyYQADBw4EC88cYbCscGDx4Mb29vHDp0COPHjwcAfPvttxgwYAA+++yzWsWVnp4Oe3v7GpWNjo6Gj49Preq/efMmDh8+jBEjRjxzqubOnTuwsrKCIAgKx62trQEAt2/frlXbRETqgEkKqb3Nmzejffv2AIB79+5h7969mDx5Mlq0aIHhw4dXe52urq7899LSUuTm5sLR0RHGxsaIj4+XJynGxsb4/fffkZycjHbt2tU4rpYtW9Z4Sqhz5841rhcACgoKMGLECOjq6mLVqlXPLF9YWAhtbW2l4zo6OvLzRESNDZMUUnteXl4KC2fHjBmDLl26YPr06Rg0aBC0tLSqvK6wsBArV67Ejh07kJGRgSfXiMtkMvnvS5cuxZAhQ9C+fXu4urrC398f48ePx8svv/zUuHR0dJRGeVShrKwMo0ePRkJCAk6ePAkbG5tnXqOrq4vi4mKl40VFRfLzRESNDRfOUqOjoaGBPn364M6dO0hOTq623DvvvIMVK1Zg5MiR+Oqrr3D69Gn88MMPMDMzQ3l5ubxcr1698NdffyEqKgqurq6IjIyEu7s7IiMjnxpHWVkZ7t69W6OfkpKSGr+/KVOm4JtvvsHOnTvh6+tbo2usra1x9+5d/PtmvTt37gBAjRIdIiJ1w5EUapQeP34MAMj
Ly6u2zMGDBzFhwgSsW7dOfqyoqAg5OTlKZU1NTREUFISgoCDk5eWhV69eCAsLw+TJk6ut/59//lH5mpRZs2Zhx44d+PjjjzFmzJga1Q0Abm5uiIyMRGJiIlxcXOTHY2Nj5eeJiBobJinU6JSWluL06dPQ0tJSupvlSZqamkojCxs3bkRZWZnCsaysLJiZmclfGxgYwNHR8Zl7k6h6TcqaNWuwdu1azJ8/H++++2615WQyGe7cuQNra2sYGRkBAIYMGYL33nsPW7ZsUdgnZdu2bbC1tUW3bt1qFCcRkTphkkJq7+TJk/jjjz8AAPfv38cXX3yB5ORkzJ07F4aGhtVeN2jQIOzZswdGRkZwcXHBxYsXcebMGYWEBABcXFzg4+MDDw8PmJqa4sqVKzh48CCmT5/+1LhUuSblyJEjmD17Ntq1a4cOHTpg7969Cuf79esHKysredmgoCDs2LEDEydOBAC89NJLCA0NxZo1a1BaWopXXnkFR48exfnz57Fv3z5u5EZEjRKTFFJ7ixYtkv+uo6MDZ2dnbN26FVOnTn3qdZ988gk0NTWxb98+FBUVoXv37jhz5gz8/PwUys2YMQPHjx/H6dOnUVxcjDZt2mD58uWYNWtWvbyfqvz6668AgOTkZPldR0+Kjo6WJynVWbVqFUxMTPDpp59i586daNeuHfbu3YuxY8fWS8xERPWN2+ITERGRWuLdPURERKSWmKQQERGRWmKSQkRERGqJSQoRERGpJSYpREREpJZUmqQUFRVV+fwQdfbjjz9i8ODBsLGxgSAIOHr0aEOHRERERJCYpMTExOC9996Dl5cXDAwMoK+vDz09PbRo0QJeXl4IDQ1FTEyMikKtH/n5+ejcuTM2b97c0KEQERHRE2q9T0ppaSk+/fRTrF+/HqmpqTA1NYW7uzvatm0LExMTiKKI7OxspKSkID4+Hg8fPkSbNm3w/vvvY+rUqWjevHl9vRfJBEHAkSNHMHTo0IYOhYiIqMmr9Y6zjo6OKCkpwYQJEzBy5Ei4u7s/tXxcXBy+/vprhIeHY+3atUhNTa1rrERERNSE1DpJmT9/PiZOnAhtbe0alffw8ICHhweWLl2KHTt21DpAIiIiapq4Lf4TajPdU1hYiJKSkucQFRG9SLS0tKCrq9vQYRA1CnzAYA1lZGTg9u3bAICysjLk5+fDxMSkyrKffPIJSkpK4OXlhd69ez/PMOUqn3pbVlbWIO0DQLNmzfD48eMGa18d+gBgP1RiP1S4d+8efHx8mKgQ1UCtkpSCggIkJSXB0dERLVq0UDj3008/oXv37ioNTp3Y2trC1tYWACCTyXD+/Hk4OTkp9QNQ8WFcUlICY2NjODg4PO9QAQCVA2SCIDRI+wCQnZ1dbSL3PKhDHwDsh0rsB+DRo0e4ffs2SkpKmKQQ1UCNk5RLly5h8ODB0NLSQnZ2NubPn48PP/xQfj4gIAC5ubn1EmR9ysvLw82bN+WvU1JScO3aNZiamqJ169ZPvbZFixYwMjJSOq6hUXFnt5aWVpXnn4fKv1ibNWu4wbK8vLwGe/+AevQBwH6oxH4gotqq8T4pM2fOxKZNm5CRkYFff/0V33zzDQIDA+V/nTTWpS1XrlxBly5d0KVLFwAV77NLly5YtGhRA0dGRETUtNU4SUlISMCoUaMAAO3atUNMTAwePnyIYcOGNeoFpD4+PhBFUeln586dda6zcji5sSZuRERE6qDGSYqRkREyMjLkr3V0dHD06FHo6urCz88P5eXl9RJgY8QkhYiISLoaJyl9+/ZV2uekWbNm+OKLL+Do6IjCwkKVB0dERERNV41XkG3durXK2wcFQcDnn3+OhQsXqjSwxowjKURERNLVOEnR0tKClpZWteefdSdMU8IkhYiISDpJT0EmIiIiqi9MUuoBR1KIiIikq1OSUlpaigULFsDR0REWFhYYOHAgrl69qlQuKSkJ69evR79+/SQH2pgwSSEiIpKuTlsvhoeHY+XKlWjZsiXatGmD6Oho9O7dGxcuXICRkRG2bNmCQ4cOISUlBaIowtDQUNVxNwpMUoiIiOquTknK3r170a9fP3zzzTdo3rw5bt++jYEDB+K9995DXFwccnNz4enpiTFjxqB///7w9vZWddxqrXJbfCYpREREdVenJCUtLQ1z5sxB8+bNAQA2NjZYtWoVAgIC4Orqiq+++grOzs4qDZSIiIialjqtSXn8+DH09PQUjnXu3BkAMG/evCafoHBNChERkXR1vrsnMzNT4Uu4clTFyspKelSNXEM+Cp6IiOhFUednlr/33nuYN28eOnbsCDc3N9jZ2UEQBJSWlqoyvkaNIylERER1V6ck5dSpU/j111/x22+/4ddff8WuXbvkycmAAQPQpk0bdOrUCa6urvL/dXV1VWng6ozTPURERNLVKUnp378/+vfvL39dWlqKhIQEedLy22+/ITY2FidOnABQ8aVdVlammogbAU73EBERSVfn6Z4nNW/eHJ07d0bnzp0xfvx4+fF79+7h2rVruH79uiqaaXQ4kkJERFR3KklSqmNlZQU/Pz/4+fnVZzNqh9M9RERE0vHZPfWA0z1ERETSMUmpRxxJISIiqjsmKfWA0z1ERETSMUmpB0xSiIiIpGOSUg+YpBAREUlX67t70tPT69RQ69at63QdERERNU21TlIqt7+vraa4mRtHUoiIiOqu1klKVFQUb7F9BiYpRERE0tU6SZk4cWI9hEFERESkqF53nH2R6enpQRRFPH78uNoy5eXlTz1fn9Rhek0QhAZ7/4B69AHAfqjEfuDoKlFtqSRJKSoqwqFDhxAfHw+ZTIby8nKF84IgYPv27apoqsFlZGQgLS3tqR+2nO4hIiKSTnKSkpaWhj59+iA1NRXGxsaQyWQwNTVFTk4OysrKYG5uDgMDA1XEqhZsbW1hYGCA8+fPQxAENGum3IWVSUp155+nhmxfFMUGf/9Aw/YBwH6oxH7gIzOIakvyPimzZs2CTCbDpUuX8Oeff0IURRw4cAB5eXmIiIiArq4uvv/+e1XE2mhwJIWIiEg6yUnKuXPnEBISAi8vL2hoVFQniiK0tbUxa9YsvPrqqwgNDZUcaGPCv5aIiIikk5ykFBQUwM7ODgBgaGgIQRAgk8nk5729vXHhwgWpzTQqlUnKv9fmEBERUc1JTlJat26NW7duAaiY67W1tcWlS5fk5xMSEqCjoyO1mUaFIylERETSSV5B5uvri2PHjmHx4sUAKvZRWblyJbKzs1FeXo49e/YgMDBQcqCNEdekEBER1Z3kJGXu3Lm4fPkyiouLoa2tjfnz5+P27ds4ePAgNDU1MXbsWKxfv14VsTYaXDhLREQkneQkpXXr1goPD9TR0UFkZCQiIyOlVt1ocbqHiIhIOslrUqh6HEkhIiKqu1qPpKSnpwOAfPSk8vWzPDna8qLjdA8REZF0tU5S7OzsIAgCCgsLoaWlJX/9LOrw3AwiIiJqPGqdpERFRUEQBDRv3lzhNf0PR1KIiIikq3WSMnHiRIXXvr6+sLCwgK6ubpXlCwsL8eDBgzoF11gxSSEiIp
JO8sJZe3t7HDlypNrzx48fh729vdRmiIiIqImRnKQ8a7SgtLRU/kyfpqLy/XJbfCIiorqr0z4pubm5yMnJkb/Oysqq8i6fnJwc7N+/H9bW1nWPkIiIiJqkOiUpH330EZYuXQqgYv1FaGhotU86FkURy5cvr3uEjRDXpBAREUlXpySlf//+MDAwgCiKmD17NsaMGQN3d3eFMoIgQF9fHx4eHvD09FRJsI0FkxQiIiLp6pSkeHt7w9vbGwCQn5+P119/Ha6urioNjIiIiJo2SStaCwoKcPz4cVy4cEFV8bwQOJJCREQknaQkRU9PDykpKdzM7V+YpBAREUkn+d5gf39/fP/996qIhYiIiEhOcpKycOFC/Pnnnxg/fjwuXLiAjIwMPHz4UOmnKeFIChERkXR1Wjj7pI4dOwIAEhIS8MUXX1Rbrik9YJDTX0RERNJJTlIWLVrEL+VqcCSFiIio7iQnKWFhYSoI48XC6R4iIiLpGtVDdTZv3gw7Ozvo6Oiga9eu+OWXX6ot6+PjA0EQlH4GDhwoLzNx4kSl8/7+/pLjZJJCREQkneSRFAAoKirCoUOHEB8fD5lMpvRgPUEQsH37dkltHDhwADNnzsS2bdvQtWtXfPzxx/Dz80NSUhIsLS2Vyh8+fBglJSXy11lZWejcuTNGjBihUM7f3x87duyQv9bW1pYUJ8AkhYiISBUkJylpaWno06cPUlNTYWxsDJlMBlNTU+Tk5KCsrAzm5uYwMDCQHOj69esxZcoUBAUFAQC2bduGb7/9FlFRUZg7d65SeVNTU4XX+/fvh56enlKSoq2tjZYtW0qO70lco0NERCSd5OmeWbNmQSaT4dKlS/jzzz8hiiIOHDiAvLw8REREQFdXV/I+KiUlJYiLi0Pfvn3/F7iGBvr27YuLFy/WqI7t27dj9OjR0NfXVzgeExMDS0tLODk5Ydq0acjKypIU65M4kkJERFR3kpOUc+fOISQkBF5eXtDQqKhOFEVoa2tj1qxZePXVV6t9QnJNZWZmoqysDFZWVgrHrayscPfu3Wde/8svv+DGjRuYPHmywnF/f3/s3r0bZ8+eRUREBP773/8iICBA8u3SnO4hIiKSTvJ0T0FBAezs7AAAhoaGEAQBMplMft7b2xsffPCB1GYk2b59Ozp16gQvLy+F46NHj5b/3qlTJ7z88stwcHBATEwMXn311ecdJhERET1BcpLSunVr3Lp1q6KyZs1ga2uLS5cuYfjw4QAqNnnT0dGR1Ia5uTk0NTVx7949heP37t175nqS/Px87N+/H0uXLn1mO23btoW5uTlu3ryplKRkZGTg9u3bAIDS0lLo6OggOzsbeXl5SvUUFRUBqJimysjIeGa79aFyFKch18cUFRU12PsH1KMPAPZDJfZDxWcHEdWc5CTF19cXx44dw+LFiwFU3Na7cuVKZGdno7y8HHv27EFgYKCkNrS0tODh4YGzZ89i6NChAIDy8nKcPXsW06dPf+q1X3/9NYqLizFu3LhntnPr1i1kZWXB2tpa6ZytrS1sbW0BADKZDOfPn4eJiQmMjIyUyurp6QH4X9LWEB4/fiyPoaFkZGQ02PsH1KMPAPZDJfYDFEaZiejZJP9rnTt3Li5fvozi4mJoa2tj/vz5uH37Ng4ePAhNTU2MHTsW69evlxzozJkzMWHCBHh6esLLywsff/wx8vPz5Xf7BAYGwtbWFitXrlS4bvv27Rg6dCjMzMwUjufl5WHJkiV4/fXX0bJlS/z111+YPXs2HB0d4efnJzleIiIikkYl0z2tW7eWv9bR0UFkZCQiIyOlVq1g1KhRePDgARYtWoS7d+/Czc0Np06dki+mTU9Ply/crZSUlIQLFy7g9OnTSvVpamrit99+w65du5CTkwMbGxv0798fy5Ytk7xXChfOEhERSdew47+1NH369Gqnd2JiYpSOOTk5VZsoqOLW6OowSSEiIpJOZUnKjRs38N133yE1NRUAYGdnh4CAAHTq1ElVTTQaTFKIiIikk5ykFBcXY+rUqdizZw9EUZRPuZSXl2PevHl48803ERkZCS0tLcnBNhZMUoiIiKSTvJnbnDlzsHv3bkybNg2JiYkoKipCcXExEhMT8dZbb2Hv3r2YPXu2KmIlIiKiJkTySMrevXsxfvx4bNq0SeG4k5MTNm/ejNzcXOzduxcff/yx1KYaDY6kEBERSSd5JKW0tBT/+c9/qj3frVs3+f4ETUVDb5pFRET0IpCcpPj5+T31LplTp06hf//+UptplDiSQkREVHeSp3uWLVuGkSNHYvjw4Xj77bfh6OgIAEhOTsbmzZuRlpaGAwcO4OHDhwrXmZqaSm1abXG6h4iISDrJSUqHDh0AANevX8exY8cUzlV+Sbu4uChdJ/VJw+qM0z1ERETSSU5SFi1axC/lanAkhYiIqO4kJylhYWEqCOPFwukeIiIi6SQvnCVlHFkiIiKSrtZJiouLC3bv3o2SkpIaX1NcXIwdO3ZUuTblRcSRFCIiIulqPd0zceJEzJw5E++++y5ee+019O3bF+7u7rC3t4eenh4AID8/HykpKbhy5QrOnDmDEydOQEtLC7NmzVL5G1BnTFKIiIjqrtZJyuzZszFt2jRs374dO3fuxJ49e+QjB82aVVRXuXmbKIpwdXXFkiVLMGnSJBgaGqowdPXFkRQiIiLp6rRwtkWLFggNDUVoaChSU1Px888/448//kBWVhYAwMzMDM7OzvD29oa9vb1KA24MmKQQERFJJ/nuHjs7O9jZ2akgFCIiIqL/4d099YAjKURERNIxSakHTFKIiIikY5JCREREaolJSj3gSAoREZF0TFLqAZMUIiIi6ZikEBERkVqSfAtypeLiYsTHx+P+/fvo3r07zM3NVVV1o8ORFCIiIulUMpKyYcMGWFtbo0ePHhg+fDh+++03AEBmZibMzc0RFRWlimYaDSYpRERE0klOUnbs2IHQ0FD4+/tj+/btCl/M5ubm8PX1xf79+6U206hoaFR0K5MUIiKiupOcpKxbtw5DhgzBF198gcGDByud9/DwwO+//y61mUalMkkpLy9v4EiIiIgaL8lJys2bNxEQEFDteVNTU/kzfZqKyukeJilERER1J3nhrLGxMTIzM6s9n5CQgJYtW0ptRu3o6elBFEX5E5+r8qzz9amsrKxB2n2SIAgN9v4B9egDgP1Qif3AKWCi2pKcpAwYMACfffYZQkJClM79/vvv+PzzzzFp0iSpzaiNjIwMpKWlPfXDliMpRNRYiKKIK1euIDo6GrmyHBgaGaNPnz7w9PSUf5YRNRTJScry5cvRtWtXuLq6YvDgwRAEAbt27UJUVBQOHToEa2trLFq0SBWxqgVbW1sYGBjg/PnzEAQBzZopd2HlMVEUqzz/PDVk++rw/oGG7QOA/VCJ/QC1+9JPSEjAh3PeR9qfN+BmLsJEW8SdYgEHtm9Em/auWB6xDi4uLjWuLy0tDRERETh79izS09Ohp6cHX19frFmzBnZ2dgplf/vtN7zzzjv45ZdfYGZmhrfeegu2traYNGkSUlJSFMqfPHkS4eHhi
I+Ph4aGBnr16oXVq1ejY8eOKuoJUleS/7Xa2NggLi4O8+fPx4EDByCKIvbs2YMWLVpgzJgxWLVqVZPbM4UjKUSk7hISEjB5/Ch0NcrB/NfMYKr3v6+DhwWPsS/uBiaPH4XIPQdqnKhcvnwZP//8M0aPHo2XXnoJqamp2Lp1K3x8fJCQkAA9PT0AFSPSffr0gSAImDdvHvT19REZGQltbW2lOvfs2YMJEybAz88PERERKCgowNatW9GjRw9cvXpVKfmhF4tK/qSwtLREZGQkIiMj8eDBA5SXl8PCwkJ+l0tT8+QtyKIoqt1fT0TUtImiiA/nvI+uRjmY3sNS6TPKVK8ZpvewBC7cx4dz3seh49/V6HNs4MCBeOONNxSODR48GN7e3jh06BDGjx8PAIiIiEB2djbi4+Ph5uYGAAgKCkK7du0Urs3Ly8OMGTMwefJkfPbZZ/LjEyZMgJOTE8LDwxWO04tH5VmEhYUFrKysmmyCAkDhvXOhHBGpmytXriDtzxt408Os2uRDEASMdTdF2p83EBcXV6N6dXV15b+XlpYiKysLjo6OMDY2Rnx8vPzcqVOn4O3tLU9QgIo7Qd98802F+n744Qfk5ORgzJgxyMzMlP9oamqia9euiI6Ors3bpkZIJSMpFy5cQFRUFP7++29kZ2crfTELgoBff/1VFU01Ck/+o2eSQkTqJjo6Gm7mosIUT1XM9JvDzVzEuXPn4Onp+cx6CwsLsXLlSuzYsQMZGRkKn38ymUz+e1paGry9vZWud3R0VHidnJwMAPD19a2yPUNDw2fGRI2b5CRl/fr1mDVrFnR0dODk5ARTU1NVxNWoPZmklJeXQ1NTswGjISJSlCvLgYl2zf6AMtEWkftEgvE077zzjnwXcm9vbxgZGUEQBIwePbpOa/Qqr9mzZ0+VW1mow0Jsql+S/x9es2YNunfvjhMnTsDIyEgVMTV6nO4hInVmaGSMO8U1WyuXXSzAqYaf7QcPHsSECROwbt06+bGioiLk5OQolGvTpg1u3rypdP2/jzk4OACoWPfYt2/fGsVALxbJC0cKCgrw5ptvMkF5wpNJCu/wISJ106dPH1zLFPCw4Omb62Xll+JaplDtdMu/aWpqKv1htnHjRqWN9Pz8/HDx4kVcu3ZNfuzhw4fYt2+fUjlDQ0OEh4ejtLRUqb0HDx7UKC5qvCSPpPTp0wfXr19XRSwvjH9P9xARqRNPT0+0ae+KfXE3qry7B6gYBf4i/iHsnDrBw8OjRvUOGjQIe/bsgZGREVxcXHDx4kWcOXMGZmZmCuVmz56NvXv3ol+/fnjnnXfktyC3bt0aDx8+lMdjaGiIrVu3Yvz48XB3d8fo0aNhYWGB9PR0fPvtt+jevTs2bdokvUNIbUlOUjZu3Ij+/ftj7dq1mDRpEtekgNM9RKTeBEHA8oh1mDx+FHDhPt70qGqflCzEyowRuWltjbdR+OSTT6CpqYl9+/ahqKgI3bt3x5kzZ+Dn56dQrlWrVoiOjsaMGTMQHh4OCwsLvP3229DX18eMGTOgo6MjLzt27FjY2Nhg1apVWLNmDYqLi2Fra4uePXsiKChINR1CaktyktKqVStMnToVH3zwAebMmQMdHR2lhaKCICis7H7R8e4eIlJ3Li4uiNxzAB/OeR9Tj/9vx9nsYgHXMgW0ae+KyE2123HW2NgYUVFRSsdTU1OVjrm5ueHHH39UOBYaGgodHR2lDUB9fHzg4+NT4zjoxSE5SVm0aBFWrFgBW1tbeHp6cm0KON1DRI2Di4sLDh3/DnFxcTh37hxyZTI4GRlhmq8vPDw86nUjysLCQoV9VbKysrBnzx706NGDd0SSnOQkZdu2bRg4cCCOHj3apDdwexIXzhJRYyEIAjw9PWu0D4oqeXt7w8fHBx06dMC9e/ewfft25ObmYuHChc81DlJvkpOUkpISDBw4kAnKE7gmhYjo6QYMGICDBw/is88+gyAIcHd3x/bt29GrV6+GDo3UiOTMYtCgQTh//rwqYnlhcLqHiOjpwsPD8eeff6KgoAD5+fk4f/4890IhJZKTlMWLFyMhIQEhISGIi4vDgwcP8PDhQy7rkroAACAASURBVKWfpoQjKURERNJJnu5xcnICAFy7dg2ffvppteX+vZnPi4x39xAREUmnkrt76nMFeGPEhbNERETSSU5SwsLCVBDGi4VJChERkXQqfYRkXl4e/vnnHwAVm7wZGBiosvpGg9M9RERE0qnkvuHLly+jT58+MDExgaurK1xdXWFiYgJfX19cuXJFFU00Kry7h4iISDrJIymxsbHw8fGBlpYWJk+ejA4dOgAAEhMT8eWXX6JXr16IiYmBl5eX5GAbC97dQ0REJJ3kkZQFCxbA1tYWSUlJ2Lp1K2bMmIEZM2Zg69atSEpKgo2NDRYsWKCKWBsNjqQQUVMVFhYGQRCQmZnZIO3WpuzzjpFqT3KSEhsbi6lTp6Jly5ZK56ysrBAcHIxLly5JbaZR4cJZIqKGFx4ejqNHjzZ0GCSB5CRFQ0MDjx8/rvZ8WVlZk9syn9M9REQNj0lK4yc5e+jWrRs2b96MtLQ0pXPp6enYsmULunfvLrUZAMDmzZthZ2cHHR0ddO3aFb/88ku1ZXfu3AlBEBR+dHR0FMqIoohFixbB2toaurq66Nu3L5KTkyXHyekeImosRFHE5cuXsXr1anz44XysXr0aly9f5h9YpBYkJynh4eGQyWRwdnbG2LFjERYWhrCwMIwZMwbOzs6QyWRYuXKl5EAPHDiAmTNnYvHixYiPj0fnzp3h5+eH+/fvV3uNoaEh7ty5I//5dyK1evVqbNiwAdu2bUNsbCz09fXh5+eHoqIiSbHyFmQiagwSEhLwxrABCJk0HH9f3AAxbRf+vrgBIZOG441hA5CQkFDnunNycjBx4kQYGxvDyMgIQUFBKCgoUCizd+9eeHh4QFdXF6amphg9erR8G4tK58+fx4gRI9C6dWtoa2ujVatWeO+991BYWPjU9gVBQH5+Pnbt2iX/Q3XixIm1jpEaluS7e7p06YLY2FgsWLAAx48fl/8frKenB39/fyxfvhwuLi6SA12/fj2mTJmCoKAgAMC2bdvw7bffIioqCnPnzq3yGkEQqlwrA1QkDx9//DE+/PBDDBkyBACwe/duWFlZ4ejRoxg9enSdY+V0DxGpu4SEBEwJGoWe7XIQPtcMZob/+zrIyn2MHSdvYErQKHy+40CdPsNHjhwJe3t7rFy5EvHx8YiMjISlpSUiIiIAACtWrMDChQsxcuRITJ48GQ8ePMDGjRvRq1cvXL16FcbGxgCAr7/+GgUFBZg2bRrMzMzwyy+/YOPGjbh16xa+/vrratvfs2cPJk+eDC8vLwQHBwMAHBwcahUjNTyVbObm4uKCI0eOoLy8HA8ePAAAWFhYqGwtSklJCeLi4jBv3jz5MQ0NDfTt2xcXL16s9rq8vDy0adMG5eXlcHd3R3h4ODp27AgASElJwd27dxWeumlkZISuXbvi4sWLKktSON1DROpGFEUsnP8+erbLwfsjLZXuijEzbIb3R1oCX93Hwvnv
4+CR72r9+JMuXbpg+/bt8tdZWVnYvn07IiIikJaWhsWLF2P58uWYP3++vMzw4cPRpUsXbNmyRX48IiICurq68jLBwcFwdHTE/PnzkZ6ejtatW1fZ/rhx4/DWW2+hbdu2GDduXK1jJPUgOYt4/PgxcnNzKyrT0ICVlRWsrKzkX9S5ublPXVhbE5mZmSgrK4OVlZXCcSsrK9y9e7fKa5ycnBAVFYVjx45h7969KC8vR7du3XDr1i0AkF9XmzprikkKEamzK1euIP2vGwgKMKs2+RAEARP9TZH+1w3ExcXVuo233npL4XXPnj2RlZWF3NxcHD58GOXl5Rg5ciQyMzPlPy1btkS7du0QHR0tv+7JBCU/Px+ZmZno1q0bRFHE1atXax1XTWMk9SA5SZkxYwa6detW7fnu3bvj/fffl9pMrXl7eyMwMBBubm7o3bs3Dh8+DAsLi6c+qVlVuCaFiNRZdHQ0PBxFhSmeqpgbNYeHo4hz587Vuo1/j3CYmJgAALKzs5GcnAxRFNGuXTtYWFgo/CQmJiqsNUxPT8fEiRNhamoKAwMDWFhYoHfv3gAAmUxW67hqGiOpB8nTPadOnUJgYGC159944w3s3bsXn3zySZ3bMDc3h6amJu7du6dw/N69e9WuOfm35s2bo0uXLrh58yYAyK+7d+8erK2tFep0c3NTuj4jIwO3b98GAJSWlkJHRwfZ2dnIy8tTKvvkBkH379+Xz60+T5XJUUM+obqoqAgZGRkN1r469AHAfqjEfqj47FAHubk5MDOo2R9QZgYicnNrnwxoampWeVwURZSXl0MQBJw8ebLKcpXPfSsrK0O/fv3w8OFDzJkzB87OztDX10dGRgYmTpwoeaT6aTGSepCcpNy+fRu2trbVnrexsZH8waSlpQUPDw+cPXsWQ4cOBVAxjXL27FlMnz69RnWUlZXh+vXrGDBgAADA3t4eLVu2xNmzZ+VJSW5uLmJjYzFt2jSl621tbeXvUyaT4fz58zAxMYGRkZFS2Sc/iMzMzJ7aP/WlcoqtWTOVPkOyVjIyMhrkvVdShz4A2A+V2A/S//JXFUNDY/ydV7NkLStPQFtD5c85KRwcHCCKIuzt7dG+fftqy12/fh1//vkndu3apfDH8A8//FCjdho6MSfpJE/3mJmZISkpqdrziYmJMDQ0lNoMZs6cic8//xy7du1CYmIipk2bhvz8fPndPoGBgQoLa5cuXYrTp0/j77//Rnx8PMaNG4e0tDRMnjwZQMV/vKGhoVi+fDmOHz+O69evIzAwEDY2NvJEqK64TwoRqbM+ffog7qaArNynrxfMlJUi7qYAX19flbY/fPhwaGpqYsmSJUqjFqIoIisrC8D/RjqeLCOKYo1H5vX19ZGTk6OiqKkhSP6Twt/fH59++inefPNNdOnSReFcfHw8PvvsM4wYMUJqMxg1ahQePHiARYsW4e7du3Bzc8OpU6fkC1/T09MVFqxmZ2djypQpuHv3LkxMTODh4YGff/5Z4Va62bNnIz8/H8HBwcjJyUGPHj1w6tQppU3faosLZ4lInXl6eqK1gyt2nLxR5d09QEUysPPUQ7Rx7AQPDw+Vtu/g4IDly5dj3rx5SE1NxdChQ9GiRQukpKTgyJEjCA4OxgcffABnZ2c4ODjggw8+QEZGBgwNDXHo0KEarxnx8PDAmTNnsH79etjY2MDe3h5du3ZV6Xuh+iU5SVm2bBlOnToFLy8vvPbaa/JbfG/cuIETJ07A0tISy5YtkxwoAEyfPr3a6Z2YmBiF1x999BE++uijp9YnCAKWLl2KpUuXqiS+StwnhYjUmSAIWBa+DlOCRgFf3UdQQFX7pGThfLIxPt+xtl6mTebOnYv27dvjo48+wpIlSwAArVq1Qv/+/fHaa68BqFhLeOLECcyYMQMrV66Ejo4Ohg0bhunTp6Nz587PbGP9+vUIDg7Ghx9+iMLCQkyYMIFJSiMjiCr4Fr1z5w7mzp2LY8eOyW/dMjQ0xNChQxEeHg4bGxvJgaqTyjUpPXv2rHJNyu3bt+V3NL3//vvw9PR83iGqxfw71yBUYD9UYD88+7PjeUtISMDC+e8j/a8bFXf7GIjIyhMQd1NAawdXLAtfp5LNOInqSiX/Wq2trbFr1y6IoqiwmVtTXbT05EhKWVlZA0ZCRFQ9FxcXHDzyHeLi4nDu3Dnk5srQ1tAIkxf4wsPDo8l+hpP6kJykpKenw8LCArq6uhAEAZaWlgrnCwsL8eDBg2p3BXwRPXlbG9ekEJE6EwQBnp6eDTLiS/Qsku/usbe3x5EjR6o9f/z4cdjb20ttplF5MknhSAoREVHdSE5SnrWkpbS0VGXP8GksON1DREQkXZ2me3JzcxXuPc/KykJ6erpSuZycHOzfv19hR9em4MmFeUxSiIiI6qZOScpHH30kv223clO00NDQKsuKoojly5fXPcJGiCMpRERE0tUpSenfvz8MDAwgiiJmz56NMWPGwN3dXaGMIAjQ19eHh4dHk1uQxTUpRERE0tUpSfH29oa3tzeAikdnDx8+HJ06dVJpYI0Z7+4hIiKSTvItyIsXL1ZFHC+UJ6d7KjeQIiIiotqRnKRoaGjUaMOfpjTtUdknlY8kJyIiotqTnKQsWrRIKUkpKytDamoqjh49CicnJwwaNEhqM42OpqYmHj9+3KSSMyIiIlWSnKSEhYVVe+7OnTv4z3/+g/bt20ttptGpnPJhkkJERFQ39brLmrW1Nd566y2VPQW5MalcPMskhYiIqG7qfStYfX19pKSk1HczaqdyJIVrUoiIiOqmXp9ZfuPGDWzYsKFJT/fw7h4iUmeiKOLKlSuIjo5GjiwXxkaG6NOnDzw9PfkUZGpwKnnAYNu2bZV+TE1N0blzZ9y7dw/r169XRayNSuV0D0dSiEhdJSQkYMCQYRgeOBkbTsVh17V72HAqDsMDJ2PAkGFISEiodZ2PHj1CaGgo7OzsoK2tDUtLS/Tr1w/x8fHyMrGxsfD394eRkRH09PTQu3dv/PTTT/LziYmJ0NXVRWBgoELdFy5cgKamJubMmVP3N02NiuSRlN69eytl24IgwMTEBA4ODhg9ejRMTU2lNtPocOEsEamzhIQEjAqchBzLjjAbOwnNDIzl5x7n5eDGxWMYFTgJB3ZHwcXFpcb1vvXWWzh48CCmT58OFxcXZGVl4cKFC0hMTIS7uzvOnTuHgIAAeHh4YPHixdDQ0MCOHTvg6+uL8+fPw8vLCx06dMCyZcswa9YsvPHGG3jttdeQn5+PiRMnwtnZWf5YFnrxCeKzHmNMSmQyGc6fP4+ePXvCyMioyjLvvPMOMjMz4ePjg6lTpz7nCP83zfTkww6ft4yMDNja2jZY++rQBwD7oRL7oWafHc+DKIoYMGQYbjw2g2XfwCqndURRxP0zu+HaLAvfHTtS46kfY2NjjBs3Dps2baqyTicnJ7Rt2xYnT56U11lYWIiOHTvC0dERp0+fBlAxCt27d28kJyfj999/x+LFi/Hpp5/i4sWLTe5RK01ZvS+cbap4dw8RqasrV67gRnIKzLyHVJt8CIIAU+8huJG
cgri4uBrXbWxsjNjYWNy+fVvp3LVr15CcnIyxY8ciKysLmZmZyMzMRH5+Pl599VX8+OOP8ilyDQ0N7Ny5E3l5eQgICMCWLVswb948JihNjEqTlLy8PCQmJiIxMRF5eXmqrLrR4XQPEamr6OhoiNbOClM8VWluYAzR2hnnzp2rcd2rV6/GjRs30KpVK3h5eSEsLAx///03ACA5ORkAMGHCBFhYWCj8REZGori4GDKZTF6Xg4MDwsLCcPnyZXTs2BELFy6sw7ulxkwl456XL1/G7NmzceHCBYUsuGfPnli9enWTzHw5kkJE6ipHlgtR17BGZUVdQ8hkuTWue+TIkejZsyeOHDmC06dPY82aNYiIiMDhw4fl3w9r1qyBm5tbldcbGBgovK6c/rl9+zaysrLQsmXLGsdCjZ/kJCU2NhY+Pj7Q0tLC5MmT0aFDBwAVq7O//PJL9OrVCzExMfDy8pIcbGPCfVKISF0ZGxlCKEyuUVmhMBdGRu1qVb+1tTVCQkIQEhKC+/fvw93dHStWrMBHH30EADA0NETfvn2fWc+2bdvwww8/YMWKFVi5ciWmTp2KY8eO1SoWatwkT/csWLAAtra2SEpKwtatWzFjxgzMmDEDW7duRVJSEmxsbLBgwQJVxNqocLqHiNRVnz59INz5A4/zcp5arjQvB8KdP+Dr61ujesvKyhSmawDA0tISNjY2KC4uhoeHBxwcHLB27doqlwQ8ePBA/ntKSgpmzZqF119/HfPnz8fatWtx/Phx7N69u0ax0ItBJSMpixYtqnIIzsrKCsHBwdwWn4hIjXh6esK1nT1uXDz21Lt7Hl48hk7t28LDw6NG9T569AgvvfQS3njjDXTu3BkGBgY4c+YMLl++jHXr1kFDQwORkZEICAhAx44dERQUBFtbW2RkZCA6OhqGhoY4ceIERFHEpEmToKuri61btwIApk6dikOHDuHdd99F3759YWNjo9I+IfUkOUnR0NB46q6qZWVl8lGFpoQjKUSkrgRBwLpV4RgVOAn3z+yGmfcQpX1Ssi4eg/H937F2d1SNbz/W09NDSEgITp8+LV+D4ujoiC1btmDatGkAAB8fH1y8eBHLli3Dpk2bkJeXh5YtW6Jr167y7Ro2btyImJgYHDp0CBYWFvL6t2/fDldXV0yZMgXffvutCnuE1JXkJKVbt27YvHkzxo4dizZt2iicS09Px5YtW9C9e3epzTQ6TFKISJ25uLjgwO4ovD93Pm58sRCitTNEXUMIhbkQ7vwB13b2WFfLjdy0tLSwevVqrF69+qnl3NzccOjQoWrPVy4b+LdWrVopTSfRi01ykhIeHo5evXrB2dkZw4YNkz+nJykpCceOHUOzZs2wcuVKyYGqGz09PYiiWO0o0pPTPQ3x/B51SI4EQWjQZxepQx8A7IdK7IeKKRR14uLigu+OHUFcXBzOnTsHmaxikayv7yx4eHjw2T3U4CQnKV26dEFsbCwWLFiA48ePo6CgAEDFl7i/vz+WL19eq0xc3WVkZCAtLe2ZH7aVSQofMEhE6kwQBHh6ejbJrSJI/alknxQXFxccOXIE5eXl8tXZFhYWL+RaFFtbWxgYGOD8+fMQBKHaLbYr/wIRRbFBt+FuyLYb+r1XaugY2A8V2A/gyARRLan0X6uGhgasrKxUWWWjxbt7iIiIpHnxhjrUBBfOEhERScMkpZ5UDilzTQoREVHdMEmpJ5VJSmlpaQNHQkRE1DgxSaknTFKIiIikUenC2by8PGRnZ1e5F0Dr1q1V2ZTaY5JCREQkjeQkpaioCEuWLMH27duRlZVVbbmmtoCUSQoREZE0kpOUkJAQ7Nq1C0OHDkXPnj1hYmKiirgavcokpaysDOXl5S/knjFERET1SXKScvjwYUyePBmffvqpKuJ5YTy5YdTjx4+hpaXVgNEQEb14du7ciaCgIKSkpMDOzq6hw2mUYmJi0KdPH0RHR8PHx6ehw1Ei+c97QRDg7u6uilheKM2bN5f/XlJS0oCREBERNU6Sk5QhQ4bgzJkzqojlhVK54yzAvVKIiOrD+PHjUVhYiDZt2jR0KI1Wr169UFhYiF69ejV0KFWSPN2zcOFCjBw5EsHBwZg6dSpat26t8AVdydTUVGpTjcqTIylcPEtE6koURVy5cgXR0dHIyZXB2NAIffr0gaenp9o/a0hTU7PK7xuqOQ0NDejo6DR0GNWSPJLSrl07XL16FZGRkfDy8kLLli1hYWGh9NPUPLkmhUkKEamjhIQEDBg2BMMnBWLDz6ewK+0aNvx8CsMnBWLAsCFISEiodZ2PHj1CaGgo7OzsoK2tDUtLS/Tr1w/x8fHyMl9//TU8PDygq6sLc3NzjBs3DhkZGUp1/fHHHxg5ciQsLCygq6sLJycnLFiwQH5+586dEAQBqampCtdt2bIFHTt2hLa2NmxsbPD2228jJydHqf7Y2Fj4+/vDyMgIenp66N27N3766SelchkZGfi///s/2NjYQFtbG/b29pg2bZrCVP7ff/+NESNGwNTUFHp6evjPf/6Db7/9VqGemJgYCIKAr776CitWrMBLL70EHR0dvPrqq7h586ZSuzXpp4kTJ8LAwADp6ekYNGgQDAwMYGtri82bNwMArl+/Dl9fX+jr66NNmzb44osvqowpJiZGqW8GDBgAExMT6Ovr4+WXX8Ynn3yiFGN9kzySsmjRIrXPthsCkxQiUmcJCQkYFRSIHEdLmM15E80MDeTnHufm4cbJnzEqKBAHduyGi4tLjet96623cPDgQUyfPh0uLi7IysrChQsXkJiYCHd3d/li11deeQUrV67EvXv38Mknn+Cnn37C1atXYWxsDAD47bff0LNnTzRv3hzBwcGws7PDX3/9hRMnTmDFihXVth8WFoYlS5agb9++mDZtGpKSkrB161ZcvnwZP/30k3yU+9y5cwgICICHhwcWL14MDQ0N7NixA76+vjh//jy8vLwAALdv34aXlxdycnIQHBwMZ2dnZGRk4ODBgygoKICWlhbu3buHbt26oaCgADNmzICZmRl27dqF1157DQcPHsSwYcMUYly1ahU0NDTwwQcfQCaTYfXq1XjzzTcRGxsrL1PTfgIq7iINCAhAr169sHr1auzbtw/Tp0+Hvr4+FixYgDfffBPDhw/Htm3bEBgYCG9vb9jb21fbhz/88AMGDRoEa2trvPvuu2jZsiUSExPxzTff4N13363xfwsqIVKt5eTkiCdOnBBzcnKqLRMTEyOOHj1aHD16tJicnPwco6tQWloqlpaWPvd2n3Tr1q0GbV8d+kAU2Q+V2A81++x4HsrLy0X/IYPFl8YGiO5H14sexz5S+nE/ul58aWyA6D9ksFheXl7juo2MjMS33367ynMlJSWipaWl6OrqKhYWFsqPf/PNNyIAcdGiRfJjvXr1Elu0aCGmpaUpxV5px44dIgAxJSVFFEVRvH//vqilpSX2799fLCsrk5fbtGmTCECMioqS19GuXTvRz89Pob6CggLR3t5e7Nevn/xYYGCgqKGhIV6+fFnp/VReGxoaKgIQz58/Lz/36NEj0d7eXr
Szs5PHEh0dLQIQO3ToIBYXF8vLfvLJJyIA8fr167XupwkTJogAxPDwcPmx7OxsUVdXVxQEQdy/f7/8+B9//CECEBcvXiw/VhlTdHS0KIqi+PjxY9He3l5s06aNmJ2dXW3fPy/cvKOecCSFiNTVlStXcOOvZJgFdKt2JFwQBJj6d8ONv5IRFxdX47qNjY0RGxuL27dvV9nu/fv3ERISorAOYuDAgXB2dpZPjzx48AA//vgjJk2apLRb+dNG7s+cOYOSkhKEhoYq7E01ZcoUGBoayuu/du0akpOTMXbsWGRlZSEzMxOZmZnIz8/Hq6++ih9//BHl5eUoLy/H0aNHMXjwYHh6elbZRwDw3XffwcvLCz169JCfMzAwQHBwMFJTU5WmzYKCghS2pejZsyeAiimj2vTTkyZPniz/3djYGE5OTtDX18fIkSPlx52cnGBsbCxvpypXr15FSkoKQkNDFUZrnny/z5NKk5S8vDwkJiYiMTEReXl5qqy60eHCWSJSV9HR0RAdrBWmeKrS3MgAooM1zp07V+O6V69ejRs3bqBVq1bw8vJCWFiY/EsxLS0NQMWX5b85OzvLz1eWd3V1rXG7T6tfS0sLbdu2lZ9PTk4GAEyYMEFp/WRkZCSKi4shk8nw4MED5ObmPjOOtLS0Kt9Thw4dFOKq9O/Eq3IT1Ozs7Ke+D0Cxnyrp6Ogorf00MjLCSy+9pJRYGBkZydupyl9//QWg9n1fX1Ty7J7Lly9j9uzZuHDhAsrLywFUrBju2bMnVq9eXWUG+qJ7csU5kxQiUic5uTKILXRrVFZsoQtZrqzGdY8cORI9e/bEkSNHcPr0aaxZswYRERE4fPhwXcNVucrvqTVr1sDNza3KMgYGBnj48GG9tF/dHUliFc+9k1KfqttpCJKTlNjYWPj4+EBLSwuTJ0+WZ46JiYn48ssv0atXL8TExMgXITUVHEkhInVlbGgE4VFhjcoKjwphZGhUq/qtra0REhKCkJAQ3L9/H+7u7lixYgXWrFkDAEhKSoKvr6/CNUlJSfL9Ttq2bQsAuHHjRq3arbw+KSlJXgdQsaFmSkoK+vbtCwBwcHAAABgaGsqPVcXCwgKGhobPjKNNmzZISkpSOv7HH38oxFWX9/G0fqoPlX1z48aNp/bN8yJ5umfBggWwtbWVr6CeMWMGZsyYga1btyIpKQk2NjYKt4w1FVyTQkTqqk+fPhD+uoPHuU+fli+V5UH4647SF2V1ysrKIJMpjrpYWlrCxsYGxcXF8PT0hKWlJbZt24bi4mJ5mZMnTyIxMREDBw4EUJEc9OrVC1FRUUhPT1eo72mjAH379oWWlhY2bNigUG779u2QyWTy+j08PODg4IC1a9dWuTThwYMHACpmBIYOHYoTJ07gypUrSuUq2xgwYAB++eUXXLx4UX4uPz8fn332Gezs7Gp1dxSAGvdTfXB3d4e9vT0+/vhjpdu2G2IERiUjKYsWLULLli2VzllZWSE4OBjLli2T2kyj8+RIypP/kRERNTRPT0+4OrTDjZM/w3JkvyoXRIqiiIenfkYnx/bw8PCoUb2PHj3CSy+9hDfeeAOdO3eGgYEBzpw5g8uXL2PdunVo3rw5IiIiEBQUhN69e2PMmDHyW2vt7Ozw3nvvyevasGEDevToAXd3dwQHB8Pe3h6pqan49ttvce3atSrbt7CwwLx587BkyRL4+/vjtddeQ1JSErZs2YJXXnkF48aNA1CRfERGRiIgIAAdO3ZEUFAQbG1tkZGRgejoaBgaGuLEiRMAgPDwcJw+fRq9e/dGcHAwOnTogDt37uDrr7/GhQsXYGxsjLlz5+LLL79EQEAAZsyYAVNTU+zatQspKSk4dOhQrR8wW5t+UjUNDQ1s3boVgwcPhpubG4KCgmBtbY0//vgDv//+O77//vt6a7sqkpMUDQ2Np277XlZW1iSfAKytrS3/vaioqAEjISJSJAgC1oWvwqigQNz/6geYBXRT2icl6+TPML55H2t37K7xXR16enoICQnB6dOncfjwYZSXl8PR0RFbtmzBtGnTAFRsPqanp4dVq1Zhzpw50NfXx7BhwxAREaFwN0nnzp1x6dIlLFy4EFu3bkVRURHatGmjcLdKVcLCwmBhYYFNmzbhvffeg6mpKYKDgxEeHq7wx6OPjw8uXryIZcuWYdOmTcjLy0PLli3RtWtXTJ06VV7O1tYWsbGxWLhwIfbt24fc3FzY2toiICAAenp6ACr+IP/5558xZ84cbNy4EUVFRXj55Zdx4sSJOo961LSf6oOfnx+io6OxZMkSrFu3DuXl5XBwcMCUKVPqtd2qCKLE8ZuAgABcv34dP/30k9I8WXp6Orp3745OnTrh6fFx+AAAIABJREFUu+++kxSoOpHJZDh//jx69uwJI6Oq52ozMjIwb948lJaW4vXXX8cbb7zxXGOsTByfnHZ63jIyMmBra9tg7atDHwDsh0rsh5p9djxPCQkJeH/+XNz4KxmigzXEFroQHhVC+OsOXB3aYV34qlpPVRCpkuR/reHh4ejVqxecnZ0xbNgwtG/fHkDF4p5jx46hWbNmWLlypeRAGyNtbW2UlpZyuoeI1JKLiwu+O3IMcXFxOHfuHGS5MhgZGsH3Q194eHhwN3FqcJKTlC5duiA2NhYLFizA8ePHUVBQAKBi2M/f3x/Lly9vspm4jo4O8vLyON1DRGpLEAR4eno2ya0iSP2pZLGIi4sLjhw5gtzcXNy5cwd37txBbm4uDh8+rNIEZfPmzbCzs4OOjg66du2KX375pdqyn3/+OXr27AkTExOYmJigb9++SuUnTpwIQRAUfvz9/VUWb+VOgUxSiIiIak+lK1o1NDRgZWUFKysrlS+WPXDgAGbOnInFixcjPj4enTt3hp+fH+7fv19l+ZiYGIwZMwbR0dG4ePEiWrVqhf79+ys9QdLf31+eWN25cwdffvmlymJmkkJERFR3jea2m/Xr12PKlCkICgqCi4sLtm3bBj09PURFRVVZft++fQgJCYGbmxucnZ0RGRmJ8vJynD17VqGctrY2WrZsKf+p3J5YFZikEBER1V2jSFJKSkoQFxensPudhoYG+vbtq7B5ztMUFBSgtLQUpqamCsdjYmJgaWkJJycnTJs2DVlZWSqLuzJJ4cJZIiKi2msUSUpmZibKyspgZWWlcNzKygp3796tUR1z5syBjY2NQqLj7++P3bt34+zZs4iIiMB///tfBAQEoKysTCVxV+6VwpEUIiKi2mvYjROek1WrVmH//v2IiYlReOz16NGj5b936tQJL7/8MhwcHBATE4NXX31Vcruc7iEiIqo7lSUpxcXFiI+Px/3799G9e3eYm5urqmqYm5tDU1MT9+7dUzh+7969Krfjf9LatWuxatUqnDlzBi+//PJTy7Zt2xbm5ua4efOmUpKSkZGB27dvA6h4Fo+Ojg6ys7OrfO4DUJGYVG4eVVBQoLRgt75V7tHXkPscFBUVPff3/SR16AOA/VCJ/cDneBHVlkqSlA0bNiAsLEz+YKkffvgBvr6+yMzMhLOzM1avXo1JkybVuX4tLS14e
Hjg7NmzGDp0KADIF8FOnz692utWr16NFStW4Pvvv6/RHgC3bt1CVlYWrK2tlc7Z2trKd8us3DXSxMTkqTvOViZqJSUlz32nTXXYXZM7jFZgP1RgP0Dp4XtE9HSS16Ts2LEDoaGh8Pf3x/bt2xWekmhubg5fX1/s379fajOYOXMmPv/8c+zatQuJiYmYNm0a8vPzERQUBAAIDAzEvHnz5OUjIiKwcOFCREVFwc7ODnfv3sXdu3flIx95eXmYNWsWLl26hNTUVJw9exZDhgyBo6Mj/Pz8JMcL/G+6p7S0VGXrXIiIiJoKyX9SrFu3DkOGDMEXX3xR5Z0xHh4e2LBhg9RmMGrUKDx48ACLFi3C3bt34ebmhlP/r707j4riSvsH/q1u9n0HMQpoDKK4gYG4EFGJu2IyMaIxwbiQxDjGyWTUOYlLRl+DmuhEX4/ihmgS9WcS9ThOHINHE41oVHSMGM1oEBNEwiqbLN19f3/wdg9tN0ov2N34/ZzTB7rqVt3n3i7goarurcOHNTfT3rp1S2tulg0bNqC+vl7nmTmLFy/GkiVLIJfLcenSJWRkZKC8vBzBwcEYNmwYli5dqvVwQFOoHz4FND6228PDwyz7JSIiehyYnKRcv34dc+bMaXa9j4+P2Yb1zp49u9nLO8ePH9d6f/PmzQfuy9nZudUfOe3m9t+nijJJISIiMozJl3u8vLxQXFzc7PorV6489ObWtqppklJZWWnBSIiIiGyPyUnKqFGjsGnTJpSXl+usy8nJwebNmzFu3DhTq7FJTZOU5kYBERERkX4mJynLli2DUqlEZGQk3n//fUiShIyMDEyZMgV9+/ZFQEAAFi1aZI5YbQ6TFCIiIuOZnKQEBwfj/PnzGDFiBPbs2QMhBHbu3ImDBw9i0qRJOH36tFnnTLElTFKIiIiMZ5YJAwICArBlyxZs2bIFRUVFUKlU8Pf3N/uTkG2Ng4MDHBwcUF9fzySFiIjIQCZnEQqFAhUVFZr3/v7+CAwM1CQoFRUVmkmUHkfqsylMUoiIiAxjcpIyZ84c9O/fv9n1AwYMwJ///GdTq7FZTFKIiIiMY3KScvjwYZ0J05p68cUX8c9//tPUamyWem4UTodNRERkGJOTlNu3bz/weRzBwcEWfaiYpXl7ewMAysrKLBwJERGRbTE5SfH19cW1a9eaXf/TTz891jOtMkkhIiIyjslJyogRI5CWloYLFy7orMvOzsamTZswcuRIU6uxWT4+PgAaH1N/7949C0dDRERkO0wegrx06VIcPnwYMTExGDduHLp37w4AuHz5Mg4ePIiAgAAsXbrU5EBtlfpMCtB4NsXZ2dmC0RAREdkOk5OU4OBgnDt3DgsWLMCBAwewb98+AI03jL788stYvnw5goODTQ7UVjVNUkpLSx/rviAiIjKEWSZza9euHTIyMiCEQFFREYDG+VIkSTLH7m2a+nIPALM9DZqIiOhxYJYkRU2SJAQEBJhzlzbP29sb9vb2aGhoQGFhoaXDISIishlmTVKqqqpQVlYGIYTOuo4dO5qzKpshk8kQEBCA/Px83Llzx9LhEBER2QyTk5Ta2lp88MEH2Lp16wMvZyiVSlOrsllBQUFMUoiIiAxkcpIya9YsZGRkYPz48YiLi9O6UZQaBQUFAQAKCwshhOC9OkRERC1gcpLy1VdfYcaMGUhLSzNHPG1SYGAgAKCmpgaVlZWP9eR2RERELWXyZG6SJCEqKsocsbRZTR8bcOvWLQtGQkREZDtMTlISExORmZlpjljarNDQUM33N2/etFgcREREtsTkJGXhwoX45ZdfkJKSgvPnz6OoqAilpaU6r8eZi4uL5pJPbm6uhaMhIiKyDSbfk9KlSxcAwIULF7B169Zmyz3Oo3uAxrMphYWFTFKIiIhayOQkZdGiRRyt0gKdO3fGmTNnUFBQgPLycnh5eVk6JCIiIqtmcpKyZMkSM4TR9qkfvAgAOTk5GDBggAWjISIisn4m35OiVldXh6ysLBw4cADFxcXm2m2bERoaCldXVwCNSQoRERE9mFmmxV+7di2WLFmCu3fvAgC++eYbDBkyBMXFxejatStWrlyJadOmmaMqq+Hi4gIhBBQKhd71kiTprIuIiMC5c+fw73//G/X19ZDJzJYj6rCGe4D09cGjZA19ALAf1NgP0PvIECJqnsl/JdPT0zF37lyMGDECW7du1foh9PPzw5AhQ7B7925Tq7Ea+fn5yMnJMeqXrXo+mdLSUly/ft3coREREbUpJp9J+fjjj5GYmIjPP/9c77N7oqOjsXbtWlOrsRrt27eHm5sbTpw4AUmSYGenvwuFEDrrYmNjsW3bNigUCpw5cwbdunVr9Xibi+9R0NcHlmDpGNgPjdgP4CADIgOZfCbl+vXrGDlyZLPrfXx8HvjgwceJi4sLevfuDQA4efIkamtrLRwRERGR9TI5SfHy8nrgjbJXrlzRPGCPgOeeew5A43N8jh8/btlgiIiIrJjJScqoUaOwadMmlJeX66zLycnB5s2bMW7cOFOraTN69OiBDh06AAD+8Y9/oK6uzsIRERERWSeTk5Rly5ZBqVQiMjIS77//PiRJQkZGBqZMmYK+ffsiICAAixYtMkesbYIkSUhMTAQAlJSU4NChQxaOiIiIyDqZnKQEBwfj/PnzGDFiBPbs2QMhBHbu3ImDBw9i0qRJOH36NPz8/MwRa5vRv39/zeME9u3bh7y8PAtHREREZH3MMlFHQEAAtmzZgtLSUhQWFqKgoABlZWXYtm0bAgICzFFFmyJJEqZNmwa5XA6FQoG1a9eiqqrK0mERERFZFbPPJubv74/AwMBWnaisLQgNDUVSUhIA4Pbt2/joo4842oeIiKgJZhIWNHr0aDz77LMAgGvXruFvf/ub3huQiYiIHkcmJykymQxyufyhL9IlSRJmzpyJmJgYAEBubi7mz5+Pc+fOWTgyIiIiyzN56sVFixbpzKKoVCpx8+ZN7N+/H+Hh4RgzZoyp1bRZdnZ2ePvtt/Hpp5/i66+/RkVFBT7++GP07t0bSUlJCAkJsXSIREREFmFykrJkyZJm1xUUFOCZZ57BU089ZWo1bZpMJsOrr76KiIgIbNmyBRUVFbh48SIuXryIyMhIDBs2DL1794a9vb2lQyUiInpkWvUhFu3atcMbb7yBpUuXYtKkSa1ZVZvw9NNPIyIiAvv27cORI0egUChw+fJlXL58Gc7OzoiOjkbv3r3RrVs3eHt762yvUqlw+vRpnDx5ElVVVfD29sbIkSPRtWtXC7SGiIjINK3+pC1XV1fk5ua2djVthpubG1555RWMGjUKmZmZyMzMRFVVFe7du4eTJ0/i5MmTABrnp+ncuTNCQ0MREhKCoqIifPzhUhTc/Bm9/QTc7FW4fk+GjA1r0DtmAJalrsITTzxh4dYRERG1XKsmKZcvX8batWt5uccIvr6+mDhxIl544QX8+OOPOHPmDM6fP4/q6moAjcOWb9++jRMnTqC4uBg/XTiNsWECb8a4wsfNEXZ2DrCzs8PvNQI7Lx7DyxOex47dX/AeFyIishkmJylhYWF6
[... remainder of base64-encoded PNG data for this cell's plot output omitted ...]\n", 970 |             "text/plain": [ 971 |               "
" 972 | ] 973 | }, 974 | "metadata": {} 975 | }, 976 | { 977 | "output_type": "execute_result", 978 | "data": { 979 | "text/plain": [ 980 | "" 981 | ] 982 | }, 983 | "metadata": {}, 984 | "execution_count": 241 985 | } 986 | ] 987 | } 988 | ] 989 | } -------------------------------------------------------------------------------- /data/ditella-crime-2004/CrimebyBlock.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vveitch/causality-tutorials/e08034512b38f938996acd7f6b60b5be996e99ba/data/ditella-crime-2004/CrimebyBlock.dta -------------------------------------------------------------------------------- /data/ditella-crime-2004/MonthlyPanel.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vveitch/causality-tutorials/e08034512b38f938996acd7f6b60b5be996e99ba/data/ditella-crime-2004/MonthlyPanel.dta -------------------------------------------------------------------------------- /data/ditella-crime-2004/README: -------------------------------------------------------------------------------- 1 | Data from 2 | 3 | Do Police Reduce Crime? Estimates Using the Allocation of Police Forces After a Terrorist Attack 4 | Rafael Di Tella 5 | Ernesto Schargrodsky 6 | AMERICAN ECONOMIC REVIEW 7 | VOL. 94, NO. 1, MARCH 2004 8 | https://www.aeaweb.org/articles?id=10.1257/000282804322970733 9 | 10 | Also includes a notebook to pre-process the data for the tutorial. 11 | Note: this pre-processing was chosen for simplicity, and should not be viewed as indicative of good statistical practice. -------------------------------------------------------------------------------- /data/ditella-crime-2004/README~: -------------------------------------------------------------------------------- 1 | Data from 2 | 3 | Do Police Reduce Crime? Estimates Using the Allocation of Police Forces After a Terrorist Attack 4 | Rafael Di Tella 5 | Ernesto Schargrodsky 6 | AMERICAN ECONOMIC REVIEW 7 | VOL. 94, NO. 1, MARCH 2004 8 | 9 | Also includes a notebook to pre-process the data for the tutorial. 10 | Note: this pre-processing was chosen for simplicity, and should not be viewed as indicative of good statistical practice. -------------------------------------------------------------------------------- /data/ditella-crime-2004/WeeklyPanel.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vveitch/causality-tutorials/e08034512b38f938996acd7f6b60b5be996e99ba/data/ditella-crime-2004/WeeklyPanel.dta -------------------------------------------------------------------------------- /data/ditella-crime-2004/data_cleaning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 11, 6 | "id": "35cf30ec", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "# Data cleaning for demo of difference-in-differences estimation w/ machine learning methods\n", 11 | "# data from \"Do Police Reduce Crime? 
Estimates Using the Allocation of Police Forces After a Terrorist Attack\" Rafael Di Tella \n", 12 | "# https://www.aeaweb.org/articles?id=10.1257/000282804322970733" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 57, 18 | "id": "efebdb5e", 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "import pandas as pd\n", 23 | "import numpy as np\n", 24 | "import pyreadstat " 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 66, 30 | "id": "59563136", 31 | "metadata": {}, 32 | "outputs": [ 33 | { 34 | "name": "stderr", 35 | "output_type": "stream", 36 | "text": [ 37 | ":1: DeprecationWarning: `np.object` is a deprecated alias for the builtin `object`. To silence this warning, use `object` by itself. Doing this will not modify any behavior and is safe. \n", 38 | "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", 39 | " cbb, _ = pyreadstat.read_dta('CrimebyBlock.dta')\n", 40 | ":2: DeprecationWarning: `np.object` is a deprecated alias for the builtin `object`. To silence this warning, use `object` by itself. Doing this will not modify any behavior and is safe. \n", 41 | "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", 42 | " panel, _ = pyreadstat.read_dta('MonthlyPanel.dta')\n" 43 | ] 44 | } 45 | ], 46 | "source": [ 47 | "cbb, _ = pyreadstat.read_dta('CrimebyBlock.dta')\n", 48 | "panel, _ = pyreadstat.read_dta('MonthlyPanel.dta')" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 67, 54 | "id": "0fa52bad", 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "panel = panel.merge(cbb[['observ','educjefe','ocupado']], on='observ') # education of head of household, unemployment rate" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 68, 64 | "id": "6c908a03", 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "data": { 69 | "text/html": [ 70 | "
" 296 | ], 297 | "text/plain": [ 298 | " observ barrio calle altura institu1 institu3 distanci \\\n", 299 | "0 870.0 Once Cordoba a2300 0.0 1.0 1.0 \n", 300 | "1 870.0 Once Cordoba a2300 0.0 1.0 1.0 \n", 301 | "2 870.0 Once Cordoba a2300 0.0 1.0 1.0 \n", 302 | "3 870.0 Once Cordoba a2300 0.0 1.0 1.0 \n", 303 | "4 870.0 Once Cordoba a2300 0.0 1.0 1.0 \n", 304 | "... ... ... ... ... ... ... ... \n", 305 | "9631 140.0 Belgrano Virrey del Pino a1600 0.0 0.0 3.0 \n", 306 | "9632 140.0 Belgrano Virrey del Pino a1600 0.0 0.0 3.0 \n", 307 | "9633 140.0 Belgrano Virrey del Pino a1600 0.0 0.0 3.0 \n", 308 | "9634 140.0 Belgrano Virrey del Pino a1600 0.0 0.0 3.0 \n", 309 | "9635 140.0 Belgrano Virrey del Pino a1600 0.0 0.0 3.0 \n", 310 | "\n", 311 | " edpub estserv banco totrob mes educjefe ocupado \n", 312 | "0 1.0 0.0 0.0 0.00 4.0 10.846611 0.949495 \n", 313 | "1 1.0 0.0 0.0 0.00 5.0 10.846611 0.949495 \n", 314 | "2 1.0 0.0 0.0 0.00 6.0 10.846611 0.949495 \n", 315 | "3 1.0 0.0 0.0 0.00 7.0 10.846611 0.949495 \n", 316 | "4 1.0 0.0 0.0 0.00 8.0 10.846611 0.949495 \n", 317 | "... ... ... ... ... ... ... ... \n", 318 | "9631 0.0 0.0 0.0 0.25 10.0 12.771961 0.950423 \n", 319 | "9632 0.0 0.0 0.0 0.00 11.0 12.771961 0.950423 \n", 320 | "9633 0.0 0.0 0.0 0.00 12.0 12.771961 0.950423 \n", 321 | "9634 0.0 0.0 0.0 0.00 72.0 12.771961 0.950423 \n", 322 | "9635 0.0 0.0 0.0 0.00 73.0 12.771961 0.950423 \n", 323 | "\n", 324 | "[9636 rows x 14 columns]" 325 | ] 326 | }, 327 | "execution_count": 68, 328 | "metadata": {}, 329 | "output_type": "execute_result" 330 | } 331 | ], 332 | "source": [ 333 | "panel" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": 69, 339 | "id": "64777226", 340 | "metadata": {}, 341 | "outputs": [], 342 | "source": [ 343 | "panel = panel.drop(columns=['altura','institu3','distanci']) # unsure what these are" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": 70, 349 | "id": "64ecee42", 350 | "metadata": {}, 351 | "outputs": [], 352 | "source": [ 353 | "english_translation = {'observ': 'block',\n", 354 | " 'barrio': 'neighbourhood',\n", 355 | " 'calle': 'street',\n", 356 | " 'institu1': 'jewish_insitute',\n", 357 | " 'edpub': 'public_institution',\n", 358 | " 'estserv': 'gas_station',\n", 359 | " 'banco': 'bank',\n", 360 | " 'totrob': 'car_thefts',\n", 361 | " 'mes': 'month',\n", 362 | " 'educjefe': 'education',\n", 363 | " 'ocupado': 'employment_rate'\n", 364 | " }\n", 365 | "panel = panel.rename(columns=english_translation)" 366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": 71, 371 | "id": "8e49470e", 372 | "metadata": {}, 373 | "outputs": [], 374 | "source": [ 375 | "panel.to_csv(\"DiTella_crime.csv\", index=False)" 376 | ] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "execution_count": null, 381 | "id": "98cd4270", 382 | "metadata": {}, 383 | "outputs": [], 384 | "source": [] 385 | } 386 | ], 387 | "metadata": { 388 | "kernelspec": { 389 | "display_name": "Python 3", 390 | "language": "python", 391 | "name": "python3" 392 | }, 393 | "language_info": { 394 | "codemirror_mode": { 395 | "name": "ipython", 396 | "version": 3 397 | }, 398 | "file_extension": ".py", 399 | "mimetype": "text/x-python", 400 | "name": "python", 401 | "nbconvert_exporter": "python", 402 | "pygments_lexer": "ipython3", 403 | "version": "3.8.8" 404 | } 405 | }, 406 | "nbformat": 4, 407 | "nbformat_minor": 5 408 | } 409 | -------------------------------------------------------------------------------- 
/difference_in_differences.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "difference-in-differences.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [], 9 | "toc_visible": true, 10 | "include_colab_link": true 11 | }, 12 | "kernelspec": { 13 | "name": "python3", 14 | "display_name": "Python 3" 15 | }, 16 | "language_info": { 17 | "name": "python" 18 | } 19 | }, 20 | "cells": [ 21 | { 22 | "cell_type": "markdown", 23 | "metadata": { 24 | "id": "view-in-github", 25 | "colab_type": "text" 26 | }, 27 | "source": [ 28 | "\"Open" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": { 34 | "id": "QfZkNLUb4B-p" 35 | }, 36 | "source": [ 37 | "# Difference-in-Differences Estimation Tutorial\n", 38 | "\n", 39 | "A short example on how to estimate the difference-in-differences ATT with 2 period panel data using using machine learning methods.\n", 40 | "\n", 41 | "Data from this paper: https://www.aeaweb.org/articles?id=10.1257/000282804322970733\n", 42 | "\n", 43 | "In brief: following a terrorist attack on a synagogue in Buenos Aires, additional police officers were stationed on blocks containing Jewish institutions. This provides a natural experiment for the effect of policing on deterring crime. The data includes the number of car thefts in many city blocks the months before and after the increase in policing. Comparing the change in thefts for blocks with Jewish institutions (hence, increased police) to the other blocks gives a measurement. However, blocks with Jewish institutions may differ in significant ways---e.g., they may tend to be better educated or located in certain neighbourhoods. We want to use machine learning methods to control for such potential issues. " 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "metadata": { 49 | "id": "dS2X3Bq1-fxE" 50 | }, 51 | "source": [ 52 | "import numpy as np\n", 53 | "import pandas as pd\n", 54 | "import scipy as sp\n", 55 | "from sklearn import preprocessing\n", 56 | "from sklearn.linear_model import LinearRegression, LogisticRegression\n", 57 | "from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor\n", 58 | "from sklearn.model_selection import KFold, StratifiedKFold, train_test_split\n", 59 | "from sklearn.metrics import mean_squared_error, log_loss\n", 60 | "import sklearn\n", 61 | "import os" 62 | ], 63 | "execution_count": 197, 64 | "outputs": [] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "metadata": { 69 | "id": "nxJ46X9cFJ9X" 70 | }, 71 | "source": [ 72 | "RANDOM_SEED=42\n", 73 | "np.random.seed(RANDOM_SEED)" 74 | ], 75 | "execution_count": 198, 76 | "outputs": [] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": { 81 | "id": "yPbJeayiEs3u" 82 | }, 83 | "source": [ 84 | "##Load and Format Data\n", 85 | "\n", 86 | "We reformat the data so that there is an \"outcome\" column equal to the difference in car thefts after and before the time period, a \"treatment\" column indictaing the presence of a jewish institute, and \"confounders\" denoting variables that may differ between jewish and non-jewish blocks, and which may also affect the change in crime rate. 
\n", 87 | "\n", 88 | "After doing this formatting, the estimation procedure is identical to computing the ATT with a regression adjustment" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "metadata": { 94 | "id": "2AC9TPko-hbt" 95 | }, 96 | "source": [ 97 | "panel = pd.read_csv('https://raw.githubusercontent.com/vveitch/causality-tutorials/main/data/ditella-crime-2004/DiTella_crime.csv')\n" 98 | ], 99 | "execution_count": 199, 100 | "outputs": [] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "metadata": { 105 | "colab": { 106 | "base_uri": "https://localhost:8080/", 107 | "height": 203 108 | }, 109 | "id": "-A1LX6-t-hZD", 110 | "outputId": "b94e3198-8c72-423d-a01d-7fc26cc26856" 111 | }, 112 | "source": [ 113 | "panel.head()" 114 | ], 115 | "execution_count": 200, 116 | "outputs": [ 117 | { 118 | "output_type": "execute_result", 119 | "data": { 120 | "text/html": [ 121 | "
" 226 | ], 227 | "text/plain": [ 228 | " block neighbourhood street ... month education employment_rate\n", 229 | "0 870.0 Once Cordoba ... 4.0 10.846611 0.949495\n", 230 | "1 870.0 Once Cordoba ... 5.0 10.846611 0.949495\n", 231 | "2 870.0 Once Cordoba ... 6.0 10.846611 0.949495\n", 232 | "3 870.0 Once Cordoba ... 7.0 10.846611 0.949495\n", 233 | "4 870.0 Once Cordoba ... 8.0 10.846611 0.949495\n", 234 | "\n", 235 | "[5 rows x 11 columns]" 236 | ] 237 | }, 238 | "metadata": {}, 239 | "execution_count": 200 240 | } 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "metadata": { 246 | "id": "zUpUVabW59bS" 247 | }, 248 | "source": [ 249 | "# Terrorist attack occurred in July 18, and increased police presence begins July 25. Data before this is before period, and after is after period \n", 250 | "first_period = panel['month'].isin([4., 5., 6., 71.])\n", 251 | "panel['first_period']=first_period" 252 | ], 253 | "execution_count": 201, 254 | "outputs": [] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "metadata": { 259 | "id": "D-8dY5W8Q-PJ" 260 | }, 261 | "source": [ 262 | "# code neighbourhood as integer for later convenience\n", 263 | "panel['neighbourhood']=panel['neighbourhood'].astype('category').cat.codes" 264 | ], 265 | "execution_count": 202, 266 | "outputs": [] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "metadata": { 271 | "colab": { 272 | "base_uri": "https://localhost:8080/", 273 | "height": 447 274 | }, 275 | "id": "ZBR4R-90DmRn", 276 | "outputId": "a121111a-1d58-4426-dd0c-354c147453d6" 277 | }, 278 | "source": [ 279 | "# We need to reduce the multiple before and after months in some fashion\n", 280 | "# There is not a clear canonical way to do this, but an average seems reasonable\n", 281 | "panel = panel.groupby(['block', 'first_period']).mean()\n", 282 | "panel = panel.reset_index(level='first_period')\n", 283 | "panel" 284 | ], 285 | "execution_count": 203, 286 | "outputs": [ 287 | { 288 | "output_type": "execute_result", 289 | "data": { 290 | "text/html": [ 291 | "
" 482 | ], 483 | "text/plain": [ 484 | " first_period neighbourhood ... education employment_rate\n", 485 | "block ... \n", 486 | "1.0 False 0 ... 11.919889 0.926594\n", 487 | "1.0 True 0 ... 11.919889 0.926594\n", 488 | "2.0 False 0 ... 11.919889 0.926594\n", 489 | "2.0 True 0 ... 11.919889 0.926594\n", 490 | "3.0 False 0 ... 11.919889 0.926594\n", 491 | "... ... ... ... ... ...\n", 492 | "874.0 True 1 ... 10.898485 0.939759\n", 493 | "875.0 False 1 ... 10.898485 0.939759\n", 494 | "875.0 True 1 ... 10.898485 0.939759\n", 495 | "876.0 False 1 ... 10.898485 0.939759\n", 496 | "876.0 True 1 ... 10.898485 0.939759\n", 497 | "\n", 498 | "[1752 rows x 10 columns]" 499 | ] 500 | }, 501 | "metadata": {}, 502 | "execution_count": 203 503 | } 504 | ] 505 | }, 506 | { 507 | "cell_type": "code", 508 | "metadata": { 509 | "id": "APOqpHmrOGzo", 510 | "colab": { 511 | "base_uri": "https://localhost:8080/" 512 | }, 513 | "outputId": "397aa5fc-b7c1-4617-dc99-765a18ceda5f" 514 | }, 515 | "source": [ 516 | "# now create a version of the data w/ \"outcome\" = after - before thefts, and \n", 517 | "compact_df = panel[~panel['first_period']]\n", 518 | "car_thefts = panel['car_thefts'].values\n", 519 | "compact_df['Y1-Y0']=car_thefts[~panel['first_period']] - car_thefts[panel['first_period']]" 520 | ], 521 | "execution_count": 204, 522 | "outputs": [ 523 | { 524 | "output_type": "stream", 525 | "name": "stderr", 526 | "text": [ 527 | "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:4: SettingWithCopyWarning: \n", 528 | "A value is trying to be set on a copy of a slice from a DataFrame.\n", 529 | "Try using .loc[row_indexer,col_indexer] = value instead\n", 530 | "\n", 531 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", 532 | " after removing the cwd from sys.path.\n" 533 | ] 534 | } 535 | ] 536 | }, 537 | { 538 | "cell_type": "code", 539 | "metadata": { 540 | "id": "uN-97eQ3FvW5" 541 | }, 542 | "source": [ 543 | "# format this in a manner sympatico with ATT estimation\n", 544 | "compact_df = compact_df.reset_index()\n", 545 | "\n", 546 | "outcome = compact_df['Y1-Y0']\n", 547 | "treatment = compact_df['jewish_insitute']\n", 548 | "confounders = compact_df[['neighbourhood','public_institution', 'gas_station', 'bank', 'education', 'employment_rate']]" 549 | ], 550 | "execution_count": 205, 551 | "outputs": [] 552 | }, 553 | { 554 | "cell_type": "code", 555 | "metadata": { 556 | "colab": { 557 | "base_uri": "https://localhost:8080/" 558 | }, 559 | "id": "cYl3bRQ4HliO", 560 | "outputId": "7379f923-f388-400e-8557-3848238cc675" 561 | }, 562 | "source": [ 563 | "# finally, do some light data cleaning\n", 564 | "treatment=treatment.astype(int)\n", 565 | "\n", 566 | "# scale continuous covariates\n", 567 | "cont_vars = ['education', 'employment_rate']\n", 568 | "confounders[cont_vars] = preprocessing.scale(confounders[cont_vars])\n", 569 | "\n" 570 | ], 571 | "execution_count": 206, 572 | "outputs": [ 573 | { 574 | "output_type": "stream", 575 | "name": "stderr", 576 | "text": [ 577 | "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:6: SettingWithCopyWarning: \n", 578 | "A value is trying to be set on a copy of a slice from a DataFrame.\n", 579 | "Try using .loc[row_indexer,col_indexer] = value instead\n", 580 | "\n", 581 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", 582 | " \n", 583 | 
"/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py:1734: SettingWithCopyWarning: \n", 584 | "A value is trying to be set on a copy of a slice from a DataFrame.\n", 585 | "Try using .loc[row_indexer,col_indexer] = value instead\n", 586 | "\n", 587 | "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", 588 | " isetter(loc, value[:, i].tolist())\n" 589 | ] 590 | } 591 | ] 592 | }, 593 | { 594 | "cell_type": "markdown", 595 | "metadata": { 596 | "id": "C576dWRsa3ad" 597 | }, 598 | "source": [ 599 | "## Specify Nuisance Function Models\n", 600 | "\n", 601 | "The next step is to specify models for the conditional expected outcome and propensity score" 602 | ] 603 | }, 604 | { 605 | "cell_type": "code", 606 | "metadata": { 607 | "colab": { 608 | "base_uri": "https://localhost:8080/" 609 | }, 610 | "id": "qyOhSZRQRb8W", 611 | "outputId": "63ed01b0-48af-41b4-d878-909b06470000" 612 | }, 613 | "source": [ 614 | "# specify a model for the conditional expected outcome\n", 615 | "\n", 616 | "# TODO(victorveitch) the covariates have basically no predictive power, replace this example with something better\n", 617 | "\n", 618 | "# make a function that returns a sklearn model for later use in k-folding\n", 619 | "def make_Q_model():\n", 620 | " # return LinearRegression()\n", 621 | " return RandomForestRegressor(random_state=RANDOM_SEED, n_estimators=100, max_depth=2)\n", 622 | "Q_model = make_Q_model()\n", 623 | "\n", 624 | "# Sanity check that chosen model actually improves test error\n", 625 | "# A real analysis should give substantial attention to model selection and validation \n", 626 | "\n", 627 | "X_w_treatment = confounders.copy()\n", 628 | "X_w_treatment[\"treatment\"] = treatment\n", 629 | "\n", 630 | "X_train, X_test, y_train, y_test = train_test_split(X_w_treatment, outcome, test_size=0.2)\n", 631 | "Q_model.fit(X_train, y_train)\n", 632 | "y_pred = Q_model.predict(X_test)\n", 633 | "\n", 634 | "test_mse=mean_squared_error(y_pred, y_test)\n", 635 | "print(f\"Test MSE of fit model {test_mse}\") \n", 636 | "baseline_mse=mean_squared_error(y_train.mean()*np.ones_like(y_test), y_test)\n", 637 | "print(f\"Test MSE of no-covariate model {baseline_mse}\")" 638 | ], 639 | "execution_count": 207, 640 | "outputs": [ 641 | { 642 | "output_type": "stream", 643 | "name": "stdout", 644 | "text": [ 645 | "Test MSE of fit model 0.027801530904389606\n", 646 | "Test MSE of no-covariate model 0.028564516759592096\n" 647 | ] 648 | } 649 | ] 650 | }, 651 | { 652 | "cell_type": "code", 653 | "metadata": { 654 | "colab": { 655 | "base_uri": "https://localhost:8080/" 656 | }, 657 | "id": "uq6eZEBXbsaI", 658 | "outputId": "5b82bcee-03a4-48db-8a16-8c68168245b4" 659 | }, 660 | "source": [ 661 | "# specify a model for the propensity score\n", 662 | "\n", 663 | "def make_g_model():\n", 664 | "# return LogisticRegression(max_iter=1000)\n", 665 | " return RandomForestClassifier(n_estimators=100, max_depth=2)\n", 666 | "\n", 667 | "g_model = make_g_model()\n", 668 | "# Sanity check that chosen model actually improves test error\n", 669 | "# A real analysis should give substantial attention to model selection and validation \n", 670 | "\n", 671 | "X_train, X_test, a_train, a_test = train_test_split(confounders, treatment, test_size=0.2)\n", 672 | "g_model.fit(X_train, a_train)\n", 673 | "a_pred = g_model.predict_proba(X_test)[:,1]\n", 674 | "\n", 675 | "test_ce=log_loss(a_test, a_pred)\n", 676 | "print(f\"Test CE of fit 
model {test_ce}\") \n", 677 | "baseline_ce=log_loss(a_test, a_train.mean()*np.ones_like(a_test))\n", 678 | "print(f\"Test CE of no-covariate model {baseline_ce}\")" 679 | ], 680 | "execution_count": 208, 681 | "outputs": [ 682 | { 683 | "output_type": "stream", 684 | "name": "stdout", 685 | "text": [ 686 | "Test CE of fit model 0.1597166570168377\n", 687 | "Test CE of no-covariate model 0.16733990853941555\n" 688 | ] 689 | } 690 | ] 691 | }, 692 | { 693 | "cell_type": "markdown", 694 | "metadata": { 695 | "id": "2RkvV_4_dFWo" 696 | }, 697 | "source": [ 698 | "## Use cross fitting to get get predicted outcomes and propensity scores for each unit" 699 | ] 700 | }, 701 | { 702 | "cell_type": "code", 703 | "metadata": { 704 | "id": "KA0AsEGJ_X3b" 705 | }, 706 | "source": [ 707 | "# helper functions to implement the cross fitting\n", 708 | "\n", 709 | "def treatment_k_fold_fit_and_predict(make_model, X:pd.DataFrame, A:np.array, n_splits:int):\n", 710 | " \"\"\"\n", 711 | " Implements K fold cross-fitting for the model predicting the treatment A. \n", 712 | " That is, \n", 713 | " 1. Split data into K folds\n", 714 | " 2. For each fold j, the model is fit on the other K-1 folds\n", 715 | " 3. The fitted model is used to make predictions for each data point in fold j\n", 716 | " Returns an array containing the predictions \n", 717 | "\n", 718 | " Args:\n", 719 | " model: function that returns sklearn model (which implements fit and predict_prob)\n", 720 | " X: dataframe of variables to adjust for\n", 721 | " A: array of treatments\n", 722 | " n_splits: number of splits to use\n", 723 | " \"\"\"\n", 724 | " predictions = np.full_like(A, np.nan, dtype=float)\n", 725 | " kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)\n", 726 | " \n", 727 | " for train_index, test_index in kf.split(X, A):\n", 728 | " X_train = X.loc[train_index]\n", 729 | " A_train = A.loc[train_index]\n", 730 | " g = make_model()\n", 731 | " g.fit(X_train, A_train)\n", 732 | "\n", 733 | " # get predictions for split\n", 734 | " predictions[test_index] = g.predict_proba(X.loc[test_index])[:, 1]\n", 735 | "\n", 736 | " assert np.isnan(predictions).sum() == 0\n", 737 | " return predictions\n", 738 | "\n", 739 | "\n", 740 | "def outcome_k_fold_fit_and_predict(make_model, X:pd.DataFrame, y:np.array, A:np.array, n_splits:int, output_type:str):\n", 741 | " \"\"\"\n", 742 | " Implements K fold cross-fitting for the model predicting the outcome Y. \n", 743 | " That is, \n", 744 | " 1. Split data into K folds\n", 745 | " 2. For each fold j, the model is fit on the other K-1 folds\n", 746 | " 3. 
The fitted model is used to make predictions for each data point in fold j\n", 747 | " Returns two arrays containing the predictions for all units untreated, all units treated \n", 748 | "\n", 749 | " Args:\n", 750 | " model: function that returns sklearn model (that implements fit and either predict_prob or predict)\n", 751 | " X: dataframe of variables to adjust for\n", 752 | " y: array of outcomes\n", 753 | " A: array of treatments\n", 754 | " n_splits: number of splits to use\n", 755 | " output_type: type of outcome, \"binary\" or \"continuous\"\n", 756 | "\n", 757 | " \"\"\"\n", 758 | " predictions0 = np.full_like(A, np.nan, dtype=float)\n", 759 | " predictions1 = np.full_like(y, np.nan, dtype=float)\n", 760 | " if output_type == 'binary':\n", 761 | " kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)\n", 762 | " elif output_type == 'continuous':\n", 763 | " kf = KFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)\n", 764 | "\n", 765 | " # include the treatment as input feature\n", 766 | " X_w_treatment = X.copy()\n", 767 | " X_w_treatment[\"A\"] = A\n", 768 | "\n", 769 | " # for predicting effect under treatment / control status for each data point \n", 770 | " X0 = X_w_treatment.copy()\n", 771 | " X0[\"A\"] = 0\n", 772 | " X1 = X_w_treatment.copy()\n", 773 | " X1[\"A\"] = 1\n", 774 | "\n", 775 | " \n", 776 | " for train_index, test_index in kf.split(X_w_treatment, y):\n", 777 | " X_train = X_w_treatment.loc[train_index]\n", 778 | " y_train = y.loc[train_index]\n", 779 | " q = make_model()\n", 780 | " q.fit(X_train, y_train)\n", 781 | "\n", 782 | " if output_type =='binary':\n", 783 | " predictions0[test_index] = q.predict_proba(X0.loc[test_index])[:, 1]\n", 784 | " predictions1[test_index] = q.predict_proba(X1.loc[test_index])[:, 1]\n", 785 | " elif output_type == 'continuous':\n", 786 | " predictions0[test_index] = q.predict(X0.loc[test_index])\n", 787 | " predictions1[test_index] = q.predict(X1.loc[test_index])\n", 788 | "\n", 789 | " assert np.isnan(predictions0).sum() == 0\n", 790 | " assert np.isnan(predictions1).sum() == 0\n", 791 | " return predictions0, predictions1" 792 | ], 793 | "execution_count": 209, 794 | "outputs": [] 795 | }, 796 | { 797 | "cell_type": "code", 798 | "metadata": { 799 | "id": "wVcE6pRQeMNf" 800 | }, 801 | "source": [ 802 | "g = treatment_k_fold_fit_and_predict(make_g_model, X=confounders, A=treatment, n_splits=10)" 803 | ], 804 | "execution_count": 210, 805 | "outputs": [] 806 | }, 807 | { 808 | "cell_type": "code", 809 | "metadata": { 810 | "id": "GLEHlLLdWSh9" 811 | }, 812 | "source": [ 813 | "Q0,Q1=outcome_k_fold_fit_and_predict(make_Q_model, X=confounders, y=outcome, A=treatment, n_splits=10, output_type=\"continuous\")" 814 | ], 815 | "execution_count": 211, 816 | "outputs": [] 817 | }, 818 | { 819 | "cell_type": "code", 820 | "metadata": { 821 | "colab": { 822 | "base_uri": "https://localhost:8080/", 823 | "height": 203 824 | }, 825 | "id": "_NVCV0q0g8wQ", 826 | "outputId": "625e4e9d-8ea4-4e57-c684-6037a4ce3b3f" 827 | }, 828 | "source": [ 829 | "data_and_nuisance_estimates = pd.DataFrame({'g': g, 'Q0': Q0, 'Q1': Q1, 'A': treatment, 'Y': outcome})\n", 830 | "data_and_nuisance_estimates.head()" 831 | ], 832 | "execution_count": 212, 833 | "outputs": [ 834 | { 835 | "output_type": "execute_result", 836 | "data": { 837 | "text/html": [ 838 | "
" 907 | ], 908 | "text/plain": [ 909 | " g Q0 Q1 A Y\n", 910 | "0 0.027920 -0.065413 -0.133397 0 0.000000\n", 911 | "1 0.027276 -0.070393 -0.118878 0 0.156250\n", 912 | "2 0.028456 -0.076041 -0.142226 0 -0.302083\n", 913 | "3 0.028456 -0.065413 -0.133397 0 0.062500\n", 914 | "4 0.025655 -0.020747 -0.085449 0 0.062500" 915 | ] 916 | }, 917 | "metadata": {}, 918 | "execution_count": 212 919 | } 920 | ] 921 | }, 922 | { 923 | "cell_type": "markdown", 924 | "metadata": { 925 | "id": "VNhM7URdgzQB" 926 | }, 927 | "source": [ 928 | "## Combine predicted values and data into estimate of ATT" 929 | ] 930 | }, 931 | { 932 | "cell_type": "code", 933 | "metadata": { 934 | "id": "J-vONC5ejwh2" 935 | }, 936 | "source": [ 937 | "def att_aiptw(Q0, Q1, g, A, Y, prob_t=None):\n", 938 | " \"\"\"\n", 939 | " # Double ML estimator for the ATT\n", 940 | " This uses the ATT specific scores, see equation 3.9 of https://www.econstor.eu/bitstream/10419/149795/1/869216953.pdf\n", 941 | " \"\"\"\n", 942 | "\n", 943 | " if prob_t is None:\n", 944 | " prob_t = A.mean() # estimate marginal probability of treatment\n", 945 | "\n", 946 | " tau_hat = (A*(Y-Q0) - (1-A)*(g/(1-g))*(Y-Q0)).mean()/ prob_t\n", 947 | " \n", 948 | " scores = (A*(Y-Q0) - (1-A)*(g/(1-g))*(Y-Q0) - tau_hat*A) / prob_t\n", 949 | " n = Y.shape[0] # number of observations\n", 950 | " std_hat = np.std(scores) / np.sqrt(n)\n", 951 | "\n", 952 | " return tau_hat, std_hat\n" 953 | ], 954 | "execution_count": 213, 955 | "outputs": [] 956 | }, 957 | { 958 | "cell_type": "code", 959 | "metadata": { 960 | "colab": { 961 | "base_uri": "https://localhost:8080/" 962 | }, 963 | "id": "SjDj0F9Bm9uq", 964 | "outputId": "bfbca9bb-c2e0-4171-d65f-bebb71fd0da1" 965 | }, 966 | "source": [ 967 | "tau_hat, std_hat = att_aiptw(**data_and_nuisance_estimates)\n", 968 | "print(f\"The estimate is {tau_hat} pm {1.96*std_hat}\")" 969 | ], 970 | "execution_count": 214, 971 | "outputs": [ 972 | { 973 | "output_type": "stream", 974 | "name": "stdout", 975 | "text": [ 976 | "The estimate is -0.0777691984649497 pm 0.05810535308191231\n" 977 | ] 978 | } 979 | ] 980 | }, 981 | { 982 | "cell_type": "code", 983 | "metadata": { 984 | "colab": { 985 | "base_uri": "https://localhost:8080/" 986 | }, 987 | "id": "R3YqKD60UElw", 988 | "outputId": "b4dad931-c970-429e-8c83-ece9db655c9f" 989 | }, 990 | "source": [ 991 | "# for comparison, the point estimate without any covariate correction\n", 992 | "outcome[treatment==1].mean()-outcome[treatment==0].mean()" 993 | ], 994 | "execution_count": 215, 995 | "outputs": [ 996 | { 997 | "output_type": "execute_result", 998 | "data": { 999 | "text/plain": [ 1000 | "-0.06683773314434818" 1001 | ] 1002 | }, 1003 | "metadata": {}, 1004 | "execution_count": 215 1005 | } 1006 | ] 1007 | }, 1008 | { 1009 | "cell_type": "code", 1010 | "metadata": { 1011 | "id": "37ep7LyGUHH9" 1012 | }, 1013 | "source": [ 1014 | "" 1015 | ], 1016 | "execution_count": 215, 1017 | "outputs": [] 1018 | } 1019 | ] 1020 | } --------------------------------------------------------------------------------