├── .gitignore ├── 1.2 - Simpsons Paradox.ipynb ├── 1.3 - Probability and Statistics.ipynb ├── 1.4 - Graphs.ipynb ├── 1.5 - Structural Causal Models.ipynb ├── 2.2 - Chains and Forks.ipynb ├── 2.3 - Colliders.ipynb ├── 2.4 - d-separation.ipynb ├── 2.5 - Model Testing and Causal Search.ipynb ├── 3.1 - Interventions.ipynb ├── 3.2 - The Adjustment Formula.ipynb ├── 3.3 - Backdoor Criterion.ipynb ├── 3.4 - Front-Door Criterion.ipynb ├── 3.5 - Conditional Interventions and Covariate-Specific Effects.ipynb ├── 3.6 - Inverse Probability Weighing.ipynb ├── 3.7 - Mediation.ipynb ├── CausalModel.py ├── LICENSE ├── README.md ├── d4sci.mplstyle ├── dags ├── Causality.Fig.1.2.dot ├── Primer.Fig.1.10.dot ├── Primer.Fig.1.6.dot ├── Primer.Fig.1.7a.dot ├── Primer.Fig.1.8.dot ├── Primer.Fig.1.9.dot ├── Primer.Fig.2.1.dot ├── Primer.Fig.2.2.dot ├── Primer.Fig.2.3.dot ├── Primer.Fig.2.5.dot ├── Primer.Fig.2.6.dot ├── Primer.Fig.2.7.dot ├── Primer.Fig.2.8.dot ├── Primer.Fig.2.9.dot ├── Primer.Fig.3.1.dot ├── Primer.Fig.3.10b.dot ├── Primer.Fig.3.11.dot ├── Primer.Fig.3.12.dot ├── Primer.Fig.3.2.dot ├── Primer.Fig.3.3.dot ├── Primer.Fig.3.4.dot ├── Primer.Fig.3.5.dot ├── Primer.Fig.3.6.dot ├── Primer.Fig.3.7.dot ├── Primer.Fig.3.8.dot └── Primer.SCM.1.5.3.dot ├── data ├── D4Sci_logo_ball.png ├── D4Sci_logo_full.png ├── book2.jpeg ├── causality.jpeg ├── iris.csv └── newsletter.png └── environment.yml /.gitignore: -------------------------------------------------------------------------------- 1 | figures/ 2 | # Byte-compiled / optimized / DLL files 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | pip-wheel-metadata/ 25 | share/python-wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are 
written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | *.py,cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # pipenv 89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 91 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 92 | # install all needed dependencies. 93 | #Pipfile.lock 94 | 95 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 96 | __pypackages__/ 97 | 98 | # Celery stuff 99 | celerybeat-schedule 100 | celerybeat.pid 101 | 102 | # SageMath parsed files 103 | *.sage.py 104 | 105 | # Environments 106 | .env 107 | .venv 108 | env/ 109 | venv/ 110 | ENV/ 111 | env.bak/ 112 | venv.bak/ 113 | 114 | # Spyder project settings 115 | .spyderproject 116 | .spyproject 117 | 118 | # Rope project settings 119 | .ropeproject 120 | 121 | # mkdocs documentation 122 | /site 123 | 124 | # mypy 125 | .mypy_cache/ 126 | .dmypy.json 127 | dmypy.json 128 | 129 | # Pyre type checker 130 | .pyre/ 131 | -------------------------------------------------------------------------------- /1.3 - Probability and Statistics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "
\n", 8 | "
\"Data
\n", 9 | "

Causal Inference In Statistics - A Primer

\n", 10 | "

1.3 Probability and Statistics

\n", 11 | "

Bruno Gonçalves
\n", 12 | " www.data4sci.com
\n", 13 | " @bgoncalves, @data4sci

\n", 14 | "

\n", 15 | "

\n", 16 | "
" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "from collections import Counter\n", 26 | "from pprint import pprint\n", 27 | "\n", 28 | "import pandas as pd\n", 29 | "import numpy as np\n", 30 | "import matplotlib.pyplot as plt \n", 31 | "\n", 32 | "import watermark\n", 33 | "\n", 34 | "%load_ext watermark\n", 35 | "%matplotlib inline" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "We start by printing out the versions of the libraries we're using for future reference" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 2, 48 | "metadata": {}, 49 | "outputs": [ 50 | { 51 | "name": "stdout", 52 | "output_type": "stream", 53 | "text": [ 54 | "watermark 2.0.2\n", 55 | "json 2.0.9\n", 56 | "numpy 1.18.1\n", 57 | "autopep8 1.5\n", 58 | "pandas 1.0.1\n", 59 | "Fri Jul 24 2020 \n", 60 | "\n", 61 | "CPython 3.7.3\n", 62 | "IPython 6.2.1\n", 63 | "\n", 64 | "compiler : Clang 4.0.1 (tags/RELEASE_401/final)\n", 65 | "system : Darwin\n", 66 | "release : 19.5.0\n", 67 | "machine : x86_64\n", 68 | "processor : i386\n", 69 | "CPU cores : 8\n", 70 | "interpreter: 64bit\n", 71 | "Git hash : 7bb21d92754ca48aa8a201cf2e70b750085be46b\n" 72 | ] 73 | } 74 | ], 75 | "source": [ 76 | "%watermark -n -v -m -g -iv" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "Load default figure style" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 3, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "plt.style.use('./d4sci.mplstyle')" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "## Probability" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 4, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "data = pd.DataFrame({\n", 109 | " 'Gender': ['Male', 'Male', 'Male', 
'Male', 'Female', 'Female', 'Female', 'Female'],\n", 110 | " 'Education': ['No High School', 'High School', 'College', 'Grad School', 'No High School', 'High School', 'College', 'Grad School'],\n", 111 | " 'Occurrence': [112, 231, 595, 242, 136, 189, 763, 172]\n", 112 | "})" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 5, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [ 121 | "data.set_index(['Gender', 'Education'], inplace=True)" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 6, 127 | "metadata": {}, 128 | "outputs": [ 129 | { 130 | "data": { 131 | "text/plain": [ 132 | "Occurrence 2440\n", 133 | "dtype: int64" 134 | ] 135 | }, 136 | "execution_count": 6, 137 | "metadata": {}, 138 | "output_type": "execute_result" 139 | } 140 | ], 141 | "source": [ 142 | "data.sum()" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 7, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "P = data/data.sum()" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 8, 157 | "metadata": {}, 158 | "outputs": [ 159 | { 160 | "data": { 161 | "text/html": [ 162 | "
\n", 163 | "\n", 176 | "\n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | "
Occurrence
GenderEducation
MaleNo High School0.045902
High School0.094672
College0.243852
Grad School0.099180
FemaleNo High School0.055738
High School0.077459
College0.312705
Grad School0.070492
\n", 226 | "
" 227 | ], 228 | "text/plain": [ 229 | " Occurrence\n", 230 | "Gender Education \n", 231 | "Male No High School 0.045902\n", 232 | " High School 0.094672\n", 233 | " College 0.243852\n", 234 | " Grad School 0.099180\n", 235 | "Female No High School 0.055738\n", 236 | " High School 0.077459\n", 237 | " College 0.312705\n", 238 | " Grad School 0.070492" 239 | ] 240 | }, 241 | "execution_count": 8, 242 | "metadata": {}, 243 | "output_type": "execute_result" 244 | } 245 | ], 246 | "source": [ 247 | "P" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": 9, 253 | "metadata": {}, 254 | "outputs": [ 255 | { 256 | "data": { 257 | "text/plain": [ 258 | "Occurrence 0.172131\n", 259 | "dtype: float64" 260 | ] 261 | }, 262 | "execution_count": 9, 263 | "metadata": {}, 264 | "output_type": "execute_result" 265 | } 266 | ], 267 | "source": [ 268 | "P.loc[(slice(None), 'High School'), :].sum()" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": 10, 274 | "metadata": {}, 275 | "outputs": [ 276 | { 277 | "data": { 278 | "text/plain": [ 279 | "Occurrence 0.611066\n", 280 | "dtype: float64" 281 | ] 282 | }, 283 | "execution_count": 10, 284 | "metadata": {}, 285 | "output_type": "execute_result" 286 | } 287 | ], 288 | "source": [ 289 | "P.loc['Female'].sum() + P.loc[(slice(None), 'High School'), :].sum() - P.loc[('Female', 'High School')]" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": 11, 295 | "metadata": {}, 296 | "outputs": [ 297 | { 298 | "data": { 299 | "text/plain": [ 300 | "Occurrence 231\n", 301 | "Name: (Male, High School), dtype: int64" 302 | ] 303 | }, 304 | "execution_count": 11, 305 | "metadata": {}, 306 | "output_type": "execute_result" 307 | } 308 | ], 309 | "source": [ 310 | "data.loc[('Male', 'High School')]" 311 | ] 312 | }, 313 | { 314 | "cell_type": "code", 315 | "execution_count": 12, 316 | "metadata": {}, 317 | "outputs": [], 318 | "source": [ 319 | "P_Female = 
data.loc['Female'].sum()/data.sum()" 320 | ] 321 | }, 322 | { 323 | "cell_type": "code", 324 | "execution_count": 13, 325 | "metadata": {}, 326 | "outputs": [ 327 | { 328 | "data": { 329 | "text/plain": [ 330 | "Occurrence 0.611066\n", 331 | "dtype: float64" 332 | ] 333 | }, 334 | "execution_count": 13, 335 | "metadata": {}, 336 | "output_type": "execute_result" 337 | } 338 | ], 339 | "source": [ 340 | "(data.loc[('Male', 'High School')] + data.loc['Female'].sum())/data.sum()" 341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": 14, 346 | "metadata": {}, 347 | "outputs": [], 348 | "source": [ 349 | "idx = pd.IndexSlice" 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": 15, 355 | "metadata": {}, 356 | "outputs": [ 357 | { 358 | "data": { 359 | "text/plain": [ 360 | "Occurrence 0.172131\n", 361 | "dtype: float64" 362 | ] 363 | }, 364 | "execution_count": 15, 365 | "metadata": {}, 366 | "output_type": "execute_result" 367 | } 368 | ], 369 | "source": [ 370 | "data.loc[(slice(None), 'High School'), :].sum()/data.sum()" 371 | ] 372 | }, 373 | { 374 | "cell_type": "code", 375 | "execution_count": 16, 376 | "metadata": {}, 377 | "outputs": [ 378 | { 379 | "data": { 380 | "text/plain": [ 381 | "Occurrence 0.077459\n", 382 | "dtype: float64" 383 | ] 384 | }, 385 | "execution_count": 16, 386 | "metadata": {}, 387 | "output_type": "execute_result" 388 | } 389 | ], 390 | "source": [ 391 | "data.loc[('Female', 'High School')]/data.sum()" 392 | ] 393 | }, 394 | { 395 | "cell_type": "code", 396 | "execution_count": 17, 397 | "metadata": {}, 398 | "outputs": [], 399 | "source": [ 400 | "table = pd.pivot_table(data.reset_index(), index='Gender', columns='Education')" 401 | ] 402 | }, 403 | { 404 | "cell_type": "code", 405 | "execution_count": 18, 406 | "metadata": {}, 407 | "outputs": [ 408 | { 409 | "data": { 410 | "text/html": [ 411 | "
\n", 412 | "\n", 429 | "\n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | "
Occurrence
EducationCollegeGrad SchoolHigh SchoolNo High School
Gender
Female763172189136
Male595242231112
\n", 467 | "
" 468 | ], 469 | "text/plain": [ 470 | " Occurrence \n", 471 | "Education College Grad School High School No High School\n", 472 | "Gender \n", 473 | "Female 763 172 189 136\n", 474 | "Male 595 242 231 112" 475 | ] 476 | }, 477 | "execution_count": 18, 478 | "metadata": {}, 479 | "output_type": "execute_result" 480 | } 481 | ], 482 | "source": [ 483 | "table" 484 | ] 485 | }, 486 | { 487 | "cell_type": "code", 488 | "execution_count": 19, 489 | "metadata": {}, 490 | "outputs": [], 491 | "source": [ 492 | "table.loc['Total'] = table.sum()\n", 493 | "table['Total'] = table.sum(axis=1)" 494 | ] 495 | }, 496 | { 497 | "cell_type": "code", 498 | "execution_count": 20, 499 | "metadata": {}, 500 | "outputs": [ 501 | { 502 | "data": { 503 | "text/html": [ 504 | "
\n", 505 | "\n", 522 | "\n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | "
OccurrenceTotal
EducationCollegeGrad SchoolHigh SchoolNo High School
Gender
Female7631721891361260
Male5952422311121180
Total13584144202482440
\n", 573 | "
" 574 | ], 575 | "text/plain": [ 576 | " Occurrence Total\n", 577 | "Education College Grad School High School No High School \n", 578 | "Gender \n", 579 | "Female 763 172 189 136 1260\n", 580 | "Male 595 242 231 112 1180\n", 581 | "Total 1358 414 420 248 2440" 582 | ] 583 | }, 584 | "execution_count": 20, 585 | "metadata": {}, 586 | "output_type": "execute_result" 587 | } 588 | ], 589 | "source": [ 590 | "table" 591 | ] 592 | }, 593 | { 594 | "cell_type": "markdown", 595 | "metadata": {}, 596 | "source": [ 597 | "
\n", 598 | " \"Data \n", 599 | "
" 600 | ] 601 | } 602 | ], 603 | "metadata": { 604 | "kernelspec": { 605 | "display_name": "Python 3", 606 | "language": "python", 607 | "name": "python3" 608 | }, 609 | "language_info": { 610 | "codemirror_mode": { 611 | "name": "ipython", 612 | "version": 3 613 | }, 614 | "file_extension": ".py", 615 | "mimetype": "text/x-python", 616 | "name": "python", 617 | "nbconvert_exporter": "python", 618 | "pygments_lexer": "ipython3", 619 | "version": "3.8.5" 620 | }, 621 | "toc": { 622 | "base_numbering": 1, 623 | "nav_menu": {}, 624 | "number_sections": true, 625 | "sideBar": true, 626 | "skip_h1_title": true, 627 | "title_cell": "Table of Contents", 628 | "title_sidebar": "Contents", 629 | "toc_cell": false, 630 | "toc_position": {}, 631 | "toc_section_display": true, 632 | "toc_window_display": false 633 | }, 634 | "varInspector": { 635 | "cols": { 636 | "lenName": 16, 637 | "lenType": 16, 638 | "lenVar": 40 639 | }, 640 | "kernels_config": { 641 | "python": { 642 | "delete_cmd_postfix": "", 643 | "delete_cmd_prefix": "del ", 644 | "library": "var_list.py", 645 | "varRefreshCmd": "print(var_dic_list())" 646 | }, 647 | "r": { 648 | "delete_cmd_postfix": ") ", 649 | "delete_cmd_prefix": "rm(", 650 | "library": "var_list.r", 651 | "varRefreshCmd": "cat(var_dic_list()) " 652 | } 653 | }, 654 | "types_to_exclude": [ 655 | "module", 656 | "function", 657 | "builtin_function_or_method", 658 | "instance", 659 | "_Feature" 660 | ], 661 | "window_display": false 662 | } 663 | }, 664 | "nbformat": 4, 665 | "nbformat_minor": 2 666 | } 667 | -------------------------------------------------------------------------------- /3.6 - Inverse Probability Weighing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "
\n", 8 | "
\"Data
\n", 9 | "

Causal Inference In Statistics - A Primer

\n", 10 | "

3.6 Inverse Probability Weighing

\n", 11 | "

Bruno Gonçalves
\n", 12 | " www.data4sci.com
\n", 13 | " @bgoncalves, @data4sci

\n", 14 | "

\n", 15 | "

\n", 16 | "
" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "from collections import Counter\n", 26 | "from pprint import pprint\n", 27 | "\n", 28 | "import pandas as pd\n", 29 | "import numpy as np\n", 30 | "\n", 31 | "import matplotlib\n", 32 | "import matplotlib.pyplot as plt \n", 33 | "\n", 34 | "from CausalModel import CausalModel\n", 35 | "\n", 36 | "import watermark\n", 37 | "\n", 38 | "%load_ext watermark\n", 39 | "%matplotlib inline" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "We start by printing out the versions of the libraries we're using for future reference" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 2, 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "name": "stdout", 56 | "output_type": "stream", 57 | "text": [ 58 | "Python implementation: CPython\n", 59 | "Python version : 3.8.5\n", 60 | "IPython version : 7.19.0\n", 61 | "\n", 62 | "Compiler : Clang 10.0.0 \n", 63 | "OS : Darwin\n", 64 | "Release : 20.2.0\n", 65 | "Machine : x86_64\n", 66 | "Processor : i386\n", 67 | "CPU cores : 16\n", 68 | "Architecture: 64bit\n", 69 | "\n", 70 | "Git hash: 6ba5a323994ecc2dbeee8be27219dbf5207f8dfb\n", 71 | "\n", 72 | "numpy : 1.19.2\n", 73 | "matplotlib: 3.3.2\n", 74 | "watermark : 2.1.0\n", 75 | "pandas : 1.1.3\n", 76 | "\n" 77 | ] 78 | } 79 | ], 80 | "source": [ 81 | "%watermark -n -v -m -g -iv" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "Load default figure style" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 3, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "plt.style.use('./d4sci.mplstyle')\n", 98 | "colors = plt.rcParams['axes.prop_cycle'].by_key()['color']" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "We load the DAG from Fig 1.10 for ease of 
reference" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 4, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "G = CausalModel('dags/Primer.Fig.1.10.dot')" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 5, 120 | "metadata": {}, 121 | "outputs": [ 122 | { 123 | "data": { 124 | "image/png": "\n", 125 | "text/plain": [ 126 | "
" 127 | ] 128 | }, 129 | "metadata": {}, 130 | "output_type": "display_data" 131 | } 132 | ], 133 | "source": [ 134 | "fig, ax = plt.subplots(1, figsize=(2.2,2.2))\n", 135 | "G.plot(ax=ax)" 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": {}, 141 | "source": [ 142 | "In our example, the data for the joint probability $P\\left(X, Y, Z\\right)$ is:" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 6, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "PXYZ = pd.DataFrame({\n", 152 | " 'X': ['Yes', 'Yes', 'Yes', 'Yes', 'No', 'No', 'No', 'No'],\n", 153 | " 'Y': ['Yes', 'Yes', 'No', 'No', 'Yes', 'Yes', 'No', 'No'],\n", 154 | " 'Z': ['Male', 'Female', 'Male', 'Female', 'Male', 'Female', 'Male', 'Female'],\n", 155 | " 'Prob': [0.116, 0.274, 0.009, 0.101, 0.334, 0.079, 0.051, 0.036]\n", 156 | "})" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": 7, 162 | "metadata": {}, 163 | "outputs": [ 164 | { 165 | "data": { 166 | "text/html": [ 167 | "
\n", 168 | "\n", 181 | "\n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | "
XYZProb
0YesYesMale0.116
1YesYesFemale0.274
2YesNoMale0.009
3YesNoFemale0.101
4NoYesMale0.334
5NoYesFemale0.079
6NoNoMale0.051
7NoNoFemale0.036
\n", 250 | "
" 251 | ], 252 | "text/plain": [ 253 | " X Y Z Prob\n", 254 | "0 Yes Yes Male 0.116\n", 255 | "1 Yes Yes Female 0.274\n", 256 | "2 Yes No Male 0.009\n", 257 | "3 Yes No Female 0.101\n", 258 | "4 No Yes Male 0.334\n", 259 | "5 No Yes Female 0.079\n", 260 | "6 No No Male 0.051\n", 261 | "7 No No Female 0.036" 262 | ] 263 | }, 264 | "execution_count": 7, 265 | "metadata": {}, 266 | "output_type": "execute_result" 267 | } 268 | ], 269 | "source": [ 270 | "PXYZ" 271 | ] 272 | }, 273 | { 274 | "cell_type": "markdown", 275 | "metadata": {}, 276 | "source": [ 277 | "We check that it all sums up to one" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": 8, 283 | "metadata": {}, 284 | "outputs": [ 285 | { 286 | "data": { 287 | "text/plain": [ 288 | "1.0" 289 | ] 290 | }, 291 | "execution_count": 8, 292 | "metadata": {}, 293 | "output_type": "execute_result" 294 | } 295 | ], 296 | "source": [ 297 | "PXYZ['Prob'].sum()" 298 | ] 299 | }, 300 | { 301 | "cell_type": "markdown", 302 | "metadata": {}, 303 | "source": [ 304 | "The conditional probability $P\\left(Y, Z | X\\right)$ for X=Yes:" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": 9, 310 | "metadata": {}, 311 | "outputs": [], 312 | "source": [ 313 | "PYZ_X = PXYZ[PXYZ['X'] == 'Yes'].copy()\n", 314 | "PYZ_X['Prob'] /= PYZ_X['Prob'].sum()" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": 10, 320 | "metadata": {}, 321 | "outputs": [ 322 | { 323 | "data": { 324 | "text/html": [ 325 | "
\n", 326 | "\n", 339 | "\n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | "
XYZProb
0YesYesMale0.232
1YesYesFemale0.548
2YesNoMale0.018
3YesNoFemale0.202
\n", 380 | "
" 381 | ], 382 | "text/plain": [ 383 | " X Y Z Prob\n", 384 | "0 Yes Yes Male 0.232\n", 385 | "1 Yes Yes Female 0.548\n", 386 | "2 Yes No Male 0.018\n", 387 | "3 Yes No Female 0.202" 388 | ] 389 | }, 390 | "execution_count": 10, 391 | "metadata": {}, 392 | "output_type": "execute_result" 393 | } 394 | ], 395 | "source": [ 396 | "PYZ_X.round(3)" 397 | ] 398 | }, 399 | { 400 | "cell_type": "markdown", 401 | "metadata": {}, 402 | "source": [ 403 | "These numbers still differ slightly from the corrected values in the [online Errata](http://bayes.cs.ucla.edu/PRIMER/pearl-etal-2016-primer-errata-pages-july2020.pdf)" 404 | ] 405 | }, 406 | { 407 | "cell_type": "markdown", 408 | "metadata": {}, 409 | "source": [ 410 | "Here we note that these values are just the corresponding values of $P\\left(X, Y, Z\\right)$ divided by $P\\left(X=\\mathrm{Yes}\\right)$" 411 | ] 412 | }, 413 | { 414 | "cell_type": "code", 415 | "execution_count": 11, 416 | "metadata": {}, 417 | "outputs": [], 418 | "source": [ 419 | "PX = PXYZ[['X', 'Prob']].groupby('X', as_index=False).sum()" 420 | ] 421 | }, 422 | { 423 | "cell_type": "code", 424 | "execution_count": 12, 425 | "metadata": {}, 426 | "outputs": [ 427 | { 428 | "data": { 429 | "text/html": [ 430 | "
\n", 431 | "\n", 444 | "\n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | "
XProb
0No0.5
1Yes0.5
\n", 465 | "
" 466 | ], 467 | "text/plain": [ 468 | " X Prob\n", 469 | "0 No 0.5\n", 470 | "1 Yes 0.5" 471 | ] 472 | }, 473 | "execution_count": 12, 474 | "metadata": {}, 475 | "output_type": "execute_result" 476 | } 477 | ], 478 | "source": [ 479 | "PX" 480 | ] 481 | }, 482 | { 483 | "cell_type": "markdown", 484 | "metadata": {}, 485 | "source": [ 486 | "Please note that the calculations in earlier editions of the book are wrong. For instance, on page 74 we have:" 487 | ] 488 | }, 489 | { 490 | "cell_type": "code", 491 | "execution_count": 13, 492 | "metadata": {}, 493 | "outputs": [ 494 | { 495 | "data": { 496 | "text/plain": [ 497 | "0.501" 498 | ] 499 | }, 500 | "execution_count": 13, 501 | "metadata": {}, 502 | "output_type": "execute_result" 503 | } 504 | ], 505 | "source": [ 506 | "0.116+0.274+0.01+0.101" 507 | ] 508 | }, 509 | { 510 | "cell_type": "markdown", 511 | "metadata": {}, 512 | "source": [ 513 | "since they didn't correct the 0.01 to 0.009." 514 | ] 515 | }, 516 | { 517 | "cell_type": "markdown", 518 | "metadata": {}, 519 | "source": [ 520 | "The gender distribution is:" 521 | ] 522 | }, 523 | { 524 | "cell_type": "code", 525 | "execution_count": 14, 526 | "metadata": {}, 527 | "outputs": [], 528 | "source": [ 529 | "PZ = PXYZ[['Z', 'Prob']].groupby('Z').sum()" 530 | ] 531 | }, 532 | { 533 | "cell_type": "code", 534 | "execution_count": 15, 535 | "metadata": {}, 536 | "outputs": [ 537 | { 538 | "data": { 539 | "text/html": [ 540 | "
\n", 541 | "\n", 554 | "\n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | "
Prob
Z
Female0.49
Male0.51
\n", 576 | "
" 577 | ], 578 | "text/plain": [ 579 | " Prob\n", 580 | "Z \n", 581 | "Female 0.49\n", 582 | "Male 0.51" 583 | ] 584 | }, 585 | "execution_count": 15, 586 | "metadata": {}, 587 | "output_type": "execute_result" 588 | } 589 | ], 590 | "source": [ 591 | "PZ" 592 | ] 593 | }, 594 | { 595 | "cell_type": "markdown", 596 | "metadata": {}, 597 | "source": [ 598 | "And the conditional probability $P\\left(\\mathrm{Drug}|\\mathrm{Gender}\\right)$ is:" 599 | ] 600 | }, 601 | { 602 | "cell_type": "code", 603 | "execution_count": 16, 604 | "metadata": {}, 605 | "outputs": [], 606 | "source": [ 607 | "PX_Z = PXYZ[['X', 'Z', 'Prob']].groupby(['X', 'Z'], as_index=False).sum()\n", 608 | "PX_Z['Prob'] = PX_Z.apply(lambda x: x['Prob']/PZ.loc[x.Z], axis=1) #Normalization" 609 | ] 610 | }, 611 | { 612 | "cell_type": "code", 613 | "execution_count": 17, 614 | "metadata": {}, 615 | "outputs": [], 616 | "source": [ 617 | "PX_Z.set_index(['X', 'Z'], inplace=True)" 618 | ] 619 | }, 620 | { 621 | "cell_type": "code", 622 | "execution_count": 18, 623 | "metadata": {}, 624 | "outputs": [ 625 | { 626 | "data": { 627 | "text/html": [ 628 | "
\n", 629 | "\n", 642 | "\n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | "
Prob
XZ
NoFemale0.234694
Male0.754902
YesFemale0.765306
Male0.245098
\n", 676 | "
" 677 | ], 678 | "text/plain": [ 679 | " Prob\n", 680 | "X Z \n", 681 | "No Female 0.234694\n", 682 | " Male 0.754902\n", 683 | "Yes Female 0.765306\n", 684 | " Male 0.245098" 685 | ] 686 | }, 687 | "execution_count": 18, 688 | "metadata": {}, 689 | "output_type": "execute_result" 690 | } 691 | ], 692 | "source": [ 693 | "PX_Z" 694 | ] 695 | }, 696 | { 697 | "cell_type": "markdown", 698 | "metadata": {}, 699 | "source": [ 700 | "The lack of correction for 0.009 subsists even in $P\\left(X=\\mathrm{Yes}|\\mathrm{Male}\\right)$ which is why we obtain 0.245 instead of 0.247" 701 | ] 702 | }, 703 | { 704 | "cell_type": "markdown", 705 | "metadata": {}, 706 | "source": [ 707 | "The results of the intervention, as calculated by inverse probability weighing, are then:" 708 | ] 709 | }, 710 | { 711 | "cell_type": "code", 712 | "execution_count": 19, 713 | "metadata": {}, 714 | "outputs": [], 715 | "source": [ 716 | "PYdoXZ = PXYZ.copy()" 717 | ] 718 | }, 719 | { 720 | "cell_type": "code", 721 | "execution_count": 20, 722 | "metadata": {}, 723 | "outputs": [], 724 | "source": [ 725 | "PYdoXZ['Prob'] = PYdoXZ.apply(lambda x: x.Prob/PX_Z.loc[x.X, x.Z], axis=1)" 726 | ] 727 | }, 728 | { 729 | "cell_type": "code", 730 | "execution_count": 21, 731 | "metadata": {}, 732 | "outputs": [ 733 | { 734 | "data": { 735 | "text/html": [ 736 | "
\n", 737 | "\n", 750 | "\n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | "
XYZProb
0YesYesMale0.473
1YesYesFemale0.358
2YesNoMale0.037
3YesNoFemale0.132
4NoYesMale0.442
5NoYesFemale0.337
6NoNoMale0.068
7NoNoFemale0.153
\n", 819 | "
" 820 | ], 821 | "text/plain": [ 822 | " X Y Z Prob\n", 823 | "0 Yes Yes Male 0.473\n", 824 | "1 Yes Yes Female 0.358\n", 825 | "2 Yes No Male 0.037\n", 826 | "3 Yes No Female 0.132\n", 827 | "4 No Yes Male 0.442\n", 828 | "5 No Yes Female 0.337\n", 829 | "6 No No Male 0.068\n", 830 | "7 No No Female 0.153" 831 | ] 832 | }, 833 | "execution_count": 21, 834 | "metadata": {}, 835 | "output_type": "execute_result" 836 | } 837 | ], 838 | "source": [ 839 | "PYdoXZ.round(3)" 840 | ] 841 | }, 842 | { 843 | "cell_type": "markdown", 844 | "metadata": {}, 845 | "source": [ 846 | "The small differences we observe with respect to the corrected version in the [online Errata](http://bayes.cs.ucla.edu/PRIMER/pearl-etal-2016-primer-errata-pages-july2020.pdf) are due to the lack of correction from 0.01 to 0.009 as noted above." 847 | ] 848 | }, 849 | { 850 | "cell_type": "markdown", 851 | "metadata": {}, 852 | "source": [ 853 | "And finally, the intervention values are:" 854 | ] 855 | }, 856 | { 857 | "cell_type": "code", 858 | "execution_count": 22, 859 | "metadata": {}, 860 | "outputs": [], 861 | "source": [ 862 | "PYdoX = PYdoXZ[['X', 'Y', 'Prob']].groupby(['Y', 'X'], as_index=False).sum()" 863 | ] 864 | }, 865 | { 866 | "cell_type": "code", 867 | "execution_count": 23, 868 | "metadata": {}, 869 | "outputs": [ 870 | { 871 | "data": { 872 | "text/html": [ 873 | "
\n", 874 | "\n", 887 | "\n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | "
YXProb
0NoNo0.221
1NoYes0.169
2YesNo0.779
3YesYes0.831
\n", 923 | "
" 924 | ], 925 | "text/plain": [ 926 | " Y X Prob\n", 927 | "0 No No 0.221\n", 928 | "1 No Yes 0.169\n", 929 | "2 Yes No 0.779\n", 930 | "3 Yes Yes 0.831" 931 | ] 932 | }, 933 | "execution_count": 23, 934 | "metadata": {}, 935 | "output_type": "execute_result" 936 | } 937 | ], 938 | "source": [ 939 | "PYdoX.round(3)" 940 | ] 941 | }, 942 | { 943 | "cell_type": "markdown", 944 | "metadata": {}, 945 | "source": [ 946 | "
\n", 947 | " \"Data \n", 948 | "
" 949 | ] 950 | } 951 | ], 952 | "metadata": { 953 | "kernelspec": { 954 | "display_name": "Python 3", 955 | "language": "python", 956 | "name": "python3" 957 | }, 958 | "language_info": { 959 | "codemirror_mode": { 960 | "name": "ipython", 961 | "version": 3 962 | }, 963 | "file_extension": ".py", 964 | "mimetype": "text/x-python", 965 | "name": "python", 966 | "nbconvert_exporter": "python", 967 | "pygments_lexer": "ipython3", 968 | "version": "3.8.5" 969 | }, 970 | "varInspector": { 971 | "cols": { 972 | "lenName": 16, 973 | "lenType": 16, 974 | "lenVar": 40 975 | }, 976 | "kernels_config": { 977 | "python": { 978 | "delete_cmd_postfix": "", 979 | "delete_cmd_prefix": "del ", 980 | "library": "var_list.py", 981 | "varRefreshCmd": "print(var_dic_list())" 982 | }, 983 | "r": { 984 | "delete_cmd_postfix": ") ", 985 | "delete_cmd_prefix": "rm(", 986 | "library": "var_list.r", 987 | "varRefreshCmd": "cat(var_dic_list()) " 988 | } 989 | }, 990 | "types_to_exclude": [ 991 | "module", 992 | "function", 993 | "builtin_function_or_method", 994 | "instance", 995 | "_Feature" 996 | ], 997 | "window_display": false 998 | } 999 | }, 1000 | "nbformat": 4, 1001 | "nbformat_minor": 4 1002 | } 1003 | -------------------------------------------------------------------------------- /3.7 - Mediation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "
\n", 8 | "
\"Data
\n", 9 | "

Causal Inference In Statistics - A Primer

\n", 10 | "

3.7 Mediation

\n", 11 | "

Bruno Gonçalves
\n", 12 | " www.data4sci.com
\n", 13 | " @bgoncalves, @data4sci

\n", 14 | "

\n", 15 | "

\n", 16 | "
" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "from collections import Counter\n", 26 | "from pprint import pprint\n", 27 | "\n", 28 | "import pandas as pd\n", 29 | "import numpy as np\n", 30 | "\n", 31 | "import matplotlib\n", 32 | "import matplotlib.pyplot as plt \n", 33 | "\n", 34 | "from CausalModel import CausalModel\n", 35 | "\n", 36 | "import watermark\n", 37 | "\n", 38 | "%load_ext watermark\n", 39 | "%matplotlib inline" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "We start by printing out the versions of the libraries we're using for future reference" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 2, 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "name": "stdout", 56 | "output_type": "stream", 57 | "text": [ 58 | "Python implementation: CPython\n", 59 | "Python version : 3.8.5\n", 60 | "IPython version : 7.19.0\n", 61 | "\n", 62 | "Compiler : Clang 10.0.0 \n", 63 | "OS : Darwin\n", 64 | "Release : 20.3.0\n", 65 | "Machine : x86_64\n", 66 | "Processor : i386\n", 67 | "CPU cores : 16\n", 68 | "Architecture: 64bit\n", 69 | "\n", 70 | "Git hash: c41ed8befecf49059999a81adcec3b026fe5d326\n", 71 | "\n", 72 | "json : 2.0.9\n", 73 | "watermark : 2.1.0\n", 74 | "pandas : 1.1.3\n", 75 | "numpy : 1.19.2\n", 76 | "matplotlib: 3.3.2\n", 77 | "\n" 78 | ] 79 | } 80 | ], 81 | "source": [ 82 | "%watermark -n -v -m -g -iv" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "Load default figure style" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 3, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "plt.style.use('./d4sci.mplstyle')\n", 99 | "colors = plt.rcParams['axes.prop_cycle'].by_key()['color']" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "We build the DAG from 
Fig 3.11" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 4, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "G = CausalModel()\n", 116 | "G.add_causation('X', 'Y')\n", 117 | "G.add_causation('X', 'Z')\n", 118 | "G.add_causation('Z', 'Y')\n", 119 | "\n", 120 | "G.pos = {'X': (0, 0), 'Z': (1, 1), 'Y': (2, 0)}" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 5, 126 | "metadata": {}, 127 | "outputs": [ 128 | { 129 | "data": { 130 | "image/png": "\n", 131 | "text/plain": [ 132 | "
" 133 | ] 134 | }, 135 | "metadata": {}, 136 | "output_type": "display_data" 137 | } 138 | ], 139 | "source": [ 140 | "fig, ax = plt.subplots(1, figsize=(2.2,2.2))\n", 141 | "G.plot(ax=ax)" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 6, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "G.save_model('dags/Primer.Fig.3.11.dot')" 151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "metadata": {}, 156 | "source": [ 157 | "And the DAG from Fig 3.12" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 7, 163 | "metadata": {}, 164 | "outputs": [], 165 | "source": [ 166 | "G = CausalModel()\n", 167 | "G.add_causation('X', 'Y')\n", 168 | "G.add_causation('X', 'Z')\n", 169 | "G.add_causation('Z', 'Y')\n", 170 | "G.add_causation('I', 'Y')\n", 171 | "G.add_causation('I', 'Z')\n", 172 | "\n", 173 | "G.pos = {'X': (0, 0), 'Z': (1, 1), 'Y': (2, 0), 'I': (2, 1)}" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 8, 179 | "metadata": {}, 180 | "outputs": [ 181 | { 182 | "data": { 183 | "image/png": "\n", 184 | "text/plain": [ 185 | "
" 186 | ] 187 | }, 188 | "metadata": {}, 189 | "output_type": "display_data" 190 | } 191 | ], 192 | "source": [ 193 | "fig, ax = plt.subplots(1, figsize=(2.2,2.2))\n", 194 | "G.plot(ax=ax)" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 9, 200 | "metadata": {}, 201 | "outputs": [], 202 | "source": [ 203 | "G.save_model('dags/Primer.Fig.3.12.dot')" 204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": {}, 209 | "source": [ 210 | "We can easily compute the modified graph resulting from intervening on X and Z:" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": 10, 216 | "metadata": {}, 217 | "outputs": [ 218 | { 219 | "data": { 220 | "image/png": "\n", 221 | "text/plain": [ 222 | "
" 223 | ] 224 | }, 225 | "metadata": {}, 226 | "output_type": "display_data" 227 | } 228 | ], 229 | "source": [ 230 | "G2 = G.intervention_graph(['X', 'Z'])\n", 231 | "fig, ax = plt.subplots(1, figsize=(2.2,2.2))\n", 232 | "G2.plot(ax=ax)" 233 | ] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "metadata": {}, 238 | "source": [ 239 | "
\n", 240 | " \"Data \n", 241 | "
" 242 | ] 243 | } 244 | ], 245 | "metadata": { 246 | "kernelspec": { 247 | "display_name": "Python 3", 248 | "language": "python", 249 | "name": "python3" 250 | }, 251 | "language_info": { 252 | "codemirror_mode": { 253 | "name": "ipython", 254 | "version": 3 255 | }, 256 | "file_extension": ".py", 257 | "mimetype": "text/x-python", 258 | "name": "python", 259 | "nbconvert_exporter": "python", 260 | "pygments_lexer": "ipython3", 261 | "version": "3.8.5" 262 | }, 263 | "varInspector": { 264 | "cols": { 265 | "lenName": 16, 266 | "lenType": 16, 267 | "lenVar": 40 268 | }, 269 | "kernels_config": { 270 | "python": { 271 | "delete_cmd_postfix": "", 272 | "delete_cmd_prefix": "del ", 273 | "library": "var_list.py", 274 | "varRefreshCmd": "print(var_dic_list())" 275 | }, 276 | "r": { 277 | "delete_cmd_postfix": ") ", 278 | "delete_cmd_prefix": "rm(", 279 | "library": "var_list.r", 280 | "varRefreshCmd": "cat(var_dic_list()) " 281 | } 282 | }, 283 | "types_to_exclude": [ 284 | "module", 285 | "function", 286 | "builtin_function_or_method", 287 | "instance", 288 | "_Feature" 289 | ], 290 | "window_display": false 291 | } 292 | }, 293 | "nbformat": 4, 294 | "nbformat_minor": 4 295 | } 296 | -------------------------------------------------------------------------------- /CausalModel.py: -------------------------------------------------------------------------------- 1 | ### −∗− mode : python ; −∗− 2 | # @file CausalModel.py 3 | # @author Bruno Goncalves 4 | ###################################################### 5 | 6 | import networkx as nx 7 | from networkx.drawing.nx_pydot import graphviz_layout 8 | from itertools import combinations 9 | import numpy as np 10 | import pandas as pd 11 | import matplotlib as mpl 12 | import matplotlib.pyplot as plt 13 | import re 14 | import base64 15 | import requests 16 | 17 | import warnings 18 | warnings.filterwarnings("ignore") 19 | 20 | from tqdm import tqdm 21 | tqdm.pandas() 22 | 23 | plt.style.use('./d4sci.mplstyle') 24 | 25 | 
class CausalModel(object):
    """Simple Causal Model Implementation

    Provides a way to represent causal DAGs.

    Attributes are documented here rather than inline:

    * ``dag``    -- the `networkx.DiGraph` holding the causal links.
    * ``pos``    -- ``None`` or a dict mapping each node to its plotting
      coordinates. It stays ``None`` until it is assigned by the caller,
      loaded from a `dot` file, or computed by `plot`/`plot_path`.
    * ``colors`` -- the color cycle of the active matplotlib style.
    """

    def __init__(self, filename=None):
        """Create an empty model, or load one from a `dot` file.

        Parameters
        ----------
        filename : str or file, optional
            When given, the model is read through `load_model`; otherwise an
            empty directed graph is created.
        """
        self.pos = None

        if filename is not None:
            self.load_model(filename)
        else:
            self.dag = nx.DiGraph()

        self.colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

    def copy(self):
        """Return an independent copy of the model.

        The graph, the position dict and the color list are copied, so
        modifying the copy does not alter this instance.

        Returns
        -------
        CausalModel
            The copied model.
        """
        G = CausalModel()
        G.dag = self.dag.copy()
        # `pos` is None until a layout exists; dict(None) would raise.
        G.pos = None if self.pos is None else dict(self.pos)
        G.colors = list(self.colors)

        return G

    def add_causation(self, source, target, label=None):
        """Add a causal link between source and target with an optional label.

        Parameters
        ----------
        source : node-like
            The source node

        target : node-like
            The target node

        label : string-like or None
            The label for the causal link

        Returns
        -------
        None

        Examples
        --------
        >>> G = CausalModel()
        >>> G.add_causation('X', 'Y')

        """
        if label is None:
            self.dag.add_edge(source, target)
        else:
            self.dag.add_edge(source, target, label=label)

    def load_model(self, path):
        """Initialize the CausalModel object by reading the information from
        the dot file with the passed path.

        The file should be a `dot` file and if it contains multiple graphs,
        only the first such graph is returned. All graphs _except_ the first
        are silently ignored.

        Node positions are read from the `x` and `y` node attributes. If any
        node has no `x` attribute, `self.pos` is set to None.

        Parameters
        ----------
        path : str or file
            Filename or file handle.

        Returns
        -------
        None

        Examples
        --------
        >>> G = CausalModel()
        >>> G.load_model('temp.dot')

        Notes
        -----
        The heavy lifting is done by `networkx.drawing.nx_pydot.read_dot`

        """
        G = nx.drawing.nx_pydot.read_dot(path)

        pos = {}

        for key, values in G.nodes(data=True):
            # Coordinates are all-or-nothing: one node without them discards
            # the whole layout.
            if 'x' not in values:
                pos = None
                break

            # Attribute values may arrive wrapped in double quotes
            # (e.g. x="-1"), which float() would reject.
            x = values['x'].strip('"')
            y = values['y'].strip('"')

            pos[key] = (float(x), float(y))

        self.dag = nx.DiGraph(G)
        self.pos = pos

    def save_model(self, path):
        """Save the causal model as a `dot` file.

        When positions are available they are stored as string-valued `x`
        and `y` attributes on each node.

        Parameters
        ----------
        path : str or file
            Filename or file handle.

        Returns
        -------
        None

        Examples
        --------
        >>> G = CausalModel()
        >>> G.add_causation('X', 'Y')
        >>> G.add_causation('Y', 'Z')
        >>> G.pos = {'X':(-1, 0), 'Y': (0, 0), 'Z': (1, 0)}
        >>> G.save_model('temp.dot')

        Notes
        -----
        The heavy lifting is done by `networkx.drawing.nx_pydot.write_dot`

        """
        # Work on a copy so the coordinates do not leak into self.dag
        G = self.dag.copy()

        if self.pos is not None:
            for node in list(G.nodes()):
                G.nodes[node]['x'] = str(self.pos[node][0])
                G.nodes[node]['y'] = str(self.pos[node][1])

        nx.drawing.nx_pydot.write_dot(G, path)

    def layout(self):
        """Compute node positions using the graphviz `dot` layout.

        The coordinates returned by graphviz are rescaled with
        `networkx.rescale_layout` (scale 1). If every node ends up with the
        same x coordinate, the axes are swapped so that the graph is drawn
        horizontally instead of vertically.

        Returns
        -------
        dict
            Mapping from node to its coordinates. `self.pos` is not modified.

        Examples
        --------
        >>> G = CausalModel()
        >>> G.add_causation('X', 'Y')
        >>> pos = G.layout()

        Notes
        -----
        The heavy lifting is done by
        `networkx.drawing.nx_pydot.graphviz_layout`

        """
        pos = graphviz_layout(self.dag, 'dot')

        keys = list(pos.keys())

        if not keys:
            # Nothing to rescale for an empty graph
            return {}

        coords = np.array([pos[key] for key in keys])
        coords = nx.rescale_layout(coords, 1)
        pos = dict(zip(keys, coords))

        xs = {value[0] for value in pos.values()}

        # All x coordinates are the same, switch x and y
        # to make it horizontal instead of vertical
        if len(xs) == 1:
            pos = {key: [-value[1], value[0]] for key, value in pos.items()}

        return pos

    def parents(self, node):
        """Obtain the parents of a node.

        Parents are the nodes at the other end of incoming edges.

        Parameters
        ----------
        node : node in the graph
            The child node

        Returns
        -------
        list
            List of the parents of `node`

        Examples
        --------
        >>> G.parents('Y')

        Notes
        -----
        The heavy lifting is done by `networkx.DiGraph.predecessors`

        """
        return list(self.dag.predecessors(node))

    def ancestors(self, node):
        """Obtain the ancestors of a node.

        Ancestors are all the nodes from which `node` can be reached by
        following directed edges.

        Parameters
        ----------
        node : node in the graph
            The descendant node

        Returns
        -------
        list
            List of the ancestors of `node`

        Examples
        --------
        >>> G.ancestors('Y')

        Notes
        -----
        The heavy lifting is done by `networkx.ancestors`

        """
        return list(nx.ancestors(self.dag, node))

    def children(self, source):
        """Obtain the children of a node.

        Children are the nodes at the other end of outgoing edges.

        Parameters
        ----------
        source : node in `G`
            The parent node

        Returns
        -------
        list()
            List of the children of `source` in `G`

        Examples
        --------
        >>> G.children('X')

        Notes
        -----
        The heavy lifting is done by `networkx.successors`

        """
        return list(self.dag.successors(source))

    def descendants(self, source):
        """Obtain the descendants of a node.

        Descendants are all the nodes reachable through outgoing edges.

        Parameters
        ----------
        source : node in `G`
            The ancestor node

        Returns
        -------
        list()
            List of the descendants of `source` in `G`

        Examples
        --------
        >>> G.descendants('X')

        Notes
        -----
        The heavy lifting is done by `networkx.descendants`

        """
        return list(nx.descendants(self.dag, source))

    def backdoor_paths(self, source, target):
        """Obtain the non-directed paths between two nodes.

        The result is every simple path between `source` and `target` that
        ignores edge direction (see `all_paths`) minus the directed paths
        from `source` to `target` (see `directed_paths`).

        Parameters
        ----------
        source : node in the graph
            The starting node

        target : node in the graph
            The ending node

        Returns
        -------
        set
            Set of paths, each one a tuple of nodes.

        Examples
        --------
        >>> G.backdoor_paths('X', 'Y')

        Notes
        -----
        No check is made on the direction of the first edge, so the result
        can also contain paths that leave `source` through an outgoing edge
        and later traverse an edge against its direction.

        """
        allPaths = self.all_paths(source, target)
        directed = self.directed_paths(source, target)

        return allPaths - directed

    def directed_paths(self, source, target):
        """Obtain the directed simple paths from `source` to `target`.

        Parameters
        ----------
        source : node in the graph
            The starting node

        target : node in the graph
            The ending node

        Returns
        -------
        set
            Set of paths, each one a tuple of nodes.

        Examples
        --------
        >>> G.directed_paths('X', 'Y')

        Notes
        -----
        The heavy lifting is done by `networkx.all_simple_paths`

        """
        return {tuple(path) for path in nx.all_simple_paths(self.dag, source, target)}

    def all_paths(self, source, target):
        """Obtain all simple paths between two nodes ignoring edge direction.

        Parameters
        ----------
        source : node in the graph
            The starting node

        target : node in the graph
            The ending node

        Returns
        -------
        set
            Set of paths, each one a tuple of nodes.

        Examples
        --------
        >>> G.all_paths('X', 'Y')

        Notes
        -----
        The heavy lifting is done by `networkx.all_simple_paths` applied to
        the undirected version of the graph.

        """
        return {tuple(path) for path in nx.all_simple_paths(self.dag.to_undirected(), source, target)}

    def all_paths_conditional(self, source, target, remove):
        """Obtain all undirected simple paths that avoid a set of nodes.

        The nodes in `remove` are deleted from an undirected copy of the
        graph before the paths are enumerated. The model itself is left
        untouched.

        Parameters
        ----------
        source : node in the graph
            The starting node

        target : node in the graph
            The ending node

        remove : iterable of nodes
            Nodes that the paths are not allowed to go through

        Returns
        -------
        set
            Set of paths, each one a tuple of nodes.

        Examples
        --------
        >>> G.all_paths_conditional('X', 'Y', ['Z'])

        Notes
        -----
        The heavy lifting is done by `networkx.all_simple_paths`

        """
        dag = self.dag.to_undirected()
        dag.remove_nodes_from(remove)

        return {tuple(path) for path in nx.all_simple_paths(dag, source, target)}

    def plot_path(self, path, edges=False, ax=None, conditional=False, lw=3):
        """Plot the graph highlighting one path.

        Parameters
        ----------
        path : sequence
            Either a sequence of nodes (default) or, when `edges` is True,
            a collection of `(source, target)` edges.

        edges : bool
            Whether `path` is already a list of edges.

        ax : matplotlib axes or None
            Axes to draw on. A new figure is created when None.

        conditional : bool
            Draw the highlighted edges with a dotted line.

        lw : number
            Line width of the highlighted edges.

        Returns
        -------
        None

        Examples
        --------
        >>> G.plot_path(('X', 'Z', 'Y'))

        """
        fig = None

        if ax is None:
            fig, ax = plt.subplots(1)

        if self.pos is None:
            # Same fallback used by `plot`; drawing labels needs positions
            self.pos = self.layout()

        if edges:
            edgelist = path
        else:
            edgelist = {(path[i], path[i+1]) for i in range(len(path)-1)}

        # Edges that are not part of the path, drawn thin below.
        # NOTE(review): a path edge listed against the direction of the
        # graph edge is not removed here, so it is also drawn thin.
        remaining = set(self.dag.edges()) - set(edgelist)

        nx.draw(self.dag, self.pos, node_color=self.colors[0], ax=ax, edgelist=[])
        nx.draw_networkx_labels(self.dag, self.pos, ax=ax)

        if conditional:
            nx.draw_networkx_edges(self.dag, self.pos,
                                   edgelist=edgelist,
                                   width=lw, edge_color=self.colors[1], ax=ax, style='dotted')
        else:
            nx.draw_networkx_edges(self.dag, self.pos,
                                   edgelist=edgelist,
                                   width=lw, edge_color=self.colors[1], ax=ax)

        nx.draw_networkx_edges(self.dag, self.pos,
                               edgelist=remaining,
                               width=1, ax=ax)

        if fig is not None:
            fig.tight_layout()

    def inputs(self):
        """Return the set of nodes without incoming edges."""
        return {node for node, deg in self.dag.in_degree() if deg == 0}

    def outputs(self):
        """Return the set of nodes without outgoing edges."""
        return {node for node, deg in self.dag.out_degree() if deg == 0}

    def plot(self, output=None, pos=None, legend=False, ax=None, colors=False):
        """Plot the causal graph.

        Parameters
        ----------
        output : str or None
            When given, the figure is saved to this path (300 dpi) and
            closed; otherwise it is shown.

        pos : dict or None
            Node positions. Defaults to `self.pos`, which is computed with
            `layout` (and stored) if it is not yet available.

        legend : bool
            Add a legend describing the node colors.

        ax : matplotlib axes or None
            Axes to draw on.

        colors : bool
            Color input and output nodes differently from the other nodes.

        Returns
        -------
        None
        """
        if pos is None:
            if self.pos is None:
                self.pos = self.layout()

            pos = self.pos

        nodes = list(pos.keys())
        inputs = self.inputs()
        outputs = self.outputs()

        node_colors = []

        for node in nodes:
            if colors and node in inputs:
                node_colors.append(self.colors[2])
            elif colors and node in outputs:
                node_colors.append(self.colors[1])
            else:
                node_colors.append(self.colors[0])

        # ax=None lets networkx pick the current axes
        nx.draw(self.dag, pos, nodelist=nodes, node_color=node_colors, ax=ax)

        labels = {(node_i, node_j): label
                  for node_i, node_j, label in self.dag.edges(data='label', default='')}

        nx.draw_networkx_labels(self.dag, pos, ax=ax)
        nx.draw_networkx_edge_labels(self.dag, pos, labels, ax=ax)

        if legend:
            node_types = ['Regular node', 'Input', 'Output']
            legend_colors = [self.colors[0], self.colors[2], self.colors[1]]

            patches = [mpl.patches.Patch(color=legend_colors[i], label=label)
                       for i, label in enumerate(node_types)]

            plt.legend(handles=patches, fontsize=10)

        plt.gcf().tight_layout()

        if output is None:
            plt.show()
        else:
            plt.savefig(output, dpi=300)
            plt.close()

    def v_structures(self):
        """Find the v-structures (unshielded colliders) of the graph.

        A v-structure is a pair of edges pointing into the same node whose
        source nodes are not connected to each other in either direction.

        Returns
        -------
        set
            Set of sorted pairs of edges, `((a, c), (b, c))`.
        """
        structs = set()

        degrees = dict(self.dag.in_degree())

        for node in degrees:
            if degrees[node] >= 2:
                for edge_i, edge_j in combinations(self.dag.in_edges(node), 2):
                    node_i = edge_i[0]
                    node_j = edge_j[0]

                    # Only unshielded colliders: the two parents must not be adjacent
                    if (node_i, node_j) not in self.dag.edges and (node_j, node_i) not in self.dag.edges:
                        structs.add(tuple(sorted([edge_i, edge_j])))

        return structs

    def equivalence_class(self):
        """List the graphs obtained by reversing one edge that keep the same
        v-structures and remain acyclic.

        Only single edge reversals of this graph are considered.

        Returns
        -------
        list
            List of `[model, edge]` pairs. The first entry is a copy of this
            model with an empty edge; the others carry the reversed edge as
            `(source, target)` in its new orientation.
        """
        edges = list(self.dag.edges(data=True))

        equivalent = [[self.copy(), []]]

        structs = self.v_structures()

        for i, edge in enumerate(edges):
            new_edges = list(edges)

            new_edges[i] = (edge[1], edge[0], edge[2])

            G = CausalModel()
            G.dag.add_edges_from(new_edges)

            new_structs = G.v_structures()

            # Acyclicity check instead of enumerating every simple cycle
            if new_structs == structs and nx.is_directed_acyclic_graph(G.dag):
                G.pos = None if self.pos is None else dict(self.pos)
                G.colors = list(self.colors)
                equivalent.append([G, new_edges[i][:2]])

        return equivalent

    def basis_set(self):
        """List the conditional independencies implied for each node.

        Each node is declared independent of every node that is neither one
        of its parents nor one of its descendants, conditional on its
        parents.

        Returns
        -------
        list
            Sorted list of strings such as `'X _||_ W, Z | Y'`, or
            `'X _||_ W'` for nodes without parents.
        """
        nodes = set(self.dag.nodes())

        eqn = []

        for node in nodes:
            parents = set(self.parents(node))
            descendants = set(self.descendants(node))

            others = {n for n in nodes if n != node}
            others -= parents
            others -= descendants

            others = sorted(others)
            parents = sorted(parents)

            if len(others) > 0:
                if len(parents) > 0:
                    eqn.append('%s _||_ %s | %s' % (node, ", ".join(others), ', '.join(parents)))
                else:
                    eqn.append('%s _||_ %s' % (node, ", ".join(others)))

        return sorted(eqn)

    def _drop_isolated_nodes(self):
        """Remove, in place, the nodes left without any edge (and their positions)."""
        degrees = dict(self.dag.degree())

        remove = [node for node in degrees if degrees[node] == 0]

        if self.pos is not None:
            for node in remove:
                self.pos.pop(node, None)

        self.dag.remove_nodes_from(remove)

    def intervention_graph(self, nodes, drop_nodes=False):
        """Build the graph resulting from intervening on `nodes`.

        All the edges pointing into the intervened nodes are removed from a
        copy of the model.

        Parameters
        ----------
        nodes : node or iterable of nodes
            The node(s) being intervened on.

        drop_nodes : bool
            Also remove the nodes left without any edge.

        Returns
        -------
        CausalModel
            The modified copy. This model is left untouched.

        Examples
        --------
        >>> G2 = G.intervention_graph(['X', 'Z'])

        """
        G = self.copy()

        # in_edges takes either one node or a container of nodes, so a
        # single call covers both ways of calling this method.
        G.dag.remove_edges_from(list(self.dag.in_edges(nodes)))

        if drop_nodes:
            G._drop_isolated_nodes()

        return G

    def conditional_intervention_graph(self, nodes, dependencies, drop_nodes=False):
        """Build the graph resulting from a conditional intervention.

        The edges pointing into the intervened nodes are removed from a copy
        of the model and the edges in `dependencies` are added afterwards.

        Parameters
        ----------
        nodes : node or iterable of nodes
            The node(s) being intervened on.

        dependencies : iterable of edges
            Edges to add after the incoming edges are removed.

        drop_nodes : bool
            Also remove the nodes left without any edge.

        Returns
        -------
        CausalModel
            The modified copy. This model is left untouched.
        """
        G = self.copy()

        G.dag.remove_edges_from(list(self.dag.in_edges(nodes)))

        G.dag.add_edges_from(dependencies)

        if drop_nodes:
            G._drop_isolated_nodes()

        return G



if __name__ == "__main__":
    names = ['m331', 'moAh6a6', 'vcFQ']

    graph_id = 'temp'#names[2]

    G = CausalModel()#graph_id)
| G.load_model('dags/Primer.Fig.2.9.dot') 681 | 682 | Gx = G.intervention_graph('X') 683 | Gx2 = Gx.intervention_graph('Z3') 684 | print("ok") -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Data For Science 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![GitHub](https://img.shields.io/github/license/DataForScience/Causality) 2 | [![Twitter @data4sci](https://img.shields.io/twitter/follow/data4sci)](https://twitter.com/intent/follow?screen_name=data4sci) 3 | ![GitHub top language](https://img.shields.io/github/languages/top/DataForScience/Causality) 4 | ![GitHub repo size](https://img.shields.io/github/repo-size/DataForScience/Causality) 5 | ![GitHub last commit](https://img.shields.io/github/last-commit/DataForScience/Causality) 6 | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/DataForScience/Causality/master) 7 | 8 | [![Sunday Briefing](https://img.shields.io/badge/Sunday_Briefing-Subscribe-blue)](https://data4sci.ck.page/8a51c452bc) 9 | 10 | # Causality 11 | 12 | How do causes lead to effects? Can you associate the cause leading to the observed effect? Big Data opens the doors for us to be able to answer questions such as this, but before we are able to do so, we must dive into the field of Causal Inference, a field championed by Judea Pearl. 13 | In this series of blog posts we will learn about the main ideas of Causality by working our way through “Causal Inference In Statistics” a nice Primer co-authored by Pearl himself. 14 | 15 |

16 | 17 |
18 | Amazon Affiliate Link: https://amzn.to/3gsFlkO 19 |

20 | 21 | 22 | The book is divided into four chapters. The first chapter covers background material in probability and statistics. The other three chapters are (roughly) organized to match the “Three steps” in the ladder of causality as defined by Pearl: 23 | 24 | 1. — Association 25 | 2. — Intervention 26 | 3. — Counterfactuals 27 | 28 | In this series of blog posts we will cover most of the content of the book, with a special emphasis on the parts that I believe are more interesting or relevant to practical applications. In addition to summarizing and explaining the content, we will also explore some of the ideas using simple (or as simple as possible) Python code you can run on Binder: [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/DataForScience/Causality/master) 29 | 30 | 31 | 32 | ## Chapter 1 33 | 1.2 - [Simpson's Paradox](https://data4sci.substack.com/p/simpsons-paradox) -- [1.2 - Simpson's Paradox.ipynb](https://github.com/DataForScience/Causality/blob/master/1.2%20-%20Simpsons%20Paradox.ipynb) 34 | 35 | 1.3 - [Probability Theory](https://data4sci.substack.com/p/probability-theory) -- [1.3 - Probability and Statistics.ipynb](https://github.com/DataForScience/Causality/blob/master/1.3%20-%20Probability%20and%20Statistics.ipynb) 36 | 37 | 1.4 - [Graphs](https://data4sci.substack.com/p/graphs) -- [1.4 - Graphs.ipynb](https://github.com/DataForScience/Causality/blob/master/1.4%20-%20Graphs.ipynb) 38 | 39 | 1.5 - [Structural Causal Models](https://data4sci.substack.com/p/structural-causal-models-e4d) -- [1.5 - Structural Causal Models.ipynb](https://github.com/DataForScience/Causality/blob/master/1.5%20-%20Structural%20Causal%20Models.ipynb) 40 | 41 | ## Chapter 2 42 | 2.2 - [Chains and Forks](https://data4sci.substack.com/p/chains-and-forks) -- [2.2 - Chains and Forks.ipynb](https://github.com/DataForScience/Causality/blob/master/2.2%20-%20Chains%20and%20Forks.ipynb) 43 | 44 | 2.3 - 
[Colliders](https://data4sci.substack.com/p/colliders) -- [2.3 - Colliders.ipynb](https://github.com/DataForScience/Causality/blob/master/2.3%20-%20Colliders.ipynb) 45 | 46 | 2.4 - [d-separation](https://data4sci.substack.com/p/d-separation) -- [2.4 - d-separation.ipynb](https://github.com/DataForScience/Causality/blob/master/2.4%20-%20d-separation.ipynb) 47 | 48 | 2.5 - [Model Testing and Causal Search](https://data4sci.substack.com/p/model-testing-and-causal-search) -- [2.5 - Model Testing and Causal Search.ipynb](https://github.com/DataForScience/Causality/blob/master/2.5%20-%20Model%20Testing%20and%20Causal%20Search.ipynb) 49 | 50 | ## Chapter 3 51 | 52 | 3.1 - [Interventions](https://data4sci.substack.com/p/interventions) -- [3.1 - Interventions.ipynb](https://github.com/DataForScience/Causality/blob/master/3.1%20-%20Interventions.ipynb) 53 | 54 | 3.2 - [Adjustment Formula](https://data4sci.substack.com/p/the-adjustment-formula) -- [3.2 - The Adjustment Formula.ipynb](https://github.com/DataForScience/Causality/blob/master/3.2%20-%20The%20Adjustment%20Formula.ipynb) 55 | 56 | 3.3 - [Backdoor Criterion](https://data4sci.substack.com/p/backdoor-criterion) -- [3.3 - Backdoor Criterion.ipynb](https://github.com/DataForScience/Causality/blob/master/3.3%20-%20Backdoor%20Criterion.ipynb) 57 | 58 | 3.4 - [Front-Door Criterion](https://data4sci.substack.com/p/front-door-criterion) -- [3.4 - Front-Door Criterion.ipynb](https://github.com/DataForScience/Causality/blob/master/3.4%20-%20Front-Door%20Criterion.ipynb) 59 | 60 | 3.5 - [Conditional Interventions and Covariate-Specific Effects](https://data4sci.substack.com/p/conditional-interventions-and-covariate) -- [3.5 - Conditional Interventions and Covariate-Specific Effects.ipynb](https://github.com/DataForScience/Causality/blob/master/3.5%20-%20Conditional%20Interventions%20and%20Covariate-Specific%20Effects.ipynb) 61 | 62 | 3.6 - [Inverse Probability 
Weighting](https://data4sci.substack.com/p/inverse-probability-weighing) -- [3.6 - Inverse Probability Weighing.ipynb](https://github.com/DataForScience/Causality/blob/master/3.6%20-%20Inverse%20Probability%20Weighing.ipynb) 63 | 64 | 3.7 - [Mediation](https://data4sci.substack.com/p/mediation) -- [3.7 - Mediation.ipynb](https://github.com/DataForScience/Causality/blob/master/3.7%20-%20Mediation.ipynb) 65 | 66 | --- 67 | 68 | For a more in-depth analysis, check out Pearl's more technical book: 69 | 70 |

71 | 72 |
73 | Amazon Affiliate Link: https://amzn.to/2OSBP6u 74 |

75 | 76 | --- 77 | 78 | Sign up to the [Data Science Briefing](http://data4sci.com/newsletter) newsletter to be the first to know when we publish new posts: 79 | 80 |

81 | 82 |

83 | -------------------------------------------------------------------------------- /d4sci.mplstyle: -------------------------------------------------------------------------------- 1 | # Data For Science style 2 | # Author: Bruno Goncalves 3 | # Modified from the matplotlib FiveThirtyEight style by 4 | # Author: Cameron Davidson-Pilon, replicated styles from FiveThirtyEight.com 5 | # See https://www.dataorigami.net/blogs/fivethirtyeight-mpl 6 | 7 | lines.linewidth: 4 8 | lines.solid_capstyle: butt 9 | 10 | legend.fancybox: true 11 | 12 | axes.prop_cycle: cycler('color', ['51a7f9', 'cf51f9', '70bf41', 'f39019', 'f9e351', 'f9517b', '6d904f', '8b8b8b','810f7c']) 13 | 14 | axes.labelsize: large 15 | axes.axisbelow: true 16 | axes.grid: true 17 | axes.edgecolor: f0f0f0 18 | axes.linewidth: 3.0 19 | axes.titlesize: x-large 20 | 21 | patch.edgecolor: f0f0f0 22 | patch.linewidth: 0.5 23 | 24 | svg.fonttype: path 25 | 26 | grid.linestyle: - 27 | grid.linewidth: 1.0 28 | 29 | xtick.major.size: 0 30 | xtick.minor.size: 0 31 | ytick.major.size: 0 32 | ytick.minor.size: 0 33 | 34 | font.size: 24.0 35 | 36 | savefig.edgecolor: f0f0f0 37 | savefig.facecolor: f0f0f0 38 | 39 | figure.subplot.left: 0.08 40 | figure.subplot.right: 0.95 41 | figure.subplot.bottom: 0.07 42 | figure.figsize: 12.8, 8.8 43 | figure.autolayout: True 44 | figure.dpi: 300 45 | -------------------------------------------------------------------------------- /dags/Causality.Fig.1.2.dot: -------------------------------------------------------------------------------- 1 | strict digraph { 2 | X1 [x=0, y=2]; 3 | X2 [x=1, y=1]; 4 | X3 [x="-1", y=1]; 5 | X4 [x=0, y=0]; 6 | X5 [x=0, y="-1"]; 7 | X1 -> X2; 8 | X1 -> X3; 9 | X2 -> X4; 10 | X3 -> X4; 11 | X4 -> X5; 12 | } 13 | -------------------------------------------------------------------------------- /dags/Primer.Fig.1.10.dot: -------------------------------------------------------------------------------- 1 | strict digraph { 2 | "Z" [x=0, y=1]; 3 | "X" 
[x="-1", y=0]; 4 | "Y" [x=1, y=0]; 5 | "Z" -> "X"; 6 | "Z" -> "Y"; 7 | "X" -> "Y"; 8 | } 9 | -------------------------------------------------------------------------------- /dags/Primer.Fig.1.6.dot: -------------------------------------------------------------------------------- 1 | strict digraph { 2 | X [x="-1.0", y="0.0"]; 3 | Y [x="-0.0", y="0.0"]; 4 | Z [x="1.0", y="0.0"]; 5 | X -> Y [label=A]; 6 | Y -> Z [label=B]; 7 | } 8 | -------------------------------------------------------------------------------- /dags/Primer.Fig.1.7a.dot: -------------------------------------------------------------------------------- 1 | strict digraph { 2 | X [x=0, y=1]; 3 | Y [x="-1", y=0]; 4 | Z [x=1, y=0]; 5 | X -> Y [label=A]; 6 | X -> Z [label=C]; 7 | Y -> Z [label=B]; 8 | } 9 | -------------------------------------------------------------------------------- /dags/Primer.Fig.1.8.dot: -------------------------------------------------------------------------------- 1 | strict digraph { 2 | X [x="-1", y=0]; 3 | Y [x=0, y=0]; 4 | W [x=0, y=1]; 5 | Z [x=1, y=0]; 6 | T [x="0.5", y="-0.5"]; 7 | X -> Y; 8 | X -> W; 9 | Y -> T; 10 | Y -> Z; 11 | W -> Y; 12 | W -> Z; 13 | Z -> T; 14 | } 15 | -------------------------------------------------------------------------------- /dags/Primer.Fig.1.9.dot: -------------------------------------------------------------------------------- 1 | strict digraph { 2 | X [x="-1", y=1]; 3 | Z [x=0, y=0]; 4 | Y [x=1, y=1]; 5 | X -> Z; 6 | Y -> Z; 7 | } 8 | -------------------------------------------------------------------------------- /dags/Primer.Fig.2.1.dot: -------------------------------------------------------------------------------- 1 | strict digraph { 2 | Ux [x="-0.5", y="1.5"]; 3 | X [x=0, y=1]; 4 | Uy [x="-0.5", y="0.5"]; 5 | Y [x=0, y=0]; 6 | Uz [x="-0.5", y="-0.5"]; 7 | Z [x=0, y="-1"]; 8 | Ux -> X; 9 | X -> Y; 10 | Uy -> Y; 11 | Y -> Z; 12 | Uz -> Z; 13 | } 14 | -------------------------------------------------------------------------------- 
/dags/Primer.Fig.2.2.dot: -------------------------------------------------------------------------------- 1 | strict digraph { 2 | Ux [x=0, y=2]; 3 | X [x=0, y=1]; 4 | Uy [x="-1", y=1]; 5 | Y [x="-1", y=0]; 6 | Uz [x=1, y=1]; 7 | Z [x=1, y=0]; 8 | Ux -> X; 9 | X -> Y; 10 | X -> Z; 11 | Uy -> Y; 12 | Uz -> Z; 13 | } 14 | -------------------------------------------------------------------------------- /dags/Primer.Fig.2.3.dot: -------------------------------------------------------------------------------- 1 | strict digraph { 2 | Ux [x="-1", y=1]; 3 | X [x="-1", y=0]; 4 | Uy [x=1, y=1]; 5 | Y [x=1, y=0]; 6 | Uz [x=0, y=0]; 7 | Z [x=0, y="-1"]; 8 | Ux -> X; 9 | X -> Z; 10 | Uy -> Y; 11 | Y -> Z; 12 | Uz -> Z; 13 | } 14 | -------------------------------------------------------------------------------- /dags/Primer.Fig.2.5.dot: -------------------------------------------------------------------------------- 1 | strict digraph { 2 | X [x=0, y=0]; 3 | R [x=1, y=0]; 4 | S [x=2, y=0]; 5 | T [x=3, y=0]; 6 | U [x=4, y=0]; 7 | V [x=5, y=0]; 8 | Y [x=6, y=0]; 9 | X -> R; 10 | R -> S; 11 | S -> T; 12 | U -> T; 13 | V -> U; 14 | V -> Y; 15 | } 16 | -------------------------------------------------------------------------------- /dags/Primer.Fig.2.6.dot: -------------------------------------------------------------------------------- 1 | strict digraph { 2 | X [x=0, y=0]; 3 | R [x=1, y=0]; 4 | S [x=2, y=0]; 5 | T [x=3, y=0]; 6 | U [x=4, y=0]; 7 | V [x=5, y=0]; 8 | Y [x=6, y=0]; 9 | P [x=3, y="-1"]; 10 | X -> R; 11 | R -> S; 12 | S -> T; 13 | T -> P; 14 | U -> T; 15 | V -> U; 16 | V -> Y; 17 | } 18 | -------------------------------------------------------------------------------- /dags/Primer.Fig.2.7.dot: -------------------------------------------------------------------------------- 1 | strict digraph { 2 | Uz [x="-1", y="1.5"]; 3 | Z [x="-1", y=1]; 4 | Uw [x=0, y="0.5"]; 5 | W [x=0, y=0]; 6 | Ux [x=1, y="1.5"]; 7 | X [x=1, y=1]; 8 | Uy [x=2, y="1.5"]; 9 | Y [x=2, y=1]; 10 | Uu 
[x="-0.5", y="-0.5"]; 11 | U [x=0, y="-1"]; 12 | Uz -> Z; 13 | Z -> W; 14 | Uw -> W; 15 | W -> U; 16 | Ux -> X; 17 | X -> W; 18 | X -> Y; 19 | Uy -> Y; 20 | Uu -> U; 21 | } 22 | -------------------------------------------------------------------------------- /dags/Primer.Fig.2.8.dot: -------------------------------------------------------------------------------- 1 | strict digraph { 2 | Uz [x="-1", y="1.5"]; 3 | Z [x="-1", y=1]; 4 | Uw [x=0, y="0.5"]; 5 | W [x=0, y=0]; 6 | Ux [x=1, y="1.5"]; 7 | X [x=1, y=1]; 8 | Uy [x=2, y="1.5"]; 9 | Y [x=2, y=1]; 10 | Uu [x="-0.5", y="-0.5"]; 11 | U [x=0, y="-1"]; 12 | Ut [x="0.5", y="2.5"]; 13 | T [x="0.5", y=2]; 14 | Uz -> Z; 15 | Z -> W; 16 | Uw -> W; 17 | W -> U; 18 | Ux -> X; 19 | X -> W; 20 | X -> Y; 21 | Uy -> Y; 22 | Uu -> U; 23 | Ut -> T; 24 | T -> Z; 25 | T -> Y; 26 | } 27 | -------------------------------------------------------------------------------- /dags/Primer.Fig.2.9.dot: -------------------------------------------------------------------------------- 1 | strict digraph { 2 | Z1 [x="-1", y=1]; 3 | Z3 [x=0, y=0]; 4 | X [x="-1", y="-1"]; 5 | Z2 [x=1, y=1]; 6 | Y [x=1, y="-1"]; 7 | W [x=0, y="-1"]; 8 | Z1 -> Z3; 9 | Z1 -> X; 10 | Z3 -> X; 11 | Z3 -> Y; 12 | X -> W; 13 | Z2 -> Z3; 14 | Z2 -> Y; 15 | W -> Y; 16 | } 17 | -------------------------------------------------------------------------------- /dags/Primer.Fig.3.1.dot: -------------------------------------------------------------------------------- 1 | strict digraph { 2 | Ux [x="-1", y=1]; 3 | X [x="-1", y=0]; 4 | Uy [x=1, y=1]; 5 | Y [x=1, y=0]; 6 | Uz [x=0, y=2]; 7 | Z [x=0, y=1]; 8 | Ux -> X; 9 | Uy -> Y; 10 | Uz -> Z; 11 | Z -> X; 12 | Z -> Y; 13 | } 14 | -------------------------------------------------------------------------------- /dags/Primer.Fig.3.10b.dot: -------------------------------------------------------------------------------- 1 | strict digraph { 2 | X [x=0, y=0]; 3 | Z [x="0.5", y=0]; 4 | Y [x=1, y=0]; 5 | U [x="0.5", y=1]; 6 | X -> Z; 7 
| Z -> Y; 8 | U -> X; 9 | U -> Y; 10 | } 11 | -------------------------------------------------------------------------------- /dags/Primer.Fig.3.11.dot: -------------------------------------------------------------------------------- 1 | strict digraph { 2 | X [x=0, y=0]; 3 | Y [x=2, y=0]; 4 | Z [x=1, y=1]; 5 | X -> Y; 6 | X -> Z; 7 | Z -> Y; 8 | } 9 | -------------------------------------------------------------------------------- /dags/Primer.Fig.3.12.dot: -------------------------------------------------------------------------------- 1 | strict digraph { 2 | X [x=0, y=0]; 3 | Y [x=2, y=0]; 4 | Z [x=1, y=1]; 5 | I [x=2, y=1]; 6 | X -> Y; 7 | X -> Z; 8 | Z -> Y; 9 | I -> Y; 10 | I -> Z; 11 | } 12 | -------------------------------------------------------------------------------- /dags/Primer.Fig.3.2.dot: -------------------------------------------------------------------------------- 1 | strict digraph { 2 | X [x="-1", y=0]; 3 | Uy [x=1, y=1]; 4 | Y [x=1, y=0]; 5 | Uz [x=0, y=2]; 6 | Z [x=0, y=1]; 7 | Uy -> Y; 8 | Uz -> Z; 9 | Z -> Y; 10 | } 11 | -------------------------------------------------------------------------------- /dags/Primer.Fig.3.3.dot: -------------------------------------------------------------------------------- 1 | strict digraph { 2 | Ux [x="-1", y=1]; 3 | X [x="-1", y=0]; 4 | Uy [x=1, y=1]; 5 | Y [x=1, y=0]; 6 | Uz [x=0, y=2]; 7 | Z [x=0, y=1]; 8 | Ux -> X; 9 | X -> Y; 10 | Uy -> Y; 11 | Uz -> Z; 12 | Z -> X; 13 | Z -> Y; 14 | } 15 | -------------------------------------------------------------------------------- /dags/Primer.Fig.3.4.dot: -------------------------------------------------------------------------------- 1 | strict digraph { 2 | Ux [x="-1", y=1]; 3 | X [x="-1", y=0]; 4 | Uy [x=1, y=1]; 5 | Y [x=1, y=0]; 6 | Uz [x=0, y=2]; 7 | Z [x=0, y=1]; 8 | Ux -> X; 9 | X -> Y; 10 | Uy -> Y; 11 | Uz -> Z; 12 | Z -> X; 13 | Z -> Y; 14 | } 15 | -------------------------------------------------------------------------------- 
/dags/Primer.Fig.3.5.dot: -------------------------------------------------------------------------------- 1 | strict digraph { 2 | X [x="-1", y=0]; 3 | Z [x=0, y=1]; 4 | Y [x=1, y=0]; 5 | X -> Z; 6 | X -> Y; 7 | Z -> Y; 8 | } 9 | -------------------------------------------------------------------------------- /dags/Primer.Fig.3.6.dot: -------------------------------------------------------------------------------- 1 | strict digraph { 2 | X [x=0, y=0]; 3 | Y [x=1, y=0]; 4 | W [x=1, y=1]; 5 | Z [x=0, y=1]; 6 | X -> Y; 7 | W -> Y; 8 | Z -> X; 9 | Z -> W; 10 | } 11 | -------------------------------------------------------------------------------- /dags/Primer.Fig.3.7.dot: -------------------------------------------------------------------------------- 1 | strict digraph { 2 | E [x="-1", y=2]; 3 | Z [x=1, y=1]; 4 | X [x=0, y=0]; 5 | A [x=3, y=2]; 6 | Y [x=2, y=0]; 7 | E -> Z; 8 | E -> X; 9 | Z -> X; 10 | Z -> Y; 11 | X -> Y; 12 | A -> Z; 13 | A -> Y; 14 | } 15 | -------------------------------------------------------------------------------- /dags/Primer.Fig.3.8.dot: -------------------------------------------------------------------------------- 1 | strict digraph { 2 | B [x="-1", y=1]; 3 | A [x="-1", y=0]; 4 | Z [x=0, y=0]; 5 | C [x=1, y=1]; 6 | D [x=1, y=0]; 7 | X [x="-1", y="-1"]; 8 | W [x=0, y="-1"]; 9 | Y [x=1, y="-1"]; 10 | B -> A; 11 | B -> Z; 12 | A -> X; 13 | Z -> X; 14 | Z -> Y; 15 | C -> Z; 16 | C -> D; 17 | D -> Y; 18 | X -> W; 19 | W -> Y; 20 | } 21 | -------------------------------------------------------------------------------- /dags/Primer.SCM.1.5.3.dot: -------------------------------------------------------------------------------- 1 | strict digraph { 2 | "$U_X$" [x="-1", y=1]; 3 | X [x="-1", y=0]; 4 | "$U_Y$" [x=0, y=1]; 5 | Y [x=0, y=0]; 6 | "$U_Z$" [x=1, y=1]; 7 | Z [x=1, y=0]; 8 | "$U_X$" -> X; 9 | X -> Y; 10 | "$U_Y$" -> Y; 11 | Y -> Z; 12 | "$U_Z$" -> Z; 13 | } 14 | 
-------------------------------------------------------------------------------- /data/D4Sci_logo_ball.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataForScience/Causality/f7b1abb57541776dc786174deb102ca4ab864df6/data/D4Sci_logo_ball.png -------------------------------------------------------------------------------- /data/D4Sci_logo_full.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataForScience/Causality/f7b1abb57541776dc786174deb102ca4ab864df6/data/D4Sci_logo_full.png -------------------------------------------------------------------------------- /data/book2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataForScience/Causality/f7b1abb57541776dc786174deb102ca4ab864df6/data/book2.jpeg -------------------------------------------------------------------------------- /data/causality.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataForScience/Causality/f7b1abb57541776dc786174deb102ca4ab864df6/data/causality.jpeg -------------------------------------------------------------------------------- /data/iris.csv: -------------------------------------------------------------------------------- 1 | sepal_length,sepal_width,petal_length,petal_width,species 2 | 5.1,3.5,1.4,0.2,setosa 3 | 4.9,3.0,1.4,0.2,setosa 4 | 4.7,3.2,1.3,0.2,setosa 5 | 4.6,3.1,1.5,0.2,setosa 6 | 5.0,3.6,1.4,0.2,setosa 7 | 5.4,3.9,1.7,0.4,setosa 8 | 4.6,3.4,1.4,0.3,setosa 9 | 5.0,3.4,1.5,0.2,setosa 10 | 4.4,2.9,1.4,0.2,setosa 11 | 4.9,3.1,1.5,0.1,setosa 12 | 5.4,3.7,1.5,0.2,setosa 13 | 4.8,3.4,1.6,0.2,setosa 14 | 4.8,3.0,1.4,0.1,setosa 15 | 4.3,3.0,1.1,0.1,setosa 16 | 5.8,4.0,1.2,0.2,setosa 17 | 5.7,4.4,1.5,0.4,setosa 18 | 5.4,3.9,1.3,0.4,setosa 19 | 5.1,3.5,1.4,0.3,setosa 20 | 5.7,3.8,1.7,0.3,setosa 21 | 
5.1,3.8,1.5,0.3,setosa 22 | 5.4,3.4,1.7,0.2,setosa 23 | 5.1,3.7,1.5,0.4,setosa 24 | 4.6,3.6,1.0,0.2,setosa 25 | 5.1,3.3,1.7,0.5,setosa 26 | 4.8,3.4,1.9,0.2,setosa 27 | 5.0,3.0,1.6,0.2,setosa 28 | 5.0,3.4,1.6,0.4,setosa 29 | 5.2,3.5,1.5,0.2,setosa 30 | 5.2,3.4,1.4,0.2,setosa 31 | 4.7,3.2,1.6,0.2,setosa 32 | 4.8,3.1,1.6,0.2,setosa 33 | 5.4,3.4,1.5,0.4,setosa 34 | 5.2,4.1,1.5,0.1,setosa 35 | 5.5,4.2,1.4,0.2,setosa 36 | 4.9,3.1,1.5,0.2,setosa 37 | 5.0,3.2,1.2,0.2,setosa 38 | 5.5,3.5,1.3,0.2,setosa 39 | 4.9,3.6,1.4,0.1,setosa 40 | 4.4,3.0,1.3,0.2,setosa 41 | 5.1,3.4,1.5,0.2,setosa 42 | 5.0,3.5,1.3,0.3,setosa 43 | 4.5,2.3,1.3,0.3,setosa 44 | 4.4,3.2,1.3,0.2,setosa 45 | 5.0,3.5,1.6,0.6,setosa 46 | 5.1,3.8,1.9,0.4,setosa 47 | 4.8,3.0,1.4,0.3,setosa 48 | 5.1,3.8,1.6,0.2,setosa 49 | 4.6,3.2,1.4,0.2,setosa 50 | 5.3,3.7,1.5,0.2,setosa 51 | 5.0,3.3,1.4,0.2,setosa 52 | 7.0,3.2,4.7,1.4,versicolor 53 | 6.4,3.2,4.5,1.5,versicolor 54 | 6.9,3.1,4.9,1.5,versicolor 55 | 5.5,2.3,4.0,1.3,versicolor 56 | 6.5,2.8,4.6,1.5,versicolor 57 | 5.7,2.8,4.5,1.3,versicolor 58 | 6.3,3.3,4.7,1.6,versicolor 59 | 4.9,2.4,3.3,1.0,versicolor 60 | 6.6,2.9,4.6,1.3,versicolor 61 | 5.2,2.7,3.9,1.4,versicolor 62 | 5.0,2.0,3.5,1.0,versicolor 63 | 5.9,3.0,4.2,1.5,versicolor 64 | 6.0,2.2,4.0,1.0,versicolor 65 | 6.1,2.9,4.7,1.4,versicolor 66 | 5.6,2.9,3.6,1.3,versicolor 67 | 6.7,3.1,4.4,1.4,versicolor 68 | 5.6,3.0,4.5,1.5,versicolor 69 | 5.8,2.7,4.1,1.0,versicolor 70 | 6.2,2.2,4.5,1.5,versicolor 71 | 5.6,2.5,3.9,1.1,versicolor 72 | 5.9,3.2,4.8,1.8,versicolor 73 | 6.1,2.8,4.0,1.3,versicolor 74 | 6.3,2.5,4.9,1.5,versicolor 75 | 6.1,2.8,4.7,1.2,versicolor 76 | 6.4,2.9,4.3,1.3,versicolor 77 | 6.6,3.0,4.4,1.4,versicolor 78 | 6.8,2.8,4.8,1.4,versicolor 79 | 6.7,3.0,5.0,1.7,versicolor 80 | 6.0,2.9,4.5,1.5,versicolor 81 | 5.7,2.6,3.5,1.0,versicolor 82 | 5.5,2.4,3.8,1.1,versicolor 83 | 5.5,2.4,3.7,1.0,versicolor 84 | 5.8,2.7,3.9,1.2,versicolor 85 | 6.0,2.7,5.1,1.6,versicolor 86 | 5.4,3.0,4.5,1.5,versicolor 87 | 
6.0,3.4,4.5,1.6,versicolor 88 | 6.7,3.1,4.7,1.5,versicolor 89 | 6.3,2.3,4.4,1.3,versicolor 90 | 5.6,3.0,4.1,1.3,versicolor 91 | 5.5,2.5,4.0,1.3,versicolor 92 | 5.5,2.6,4.4,1.2,versicolor 93 | 6.1,3.0,4.6,1.4,versicolor 94 | 5.8,2.6,4.0,1.2,versicolor 95 | 5.0,2.3,3.3,1.0,versicolor 96 | 5.6,2.7,4.2,1.3,versicolor 97 | 5.7,3.0,4.2,1.2,versicolor 98 | 5.7,2.9,4.2,1.3,versicolor 99 | 6.2,2.9,4.3,1.3,versicolor 100 | 5.1,2.5,3.0,1.1,versicolor 101 | 5.7,2.8,4.1,1.3,versicolor 102 | 6.3,3.3,6.0,2.5,virginica 103 | 5.8,2.7,5.1,1.9,virginica 104 | 7.1,3.0,5.9,2.1,virginica 105 | 6.3,2.9,5.6,1.8,virginica 106 | 6.5,3.0,5.8,2.2,virginica 107 | 7.6,3.0,6.6,2.1,virginica 108 | 4.9,2.5,4.5,1.7,virginica 109 | 7.3,2.9,6.3,1.8,virginica 110 | 6.7,2.5,5.8,1.8,virginica 111 | 7.2,3.6,6.1,2.5,virginica 112 | 6.5,3.2,5.1,2.0,virginica 113 | 6.4,2.7,5.3,1.9,virginica 114 | 6.8,3.0,5.5,2.1,virginica 115 | 5.7,2.5,5.0,2.0,virginica 116 | 5.8,2.8,5.1,2.4,virginica 117 | 6.4,3.2,5.3,2.3,virginica 118 | 6.5,3.0,5.5,1.8,virginica 119 | 7.7,3.8,6.7,2.2,virginica 120 | 7.7,2.6,6.9,2.3,virginica 121 | 6.0,2.2,5.0,1.5,virginica 122 | 6.9,3.2,5.7,2.3,virginica 123 | 5.6,2.8,4.9,2.0,virginica 124 | 7.7,2.8,6.7,2.0,virginica 125 | 6.3,2.7,4.9,1.8,virginica 126 | 6.7,3.3,5.7,2.1,virginica 127 | 7.2,3.2,6.0,1.8,virginica 128 | 6.2,2.8,4.8,1.8,virginica 129 | 6.1,3.0,4.9,1.8,virginica 130 | 6.4,2.8,5.6,2.1,virginica 131 | 7.2,3.0,5.8,1.6,virginica 132 | 7.4,2.8,6.1,1.9,virginica 133 | 7.9,3.8,6.4,2.0,virginica 134 | 6.4,2.8,5.6,2.2,virginica 135 | 6.3,2.8,5.1,1.5,virginica 136 | 6.1,2.6,5.6,1.4,virginica 137 | 7.7,3.0,6.1,2.3,virginica 138 | 6.3,3.4,5.6,2.4,virginica 139 | 6.4,3.1,5.5,1.8,virginica 140 | 6.0,3.0,4.8,1.8,virginica 141 | 6.9,3.1,5.4,2.1,virginica 142 | 6.7,3.1,5.6,2.4,virginica 143 | 6.9,3.1,5.1,2.3,virginica 144 | 5.8,2.7,5.1,1.9,virginica 145 | 6.8,3.2,5.9,2.3,virginica 146 | 6.7,3.3,5.7,2.5,virginica 147 | 6.7,3.0,5.2,2.3,virginica 148 | 6.3,2.5,5.0,1.9,virginica 149 | 
6.5,3.0,5.2,2.0,virginica 150 | 6.2,3.4,5.4,2.3,virginica 151 | 5.9,3.0,5.1,1.8,virginica 152 | -------------------------------------------------------------------------------- /data/newsletter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DataForScience/Causality/f7b1abb57541776dc786174deb102ca4ab864df6/data/newsletter.png -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | dependencies: 2 | - matplotlib==3.1.3 3 | - networkx==2.4 4 | - numpy==1.18.1 5 | - pandas==1.0.1 6 | - requests=2.21.0 7 | - scikit-learn==0.22.1 8 | - statsmodels==0.11.1 9 | - tqdm==4.46.0 10 | - watermark==2.0.2 11 | - pygraphviz==1.5 12 | - pydot==1.4.1 --------------------------------------------------------------------------------