├── .devcontainer
│   ├── Dockerfile
│   ├── devcontainer.json
│   └── noop.txt
├── .gitattributes
├── .github
│   └── workflows
│       └── docs.yaml
├── .gitignore
├── .pre-commit-config.yaml
├── CONTRIBUTORS.md
├── LICENSE
├── README.md
├── docs
│   ├── 01-causality-linear-simulation.ipynb
│   ├── 02-instrument-variables.ipynb
│   ├── 03-d-separation.ipynb
│   ├── 04-finding-confounding-set.ipynb
│   ├── 05-collider-effect.ipynb
│   ├── 06-causality-identifiability.ipynb
│   ├── 07-do-operator.ipynb
│   └── index.md
├── environment.yml
├── mkdocs.yml
└── src
    ├── causality_notes
    │   └── __init__.py
    ├── setup.py
    └── test_causality.py

/.devcontainer/Dockerfile:
--------------------------------------------------------------------------------
1 | #-------------------------------------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License. See https://go.microsoft.com/fwlink/?linkid=2090316 for license information.
4 | #-------------------------------------------------------------------------------------------------------------
5 | 
6 | FROM continuumio/miniconda3
7 | 
8 | # Avoid warnings by switching to noninteractive
9 | ENV DEBIAN_FRONTEND=noninteractive
10 | 
11 | # This Dockerfile adds a non-root user with sudo access. Use the "remoteUser"
12 | # property in devcontainer.json to use it. On Linux, the container user's GID/UIDs
13 | # will be updated to match your local UID/GID (when using the dockerFile property).
14 | # See https://aka.ms/vscode-remote/containers/non-root-user for details.
15 | ARG USERNAME=vscode
16 | ARG USER_UID=1000
17 | ARG USER_GID=$USER_UID
18 | 
19 | # Copy environment.yml (if found) to a temp location so we can update the environment. Also
20 | # copy "noop.txt" so the COPY instruction does not fail if no environment.yml exists.
21 | COPY environment.yml* .devcontainer/noop.txt /tmp/conda-tmp/
22 | 
23 | # Configure apt and install packages
24 | RUN apt-get update \
25 |     && apt-get -y install --no-install-recommends apt-utils dialog 2>&1 \
26 |     #
27 |     # Verify git, process tools, lsb-release (common in install instructions for CLIs) installed
28 |     && apt-get -y install git openssh-client less iproute2 procps lsb-release \
29 |     #
30 |     # Install pylint
31 |     && /opt/conda/bin/pip install pylint \
32 |     # Install mamba
33 |     && /opt/conda/bin/conda install -c conda-forge mamba \
34 |     #
35 |     # Update Python environment based on environment.yml (if present)
36 |     && if [ -f "/tmp/conda-tmp/environment.yml" ]; then /opt/conda/bin/mamba env update -n base -f /tmp/conda-tmp/environment.yml; fi \
37 |     && rm -rf /tmp/conda-tmp \
38 |     #
39 |     # Create a non-root user to use if preferred - see https://aka.ms/vscode-remote/containers/non-root-user.
40 |     && groupadd --gid $USER_GID $USERNAME \
41 |     && useradd -s /bin/bash --uid $USER_UID --gid $USER_GID -m $USERNAME \
42 |     # [Optional] Add sudo support for the non-root user
43 |     && apt-get install -y sudo nano emacs vim \
44 |     && echo $USERNAME ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/$USERNAME \
45 |     && chmod 0440 /etc/sudoers.d/$USERNAME \
46 |     #
47 |     # Clean up
48 |     && apt-get autoremove -y \
49 |     && apt-get clean -y \
50 |     && rm -rf /var/lib/apt/lists/*
51 | 
52 | # Switch back to dialog for any ad-hoc use of apt-get
53 | ENV DEBIAN_FRONTEND=dialog
54 | 
--------------------------------------------------------------------------------
/.devcontainer/devcontainer.json:
--------------------------------------------------------------------------------
1 | // For format details, see https://aka.ms/vscode-remote/devcontainer.json or this file's README at:
2 | // https://github.com/microsoft/vscode-dev-containers/tree/v0.117.1/containers/python-3-miniconda
3 | {
4 |   "name": "Causality Development Container",
5 |   "build": {
6 |     "dockerfile": "./Dockerfile",
7 |     "context": ".."
8 |   },
9 |   // Set *default* container specific settings.json values on container create.
10 |   "settings": {
11 |     "python.pythonPath": "/opt/conda/bin/python",
12 |     "python.linting.enabled": true,
13 |     "python.linting.pylintEnabled": true,
14 |     "python.linting.pylintPath": "/opt/conda/bin/pylint"
15 |   },
16 |   // Add the IDs of extensions you want installed when the container is created.
17 |   "extensions": [
18 |     "ms-python.python"
19 |   ],
20 |   // Use 'forwardPorts' to make a list of ports inside the container available locally.
21 |   // "forwardPorts": [
22 |   //   8000,
23 |   //   8001,
24 |   //   8002,
25 |   //   8003,
26 |   //   8004,
27 |   //   8005
28 |   // ],
29 |   // Use 'postCreateCommand' to run commands after the container is created.
30 |   // Note: the array form of postCreateCommand is a single command's argv (not a list of
31 |   // commands), so the two setup commands are chained in one shell string instead.
32 |   "postCreateCommand": "pre-commit install && cd src && python setup.py develop"
33 | 
34 |   // Uncomment to connect as a non-root user. See https://aka.ms/vscode-remote/containers/non-root.
35 |   // "remoteUser": "vscode"
36 | }
37 | 
--------------------------------------------------------------------------------
/.devcontainer/noop.txt:
--------------------------------------------------------------------------------
1 | This file is copied into the container along with environment.yml* from the
2 | parent folder. This is done to prevent the Dockerfile COPY instruction from
3 | failing if no environment.yml is found.
4 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.ipynb filter=nbstripout 2 | *.ipynb diff=ipynb 3 | -------------------------------------------------------------------------------- /.github/workflows/docs.yaml: -------------------------------------------------------------------------------- 1 | name: Build documentation 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | 8 | jobs: 9 | build-docs: 10 | runs-on: ubuntu-20.04 11 | name: Build docs 12 | 13 | # https://github.com/marketplace/actions/setup-miniconda#use-a-default-shell 14 | defaults: 15 | run: 16 | shell: bash -l {0} 17 | 18 | steps: 19 | - uses: actions/checkout@v2 20 | name: Checkout repository 21 | 22 | # See: https://github.com/marketplace/actions/setup-miniconda 23 | - name: Setup miniconda 24 | uses: conda-incubator/setup-miniconda@v2 25 | with: 26 | auto-update-conda: true 27 | miniforge-variant: Mambaforge 28 | channels: conda-forge 29 | python-version: 3.9 30 | activate-environment: causality 31 | environment-file: environment.yml 32 | use-mamba: true 33 | 34 | - name: Build docs 35 | run: | 36 | conda activate causality 37 | python -m ipykernel install --user --name causality 38 | pip install -e src/. 39 | mkdocs build 40 | 41 | - name: Deploy website 42 | uses: peaceiris/actions-gh-pages@v3 43 | with: 44 | # https://github.com/peaceiris/actions-gh-pages#%EF%B8%8F-set-personal-access-token-personal_token 45 | personal_token: ${{ secrets.GHPAGES_TOKEN }} 46 | publish_dir: ./site 47 | publish_branch: gh-pages 48 | # destination_dir: manuscript 49 | allow_empty_commit: false 50 | keep_files: false 51 | force_orphan: true 52 | enable_jekyll: false 53 | disable_nojekyll: false 54 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Custom 2 | .ipynb_checkpoints/* 3 | notebooks/.ipynb_checkpoints/* 4 | .pytest_cache/* 5 | 6 | # Byte-compiled / optimized / DLL files 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | 11 | # C extensions 12 | *.so 13 | 14 | # Distribution / packaging 15 | .Python 16 | env/ 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *,cover 51 | .hypothesis/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | 60 | # Sphinx documentation 61 | docs/_build/ 62 | 63 | # PyBuilder 64 | target/ 65 | *.DS_Store 66 | .DS_Store 67 | 68 | # mkdocs 69 | site/ 70 | 71 | # Custom 72 | .vscode/settings.json 73 | default.profraw 74 | *.md.tmp 75 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | repos: 4 | - repo: https://github.com/pre-commit/pre-commit-hooks 5 | rev: v4.0.1 6 | hooks: 7 | - id: trailing-whitespace 8 | - id: end-of-file-fixer 9 | - id: check-yaml 10 | - id: check-added-large-files 11 | - id: debug-statements 12 | - repo: https://github.com/psf/black 13 | rev: 21.7b0 14 | hooks: 15 | - id: black 16 | - repo: https://github.com/kynan/nbstripout 17 | rev: 0.5.0 18 | hooks: 19 | - id: nbstripout 20 | files: ".ipynb" 21 | -------------------------------------------------------------------------------- /CONTRIBUTORS.md: -------------------------------------------------------------------------------- 1 | causality contributors 2 | ============================= 3 | 4 | * **[Eric Ma](https://github.com/ericmjl)** 5 | 6 | * Creator and owner of the repository. 7 | 8 | * **[Hosein Fooladi](https://github.com/HFooladi)** 9 | 10 | * Minor modification of the notebooks 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Eric Ma 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | --- 2 | pagetitle: ericmjl/causality 3 | --- 4 | 5 | ![](https://travis-ci.org/ericmjl/causality.svg?branch=master) 6 | [![Binder](https://mybinder.org/badge.svg)](https://mybinder.org/v2/gh/ericmjl/causality/master) 7 | 8 | # causality 9 | 10 | A small repo in which I play with the ideas of causal modelling. 11 | 12 | # why this repo exists 13 | 14 | I'm interested in causal modelling; having read Judea Pearl's [The Book of Why: The New Science of Cause and Effect](https://www.amazon.com/Book-Why-Science-Cause-Effect-ebook/dp/B075CR9QBJ/ref=cm_cr_arp_d_product_top?ie=UTF8), I then followed up with Jonas Peters' [mini-course on causality](https://www.youtube.com/playlist?list=PLW01hpWnEtbTcuY0a0jhZyanHX3GPImAy). Pearl's book is a good layman's introduction to the history of causal inference research, even if mostly written from the viewpoint of one deeply invested in the field. Peters' lecture series turns out to be a great follow-up to the book. 15 | 16 | # getting started 17 | 18 | ## installation 19 | 20 | To get started, install the packages as specified in the `environment.yml` conda specification file. 21 | 22 | ``` 23 | $ conda env create -f environment.yml 24 | ``` 25 | 26 | If you prefer to install by pip, install the packages listed there manually. (They are all available on PyPI.) 27 | 28 | 29 | ## running the notebooks 30 | 31 | There are two options for running the notebooks. The first one is the simplest: just click on the Binder icon below. 32 | 33 | [![Binder](https://mybinder.org/badge.svg)](https://mybinder.org/v2/gh/ericmjl/causality/master) 34 | 35 | The second way assumes you've cloned the repository locally, or have downloaded and unzipped it from GitHub. In your terminal (or command prompt), run the following commands: 36 | 37 | ``` 38 | $ source activate causality 39 | $ jupyter lab 40 | ``` 41 | 42 | # contributing 43 | 44 | If you are an expert on causal modelling, and see issues with my notebooks, I would love to hear about them! Please post it as an [issue](https://github.com/ericmjl/causality/issues). I would also love to accept a pull request. 45 | -------------------------------------------------------------------------------- /docs/01-causality-linear-simulation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%load_ext autoreload\n", 10 | "%autoreload 2\n", 11 | "%matplotlib inline\n", 12 | "%config InlineBackend.figure_format = 'retina'" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "import networkx as nx\n", 22 | "import matplotlib.pyplot as plt\n", 23 | "import numpy as np\n", 24 | "import numpy.random as npr\n", 25 | "import pandas as pd\n", 26 | "from causality_notes import draw_graph, noise" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "# Introduction\n", 34 | "\n", 35 | "In this notebook, I would like to simulate a complex causal process (with only linear relationships between the variables), but in a complex DAG that isn't just a triangle. 
Then, I would like to use this simulated data to convince myself that with the right conditioning on variables, we can recover the correct parameters back." 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "## Graphical Structure\n", 43 | "\n", 44 | "First off, let's assume that there is the following graphical structure." 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "G = nx.DiGraph()\n", 54 | "G.add_edge('c', 'x', coeff=5)\n", 55 | "G.add_edge('a', 'x', coeff=2)\n", 56 | "G.add_edge('a', 'k', coeff=-3)\n", 57 | "G.add_edge('x', 'f', coeff=6)\n", 58 | "G.add_edge('x', 'd', coeff=-2)\n", 59 | "G.add_edge('d', 'g', coeff=-8)\n", 60 | "G.add_edge('k', 'y', coeff=3)\n", 61 | "G.add_edge('d', 'y', coeff=5)\n", 62 | "G.add_edge('y', 'h', coeff=-4)\n", 63 | " \n", 64 | "draw_graph(G)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "Written as a set of equations, it might look like the following:" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "n = 1000 # number of samples taken\n", 81 | "\n", 82 | "c = noise(n)\n", 83 | "a = noise(n)\n", 84 | "x = 5 * c + 2 * a + noise(n)\n", 85 | "k = -3 * a + noise(n)\n", 86 | "f = 6 * x + noise(n)\n", 87 | "d = -2 * x + noise(n)\n", 88 | "g = -8 * d + noise(n)\n", 89 | "y = 3 * k + 5 * d + noise(n)\n", 90 | "h = -4 * y + noise(n)\n", 91 | "\n", 92 | "data = dict(c=c, a=a, x=x, k=k, f=f, d=d, g=g, y=y, h=h)\n", 93 | "df = pd.DataFrame(data)\n", 94 | "df.sample(5)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "Note how the coefficients on the edges are basically the linear multipliers.\n", 102 | "\n", 103 | "Before we go on, let's get a feel for how the data are distributed." 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "import seaborn as sns\n", 113 | "sns.pairplot(df)" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "Looking at the pair plot above, we should compute the pairwise correlation between each pair of variables." 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "plt.imshow(df.corr(method='pearson').values, cmap='RdBu')\n", 130 | "plt.xticks(range(len(df.columns)), df.columns)\n", 131 | "plt.yticks(range(len(df.columns)), df.columns)\n", 132 | "plt.colorbar()" 133 | ] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "metadata": {}, 138 | "source": [ 139 | "Compare the graphical structure below against the correlation plots above. Some things are quite neat.\n", 140 | "\n", 141 | "- $c$ and $k$ are uncorrelated, because there is no causal path from $c$ to $k$. Same goes for $c$ and $a$.\n", 142 | "- On the other hand, $c$ is causally related to all of the other variables. It has a negative correlation with $d$ and $y$, and this comes directly from Sewall Wright's path rules: positive coeff $\\times$ negative coeff gives a negative coeff." 
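    , "\n",
    "As a small appended illustration (a sketch, not part of the original notebook), Sewall Wright's path rule says that the effect transmitted along a directed path is the product of the edge coefficients, which we can compute straight off the graph `G` defined above:\n",
    "\n",
    "```python\n",
    "# Effect of c on y along the directed path c -> x -> d -> y:\n",
    "effect = 1\n",
    "for u, v in [('c', 'x'), ('x', 'd'), ('d', 'y')]:\n",
    "    effect *= G.edges[u, v]['coeff']\n",
    "effect  # 5 * -2 * 5 = -50, hence the negative correlation between c and y\n",
    "```\n"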
143 |    ]
144 |   },
145 |   {
146 |    "cell_type": "code",
147 |    "execution_count": null,
148 |    "metadata": {},
149 |    "outputs": [],
150 |    "source": [
151 |     "draw_graph(G)"
152 |    ]
153 |   },
154 |   {
155 |    "cell_type": "markdown",
156 |    "metadata": {},
157 |    "source": [
158 |     "From the graph, we know that the direct effect of $x$ on $y$ is going to be $-2 \times 5 = -10$ (Sewall Wright's path analysis). However, if we only regress $y$ on $x$, the coefficients are going to be wrong, because we have a confounder between $x$ and $y$, primarily originating from $a$. \n",
159 |     "\n",
160 |     "Now, let's try naïvely regressing $y$ on $x$, given the causal structure above."
161 |    ]
162 |   },
163 |   {
164 |    "cell_type": "code",
165 |    "execution_count": null,
166 |    "metadata": {},
167 |    "outputs": [],
168 |    "source": [
169 |     "from statsmodels.regression.linear_model import OLS\n",
170 |     "\n",
171 |     "model = OLS.from_formula('y ~ x', data=df)\n",
172 |     "results = model.fit()\n",
173 |     "results.params"
174 |    ]
175 |   },
176 |   {
177 |    "cell_type": "markdown",
178 |    "metadata": {},
179 |    "source": [
180 |     "We almost recover the correct coefficient, but we didn't condition on the confounding path from $x$ to $y$, that is, the path `x <- a -> k -> y`, so we're still off. \n",
181 |     "\n",
182 |     "What if we conditioned on $a$? To condition on $a$ means adding it as a term in the linear regression."
183 |    ]
184 |   },
185 |   {
186 |    "cell_type": "code",
187 |    "execution_count": null,
188 |    "metadata": {},
189 |    "outputs": [],
190 |    "source": [
191 |     "model = OLS.from_formula('y ~ x + a', data=df)\n",
192 |     "results = model.fit()\n",
193 |     "results.params"
194 |    ]
195 |   },
196 |   {
197 |    "cell_type": "markdown",
198 |    "metadata": {},
199 |    "source": [
200 |     "Much better! What if we conditioned on $k$ only?"
201 |    ]
202 |   },
203 |   {
204 |    "cell_type": "code",
205 |    "execution_count": null,
206 |    "metadata": {},
207 |    "outputs": [],
208 |    "source": [
209 |     "model = OLS.from_formula('y ~ x + k', data=df)\n",
210 |     "results = model.fit()\n",
211 |     "results.params"
212 |    ]
213 |   },
214 |   {
215 |    "cell_type": "markdown",
216 |    "metadata": {},
217 |    "source": [
218 |     "Wonderful! We get coefficients that are much closer to -10, which is exactly what we had expected. Notice how we also recovered the effect of $a$ and $k$ respectively on $y$.\n",
219 |     "\n",
220 |     "One thing that is quite nice about this scheme is that if we know the causality structure ahead of time, then we need not condition on every last variable. We needn't even condition on every single variable on the confounding path; conditioning on a single variable in each confounding path is sufficient. \n",
221 |     "\n",
222 |     "This property comes in handy in scenarios where we don't have perfect information: if we weren't able to measure $a$, or just forgot to measure it, $k$ is a sufficiently good variable to condition on.\n",
223 |     "\n",
224 |     "What would happen if we conditioned on a variable that wasn't involved in the causal path from $x$ to $y$? Let's try conditioning on $g$."
225 |    ]
226 |   },
227 |   {
228 |    "cell_type": "code",
229 |    "execution_count": null,
230 |    "metadata": {},
231 |    "outputs": [],
232 |    "source": [
233 |     "model = OLS.from_formula('y ~ x + g', data=df)\n",
234 |     "results = model.fit()\n",
235 |     "results.params"
236 |    ]
237 |   },
238 |   {
239 |    "cell_type": "markdown",
240 |    "metadata": {},
241 |    "source": [
242 |     "We are way off! This is because $g$ is not a confounder of $x$ and $y$; rather, $g$ is a descendant of the mediator $d$, so conditioning on it partially blocks the very causal path whose effect we are trying to measure. Conditioning on it is therefore the wrong thing to do."
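    , "\n",
    "As one final check (a sketch appended here, not in the original notebook), conditioning on the full blocking set $\{a, k\}$ should also work, leaving the coefficient on $x$ at roughly -10:\n",
    "\n",
    "```python\n",
    "model = OLS.from_formula('y ~ x + a + k', data=df)\n",
    "results = model.fit()\n",
    "results.params  # the coefficient on x should again be close to -10\n",
    "```\n"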
243 |    ]
244 |   }
245 |  ],
246 |  "metadata": {
247 |   "kernelspec": {
248 |    "display_name": "Python 3",
249 |    "language": "python",
250 |    "name": "python3"
251 |   },
252 |   "language_info": {
253 |    "codemirror_mode": {
254 |     "name": "ipython",
255 |     "version": 3
256 |    },
257 |    "file_extension": ".py",
258 |    "mimetype": "text/x-python",
259 |    "name": "python",
260 |    "nbconvert_exporter": "python",
261 |    "pygments_lexer": "ipython3",
262 |    "version": "3.9.9"
263 |   }
264 |  },
265 |  "nbformat": 4,
266 |  "nbformat_minor": 2
267 | }
268 | 
--------------------------------------------------------------------------------
/docs/02-instrument-variables.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "code",
5 |    "execution_count": null,
6 |    "metadata": {},
7 |    "outputs": [],
8 |    "source": [
9 |     "import numpy as np\n",
10 |     "import networkx as nx\n",
11 |     "import matplotlib.pyplot as plt\n",
12 |     "import pandas as pd\n",
13 |     "from causality_notes import noise, draw_graph\n",
14 |     "from statsmodels.regression.linear_model import OLS\n",
15 |     "\n",
16 |     "%load_ext autoreload\n",
17 |     "%autoreload 2\n",
18 |     "%matplotlib inline\n",
19 |     "%config InlineBackend.figure_format = 'retina'"
20 |    ]
21 |   },
22 |   {
23 |    "cell_type": "markdown",
24 |    "metadata": {},
25 |    "source": [
26 |     "# Introduction\n",
27 |     "\n",
28 |     "This notebook serves to introduce the use of instrument variables, which can be used in **linear** models to figure out the effect of `x` on `y` in the absence of the ability to measure the known confounder.\n",
29 |     "\n",
30 |     "Note: The method here was introduced to me via Jonas Peters' videos. I believe the assumption of linearity is a strong one: we must assume linear models, otherwise the math doesn't check out."
31 |    ]
32 |   },
33 |   {
34 |    "cell_type": "markdown",
35 |    "metadata": {},
36 |    "source": [
37 |     "## Model\n",
38 |     "\n",
39 |     "Assume we have the following model, specified via linear equations in NumPy code:"
40 |    ]
41 |   },
42 |   {
43 |    "cell_type": "code",
44 |    "execution_count": null,
45 |    "metadata": {},
46 |    "outputs": [],
47 |    "source": [
48 |     "# Set up model with hidden coefficients (pretend that we know them for \n",
49 |     "# now) and instrument variable.\n",
50 |     "size = 1000\n",
51 |     "\n",
52 |     "gamma = 2\n",
53 |     "alpha = 3\n",
54 |     "beta = 1\n",
55 |     "delta = -1\n",
56 |     "\n",
57 |     "h = 2 * noise(size)  # the hidden, unmeasured variable.\n",
58 |     "i = 4 * noise(size)  # the instrument variable\n",
59 |     "x = delta * i + beta * h + 2 * noise(size)\n",
60 |     "y = gamma * h + alpha * x + 3 * noise(size)"
61 |    ]
62 |   },
63 |   {
64 |    "cell_type": "markdown",
65 |    "metadata": {},
66 |    "source": [
67 |     "Graphically, this looks like the following:"
68 |    ]
69 |   },
70 |   {
71 |    "cell_type": "code",
72 |    "execution_count": null,
73 |    "metadata": {},
74 |    "outputs": [],
75 |    "source": [
76 |     "G = nx.DiGraph()\n",
77 |     "G.add_edge('x', 'y', coeff=alpha)\n",
78 |     "G.add_edge('h', 'y', coeff=gamma)\n",
79 |     "G.add_edge('h', 'x', coeff=beta)\n",
80 |     "G.add_edge('i', 'x', coeff=delta)\n",
81 |     "\n",
82 |     "draw_graph(G)"
83 |    ]
84 |   },
85 |   {
86 |    "cell_type": "markdown",
87 |    "metadata": {},
88 |    "source": [
89 |     "If we were to regress $y$ directly on $x$, we would run into issues: because we didn't measure the confounder variable $h$ (believe me for a moment that this is assumed to be true), our coefficients will be way off.\n",
90 |     "\n",
91 |     "To show this, first, let's create the `pandas` DataFrame that will be used with `statsmodels`."
92 |    ]
93 |   },
94 |   {
95 |    "cell_type": "code",
96 |    "execution_count": null,
97 |    "metadata": {},
98 |    "outputs": [],
99 |    "source": [
100 |     "data = dict(x=x, i=i, h=h, y=y)\n",
101 |     "df = pd.DataFrame(data)"
102 |    ]
103 |   },
104 |   {
105 |    "cell_type": "markdown",
106 |    "metadata": {},
107 |    "source": [
108 |     "Now, we regress $y$ on $x$, and let's observe the output to see how good of a fit it is."
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "code",
113 |    "execution_count": null,
114 |    "metadata": {},
115 |    "outputs": [],
116 |    "source": [
117 |     "model = OLS.from_formula('y ~ x', data=df)\n",
118 |     "results = model.fit()\n",
119 |     "results.params"
120 |    ]
121 |   },
122 |   {
123 |    "cell_type": "markdown",
124 |    "metadata": {},
125 |    "source": [
126 |     "We're close, but not really there. (Remember, though, we wouldn't know this in a real-world scenario, where we might have postulated the presence of a hidden variable but didn't have the know-how to go out and measure it.)"
127 |    ]
128 |   },
129 |   {
130 |    "cell_type": "markdown",
131 |    "metadata": {},
132 |    "source": [
133 |     "In the real-world scenario above, we might want to see if there's an **instrument variable** to help us out with this problem.\n",
134 |     "\n",
135 |     "The use of an instrumental variable works as follows: \n",
136 |     "We first regress $x$ on the instrument variable $i$, \n",
137 |     "to obtain 'estimated' values of $\delta i$. \n",
138 |     "We then regress $y$ on $\delta i$, which gives us the coefficient $\alpha$.\n"
139 |    ]
140 |   },
141 |   {
142 |    "cell_type": "markdown",
143 |    "metadata": {},
144 |    "source": [
145 |     "\n",
146 |     "Don't believe me? Look at the math below:\n",
147 |     "\n",
148 |     "As a structured causal equation, the graphical model can be expressed as such:\n",
149 |     "\n",
150 |     "$$y = \alpha x + \gamma h + n_{y}$$\n",
151 |     "$$x = \delta i + \beta h + n_{x}$$\n",
152 |     "\n",
153 |     "where $n_{x}$ is the noise that generates uncertainty around $x$, and $n_{y}$ is the noise that generates uncertainty around $y$.\n"
154 |    ]
155 |   },
156 |   {
157 |    "cell_type": "markdown",
158 |    "metadata": {},
159 |    "source": [
160 |     "\n",
161 |     "Substituting $x$ into the equation for $y$ (don't take my word for it, try it yourself!), we get:\n",
162 |     "\n",
163 |     "$$y = (\alpha \beta + \gamma) h + (\alpha) (\delta i) + \alpha n_{x} + n_{y}$$\n"
164 |    ]
165 |   },
166 |   {
167 |    "cell_type": "markdown",
168 |    "metadata": {},
169 |    "source": [
170 |     "\n",
171 |     "The parentheses have been rearranged intentionally for the variable $i$. If we regress $x$ on $i$, we will get an estimate for the value of $\delta$. By then multiplying $\delta$ by $i$, we will get \"fitted\" values $\delta i$. We can then regress $y$ on $\delta i$ to get the value of $\alpha$, which is exactly what we want!\n",
172 |     "\n",
173 |     "Enough in words, let's look at the code for this!"
174 |    ]
175 |   },
176 |   {
177 |    "cell_type": "markdown",
178 |    "metadata": {},
179 |    "source": [
180 |     "## Mechanics \n",
181 |     "\n",
182 |     "First, we regress $x$ on $i$."
183 |    ]
184 |   },
185 |   {
186 |    "cell_type": "code",
187 |    "execution_count": null,
188 |    "metadata": {},
189 |    "outputs": [],
190 |    "source": [
191 |     "model = OLS.from_formula('x ~ i', data=df)\n",
192 |     "results = model.fit()\n",
193 |     "results.params['i']"
194 |    ]
195 |   },
196 |   {
197 |    "cell_type": "markdown",
198 |    "metadata": {},
199 |    "source": [
200 |     "Notice that the estimated $\delta$ is somewhat off from the true value of -1. That is expected: with noisy data, OLS gives us a noisy (but consistent) estimate of $\delta$, which is all we need here.\n",
201 |     "\n",
202 |     "To create the fitted $\delta i$, we multiply the learned regression parameter by the original values."
203 |    ]
204 |   },
205 |   {
206 |    "cell_type": "code",
207 |    "execution_count": null,
208 |    "metadata": {},
209 |    "outputs": [],
210 |    "source": [
211 |     "df['δi'] = df['i'] * results.params['i']"
212 |    ]
213 |   },
214 |   {
215 |    "cell_type": "markdown",
216 |    "metadata": {},
217 |    "source": [
218 |     "Then, we regress $y$ on $\delta i$:"
219 |    ]
220 |   },
221 |   {
222 |    "cell_type": "code",
223 |    "execution_count": null,
224 |    "metadata": {},
225 |    "outputs": [],
226 |    "source": [
227 |     "model = OLS.from_formula('y ~ δi', data=df)\n",
228 |     "results = model.fit()\n",
229 |     "results.params"
230 |    ]
231 |   },
232 |   {
233 |    "cell_type": "markdown",
234 |    "metadata": {},
235 |    "source": [
236 |     "Voila! We get back the effect of $x$ on $y$ by use of this instrument variable $i$! \n",
237 |     "\n",
238 |     "Really happy having seen that it works, and having seen some of the math that goes on behind it!\n",
239 |     "\n",
240 |     "## Assumptions\n",
241 |     "\n",
242 |     "Now, all of this sounds good and nice, but it does seem a bit \"magical\", to say the least. After all, \"linearity\" does seem like a very strong assumption. Moreover, in order to use an instrument variable, we have to justify that it has:\n",
243 |     "\n",
244 |     "1. a causal effect on $x$,\n",
245 |     "2. no direct causal effect on $y$ (it influences $y$ only through $x$), and\n",
246 |     "3. no causal connection to the confounder $h$.\n",
247 |     "\n",
248 |     "Indeed, there is no free lunch here: we have to use background knowledge (or other means) to justify why $i$ is a suitable instrument variable; simply asserting this point is not sufficient."
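    , "\n",
    "As a closing footnote (a sketch added here, not part of the original walkthrough), the two regressions above can be packaged into the standard two-stage least squares recipe: regress $x$ on $i$, then regress $y$ on the first-stage fitted values. The `x_hat` column name is purely illustrative; the recovered slope is the same as with the $\delta i$ trick:\n",
    "\n",
    "```python\n",
    "first_stage = OLS.from_formula('x ~ i', data=df).fit()\n",
    "df['x_hat'] = first_stage.fittedvalues\n",
    "second_stage = OLS.from_formula('y ~ x_hat', data=df).fit()\n",
    "second_stage.params  # the slope on x_hat should be close to alpha = 3\n",
    "```\n"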
249 |    ]
250 |   },
251 |   {
252 |    "cell_type": "markdown",
253 |    "metadata": {},
254 |    "source": [
255 |     "## Acknowledgments\n",
256 |     "\n",
257 |     "I would like to thank [Maximilian Goebel](https://www.maximiliangoebel.com)\n",
258 |     "for pointing out linguistic inconsistencies in this notebook and raising them with me."
259 |    ]
260 |   }
261 |  ],
262 |  "metadata": {
263 |   "kernelspec": {
264 |    "display_name": "Python 3.9.9 ('base')",
265 |    "language": "python",
266 |    "name": "python3"
267 |   },
268 |   "language_info": {
269 |    "codemirror_mode": {
270 |     "name": "ipython",
271 |     "version": 3
272 |    },
273 |    "file_extension": ".py",
274 |    "mimetype": "text/x-python",
275 |    "name": "python",
276 |    "nbconvert_exporter": "python",
277 |    "pygments_lexer": "ipython3",
278 |    "version": "3.9.9"
279 |   },
280 |   "vscode": {
281 |    "interpreter": {
282 |     "hash": "d4d1e4263499bec80672ea0156c357c1ee493ec2b1c70f0acce89fc37c4a6abe"
283 |    }
284 |   }
285 |  },
286 |  "nbformat": 4,
287 |  "nbformat_minor": 2
288 | }
289 | 
--------------------------------------------------------------------------------
/docs/03-d-separation.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "markdown",
5 |    "metadata": {},
6 |    "source": [
7 |     "# $d$-separation\n",
8 |     "\n",
9 |     "$d$-separation is a key concept in causal inference. \n",
10 |     "\n",
11 |     "*Why is $d$-separation important?* Looking at [this page](http://bayes.cs.ucla.edu/BOOK-2K/d-sep.html) (by Pearl himself):\n",
12 |     "\n",
13 |     "> $d$-separation is a criterion for deciding, from a given causal graph, whether a set $X$ of variables is independent of another set $Y$, given a third set $S$. (I modified that last symbol for consistency here.)"
14 |    ]
15 |   },
16 |   {
17 |    "cell_type": "code",
18 |    "execution_count": null,
19 |    "metadata": {},
20 |    "outputs": [],
21 |    "source": [
22 |     "%load_ext autoreload\n",
23 |     "%autoreload 2\n",
24 |     "%matplotlib inline\n",
25 |     "%config InlineBackend.figure_format = 'retina'\n",
26 |     "\n",
27 |     "import networkx as nx\n",
28 |     "from causality_notes import draw_graph"
29 |    ]
30 |   },
31 |   {
32 |    "cell_type": "markdown",
33 |    "metadata": {},
34 |    "source": [
35 |     "To get into $d$-separation, we first have to understand **paths** and **blocked paths**.\n",
36 |     "\n",
37 |     "One thing that I didn't grok immediately when reading Judea Pearl's book on Causality was the difference between a **path** and a **directed path**. For me, coming in with a fairly shallow network science background, I could not see how \"a path\" could be traced from $A$ to $D$ in the following graph:\n",
38 |     "\n",
39 |     "$$A \rightarrow B \leftarrow C \rightarrow D$$\n",
40 |     "\n",
41 |     "By simply traversing the graph from A, we can only ever arrive at B... right?\n",
42 |     "\n",
43 |     "Yes, but only if we accept the traditional \"network science\" definitions of a path. In causal inference, a path is just any undirected connection between variables; a directed path, on the other hand, has to follow the directions on the edges. Therefore, we have to consider the \"undirected\" version of the graph:\n",
44 |     "\n",
45 |     "$$A - B - C - D$$\n",
46 |     "\n",
47 |     "Mechanically, what we are doing when we are finding out whether two nodes $n_1$ and $n_2$ are $d$-separated or not is to first start with the undirected version of the causal graph, then find every single path between the two nodes in the undirected graph, then see if there are blockers to independence between $n_1$ and $n_2$ in the directed version of the graph (as determined by three rules, which I will go through below).\n",
48 |     "\n",
49 |     "This notebook is going to be structured as a hybrid between \"my notes from Jonas Peters' lecture series\" and \"an exercise in implementing algorithms\" related to $d$-separation and inferring causal structures from observational data (under certain assumptions)."
50 |    ]
51 |   },
52 |   {
53 |    "cell_type": "markdown",
54 |    "metadata": {},
55 |    "source": [
56 |     "Let's say we have the following causal structure:"
57 |    ]
58 |   },
59 |   {
60 |    "cell_type": "code",
61 |    "execution_count": null,
62 |    "metadata": {},
63 |    "outputs": [],
64 |    "source": [
65 |     "G = nx.DiGraph()\n",
66 |     "G.add_edge('x2', 'x1')\n",
67 |     "G.add_edge('x3', 'x1')\n",
68 |     "G.add_edge('x4', 'x3')\n",
69 |     "G.add_edge('x4', 'x5')\n",
70 |     "G.add_edge('x1', 'x5')\n",
71 |     "draw_graph(G)"
72 |    ]
73 |   },
74 |   {
75 |    "cell_type": "markdown",
76 |    "metadata": {},
77 |    "source": [
78 |     "There are some definitions that we have to get clear with.\n",
79 |     "\n",
80 |     "- **Path:** Convert the graph to an undirected graph. Then ask if there is a connection between the two nodes or not.\n",
81 |     "- **Directed Path:** Follow the arrows!\n",
82 |     "- **V-structure:** An example in the above graph is $x_1$: it has two parents, $x_3$ and $x_2$, which are not connected by an arrow to each other."
83 |    ]
84 |   },
85 |   {
86 |    "cell_type": "markdown",
87 |    "metadata": {},
88 |    "source": [
89 |     "From this, we then get to the definition of $d$-separation: Two nodes $x_i$ and $x_j$ are $d$-separated by the node set $S$ if all paths between $x_i$ and $x_j$ are blocked by the node set $S$.\n",
90 |     "\n",
91 |     "We also call node set $S$ the \"conditioning set\".\n",
92 |     "\n",
93 |     "There are three rules to determine this. For each node $n$ on a path, we check whether it looks like one of the following:\n",
94 |     "\n",
95 |     "1. $\rightarrow n \rightarrow$, where $n$ is in the conditioning set $S$,\n",
96 |     "1. $\leftarrow n \rightarrow$, where $n$ is in the conditioning set $S$\n",
97 |     "1. $\rightarrow n \leftarrow$, where $n$ is **not** in the conditioning set $S$ (these are the **v-structures**).\n",
98 |     "\n",
99 |     "There is a supplemental rule:\n",
100 |     "\n",
101 |     "1. If $n$ is a v-structure node that is **not** in $S$ but has a descendant in $S$, then $n$ does **not** block the path.\n",
102 |     "\n",
103 |     "(recall: don't follow the arrows, as we're not referring to directed paths)"
104 |    ]
105 |   },
106 |   {
107 |    "cell_type": "code",
108 |    "execution_count": null,
109 |    "metadata": {},
110 |    "outputs": [],
111 |    "source": [
112 |     "draw_graph(G)"
113 |    ]
114 |   },
115 |   {
116 |    "cell_type": "markdown",
117 |    "metadata": {},
118 |    "source": [
119 |     "## Example 1\n",
120 |     "\n",
121 |     "Anchoring ourselves in the above example, let's ask if $x_2$ and $x_5$ are $d$-separated by the node set $S = \{x_1, x_4\}$. \n",
122 |     "\n",
123 |     "1. $x_1$ lies on the path from $x_2$ to $x_5$, and looks like Rule \#1.\n",
124 |     "1. $x_4$ lies on the path from $x_2$ to $x_5$ (the path is $x_2 \rightarrow x_1 \leftarrow x_3 \leftarrow x_4 \rightarrow x_5$), and looks like Rule \#2.\n",
125 |     "\n",
126 |     "Therefore, by rules \#1 and \#2, $\{x_2, x_5\}$ are $d$-separated by $S = \{x_1, x_4\}$."
127 |    ]
128 |   },
129 |   {
130 |    "cell_type": "code",
131 |    "execution_count": null,
132 |    "metadata": {},
133 |    "outputs": [],
134 |    "source": [
135 |     "draw_graph(G)"
136 |    ]
137 |   },
138 |   {
139 |    "cell_type": "markdown",
140 |    "metadata": {},
141 |    "source": [
142 |     "## Example 2\n",
143 |     "\n",
144 |     "Let's ask if $x_1$ and $x_4$ are $d$-separated by the node set $S = \{x_2, x_3\}$.\n",
145 |     "\n",
146 |     "1. $x_2$ does not lie on any path from $x_1$ to $x_4$.\n",
147 |     "1. $x_3$ lies on a path from $x_1$ to $x_4$ (the path is $x_1 \leftarrow x_3 \leftarrow x_4$), and looks like Rule \#1.\n",
148 |     "1. The other path from $x_1$ to $x_4$ is $x_1 \rightarrow x_5 \leftarrow x_4$, and $x_5$ is not in the node set $S$, therefore this looks like Rule \#3.\n",
149 |     "\n",
150 |     "Therefore, by rules \#1 and \#3, $\{x_1, x_4\}$ are $d$-separated by $S = \{x_2, x_3\}$."
151 |    ]
152 |   },
153 |   {
154 |    "cell_type": "code",
155 |    "execution_count": null,
156 |    "metadata": {},
157 |    "outputs": [],
158 |    "source": [
159 |     "draw_graph(G)"
160 |    ]
161 |   },
162 |   {
163 |    "cell_type": "markdown",
164 |    "metadata": {},
165 |    "source": [
166 |     "## Example 3\n",
167 |     "\n",
168 |     "Finally, let's ask if $x_2$ and $x_4$ are $d$-separated by the node set $S = \{\}$.\n",
169 |     "\n",
170 |     "There are two paths from $x_2$ to $x_4$:\n",
171 |     "\n",
172 |     "1. $x_2 \rightarrow x_1 \rightarrow x_5 \leftarrow x_4$\n",
173 |     "1. $x_2 \rightarrow x_1 \leftarrow x_3 \leftarrow x_4$\n",
174 |     "\n",
175 |     "On the first path, the collider $x_5$ is not in the node set $S=\{\}$; on the second, the collider $x_1$ is not in $S$. Both paths therefore look like Rule \#3.\n",
176 |     "\n",
177 |     "Therefore, by rule \#3, $x_2$ and $x_4$ are $d$-separated by $S = \{\}$."
178 |    ]
179 |   },
180 |   {
181 |    "cell_type": "markdown",
182 |    "metadata": {},
183 |    "source": [
184 |     "# Algorithm\n",
185 |     "\n",
186 |     "From the above examples, I think I have a way of writing an algorithm that can automatically check for $d$-separation.\n",
187 |     "\n",
188 |     "Firstly, we have to define the three rules as functions."
189 |    ]
190 |   },
191 |   {
192 |    "cell_type": "code",
193 |    "execution_count": null,
194 |    "metadata": {},
195 |    "outputs": [],
196 |    "source": [
197 |     "from causality_notes import rule1, rule2, rule3"
198 |    ]
199 |   },
200 |   {
201 |    "cell_type": "code",
202 |    "execution_count": null,
203 |    "metadata": {},
204 |    "outputs": [],
205 |    "source": [
206 |     "# rule1??"
207 |    ]
208 |   },
209 |   {
210 |    "cell_type": "code",
211 |    "execution_count": null,
212 |    "metadata": {},
213 |    "outputs": [],
214 |    "source": [
215 |     "# rule2??"
216 |    ]
217 |   },
218 |   {
219 |    "cell_type": "code",
220 |    "execution_count": null,
221 |    "metadata": {},
222 |    "outputs": [],
223 |    "source": [
224 |     "# rule3??"
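    , "\n",
    "# NOTE: the functions below are NOT the causality_notes implementations\n",
    "# (inspect those with the ?? cells above); they are a minimal sketch of\n",
    "# what rule1/rule2/rule3 plausibly look like, added as a self-contained\n",
    "# reference. The _sketch names are hypothetical, so nothing imported\n",
    "# above gets shadowed.\n",
    "def _rule1_sketch(n, S, G, path):\n",
    "    # Chain: prev -> n -> nxt (in either direction along the path);\n",
    "    # the node blocks the path iff it is in the conditioning set S.\n",
    "    i = path.index(n)\n",
    "    prev, nxt = path[i - 1], path[i + 1]\n",
    "    chain = (G.has_edge(prev, n) and G.has_edge(n, nxt)) or (\n",
    "        G.has_edge(nxt, n) and G.has_edge(n, prev)\n",
    "    )\n",
    "    return chain and n in S\n",
    "\n",
    "\n",
    "def _rule2_sketch(n, S, G, path):\n",
    "    # Fork: prev <- n -> nxt; blocks iff n is in S.\n",
    "    i = path.index(n)\n",
    "    prev, nxt = path[i - 1], path[i + 1]\n",
    "    return G.has_edge(n, prev) and G.has_edge(n, nxt) and n in S\n",
    "\n",
    "\n",
    "def _rule3_sketch(n, S, G, path):\n",
    "    # Collider (v-structure): prev -> n <- nxt; blocks iff neither n nor\n",
    "    # any descendant of n is in S.\n",
    "    i = path.index(n)\n",
    "    prev, nxt = path[i - 1], path[i + 1]\n",
    "    collider = G.has_edge(prev, n) and G.has_edge(nxt, n)\n",
    "    return collider and n not in S and not (set(nx.descendants(G, n)) & set(S))"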
225 |    ]
226 |   },
227 |   {
228 |    "cell_type": "markdown",
229 |    "metadata": {},
230 |    "source": [
231 |     "Then, we define the $d$-separation algorithm.\n",
232 |     "\n",
233 |     "_Read through the code and comments to learn what's going on!_"
234 |    ]
235 |   },
236 |   {
237 |    "cell_type": "code",
238 |    "execution_count": null,
239 |    "metadata": {},
240 |    "outputs": [],
241 |    "source": [
242 |     "def d_separation(n1, n2, S, G: nx.DiGraph):\n",
243 |     "    \"\"\"\n",
244 |     "    Checks whether nodes n1 and n2 are d-separated by the set S.\n",
245 |     "    \n",
246 |     "    :param n1: A node in the graph G.\n",
247 |     "    :param n2: A node in the graph G.\n",
248 |     "    :param S: The conditioning set of interest.\n",
249 |     "    :param G: A NetworkX directed graph.\n",
250 |     "    :returns: (bool) dsep.\n",
251 |     "    \"\"\"\n",
252 |     "    # Defensive programming checks.\n",
253 |     "    def error(n):\n",
254 |     "        \"\"\"Custom error message for assertions below.\"\"\"\n",
255 |     "        return f\"node {n} not in graph G\"\n",
256 |     "    \n",
257 |     "    assert n1 in G.nodes(), error(n1)\n",
258 |     "    assert n2 in G.nodes(), error(n2)\n",
259 |     "    for n in S:\n",
260 |     "        assert n in G.nodes(), error(n)\n",
261 |     "    \n",
262 |     "    # First, we hold an undirected copy of the graph.\n",
263 |     "    Gpath = G.to_undirected()\n",
264 |     "    \n",
265 |     "    # Next, we check that a path exists from n1 to n2 at all (no path would mean trivial d-separation).\n",
266 |     "    assert nx.has_path(Gpath, n1, n2)\n",
267 |     "    \n",
268 |     "    # Next, we iterate over each path between n1 and n2, and check for the three rules.\n",
269 |     "    #\n",
270 |     "    # Any one of the three rules has to be fulfilled on a path for the path to be\n",
271 |     "    # blocked by the set S.\n",
272 |     "    #\n",
273 |     "    # However, blocking must occur on all paths, otherwise, the two nodes n1 and n2 are\n",
274 |     "    # not d-separated.\n",
275 |     "    paths_blocked = []\n",
276 |     "    for path in nx.all_simple_paths(Gpath, n1, n2):\n",
277 |     "        is_blocked = False\n",
278 |     "        for node in path:\n",
279 |     "            if node != n1 and node != n2:\n",
280 |     "                pass1 = rule1(node, S, G, path)\n",
281 |     "                pass2 = rule2(node, S, G, path)\n",
282 |     "                pass3 = rule3(node, S, G, path)\n",
283 |     "                if (pass1 or pass2 or pass3):\n",
284 |     "                    is_blocked = True\n",
285 |     "        paths_blocked.append(is_blocked)\n",
286 |     "    return all(paths_blocked)\n",
287 |     "    "
288 |    ]
289 |   },
290 |   {
291 |    "cell_type": "markdown",
292 |    "metadata": {},
293 |    "source": [
294 |     "Finally, let's run the test cases.\n",
295 |     "\n",
296 |     "From the examples above, $x_2$ and $x_5$ are $d$-separated by $\{x_1, x_4\}$:"
297 |    ]
298 |   },
299 |   {
300 |    "cell_type": "code",
301 |    "execution_count": null,
302 |    "metadata": {},
303 |    "outputs": [],
304 |    "source": [
305 |     "d_separation('x2', 'x5', set(['x1', 'x4']), G)"
306 |    ]
307 |   },
308 |   {
309 |    "cell_type": "markdown",
310 |    "metadata": {},
311 |    "source": [
312 |     "Also, $x_1$ and $x_4$ are $d$-separated by $\{x_2, x_3\}$:"
313 |    ]
314 |   },
315 |   {
316 |    "cell_type": "code",
317 |    "execution_count": null,
318 |    "metadata": {},
319 |    "outputs": [],
320 |    "source": [
321 |     "d_separation('x1', 'x4', set(['x2', 'x3']), G)"
322 |    ]
323 |   },
324 |   {
325 |    "cell_type": "markdown",
326 |    "metadata": {},
327 |    "source": [
328 |     "Finally, $x_2$ and $x_4$ are $d$-separated by $\{\}$ (an empty set of nodes):"
329 |    ]
330 |   },
331 |   {
332 |    "cell_type": "code",
333 |    "execution_count": null,
334 |    "metadata": {},
335 |    "outputs": [],
336 |    "source": [
337 |     "d_separation('x2', 'x4', set([]), G)"
338 |    ]
339 |   },
340 |   {
341 |    "cell_type": "markdown",
342 |    "metadata": {},
343 | "source": [ 344 | "Woohoo! \n", 345 | "\n", 346 | "The hard part about doing this manually is that it's difficult to manually enumerate all simple paths between two nodes on a graph. Like, tracing it and keeping it in memory is difficult. But implementing the rules as an algorithm helps.\n", 347 | "\n", 348 | "A few more tests: Edges should not be $d$-separated." 349 | ] 350 | }, 351 | { 352 | "cell_type": "code", 353 | "execution_count": null, 354 | "metadata": {}, 355 | "outputs": [], 356 | "source": [ 357 | "for n1, n2 in G.edges():\n", 358 | " assert not d_separation(n1, n2, set([]), G)" 359 | ] 360 | }, 361 | { 362 | "cell_type": "markdown", 363 | "metadata": {}, 364 | "source": [ 365 | "## Example 4\n", 366 | "\n", 367 | "Let's try a different causal graph, $G2$, which is part of [Example 3 in Pearl's $d$-separation without tears](http://bayes.cs.ucla.edu/BOOK-2K/d-sep.html)." 368 | ] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "execution_count": null, 373 | "metadata": {}, 374 | "outputs": [], 375 | "source": [ 376 | "import matplotlib.pyplot as plt\n", 377 | "\n", 378 | "G2 = nx.DiGraph()\n", 379 | "edges = ['xr', 'rw', 'rs', 'st', 'tp', 'ut', 'vu', 'vq', 'vy']\n", 380 | "edges = [(f'{i[0]}', f'{i[1]}') for i in edges]\n", 381 | "G2.add_edges_from(edges)\n", 382 | "\n", 383 | "fig = plt.figure()\n", 384 | "ax = fig.add_subplot(111)\n", 385 | "pos = {'x': (1, 0), 'r': (2, 0), 's': (3, 0), 't': (4, 0),\n", 386 | " 'u': (5, 0), 'v': (6, 0), 'y': (7, 0), 'w': (2, -1),\n", 387 | " 'p' : (4, -1), 'q': (6, -1)}\n", 388 | "# pos = nx.spring_layout(G2)\n", 389 | "nx.draw(G2, pos=pos, with_labels=True, ax=ax)\n", 390 | "ax.set_aspect('equal')\n", 391 | "ax.set_ylim(-2, 1)\n", 392 | "ax.set_xlim(-1, 8)\n", 393 | "plt.tight_layout()" 394 | ] 395 | }, 396 | { 397 | "cell_type": "markdown", 398 | "metadata": {}, 399 | "source": [ 400 | "In Pearl's page, he sets up a hypothetical regression of $y$ on $p$, $r$ and $x$:\n", 401 | "\n", 402 | "$$y = c_1p + c_2r + c_3x$$\n", 403 | "\n", 404 | "*A priori*, it is possible to know which regression coefficient is going to be zero, if we know the causal graph and assume that the relationship is linear.\n", 405 | "\n", 406 | "To check whether $c_3$ will be zero, we ask whether $y$ and $x$ are $d$-separated by $\\{p, r\\}$:" 407 | ] 408 | }, 409 | { 410 | "cell_type": "code", 411 | "execution_count": null, 412 | "metadata": {}, 413 | "outputs": [], 414 | "source": [ 415 | "d_separation('x', 'y', set(['r', 'p']), G2)" 416 | ] 417 | }, 418 | { 419 | "cell_type": "markdown", 420 | "metadata": {}, 421 | "source": [ 422 | "To check whether $c_1$ will be zero, we ask whether $y$ and $p$ are $d$-separated by $\\{r, x\\}$:" 423 | ] 424 | }, 425 | { 426 | "cell_type": "code", 427 | "execution_count": null, 428 | "metadata": {}, 429 | "outputs": [], 430 | "source": [ 431 | "d_separation('p', 'y', set(['x', 'r']), G2)" 432 | ] 433 | }, 434 | { 435 | "cell_type": "markdown", 436 | "metadata": {}, 437 | "source": [ 438 | "To check whether $c_2$ will be zero, we ask whether $y$ and $r$ are $d$-separated by $\\{x, p\\}$:" 439 | ] 440 | }, 441 | { 442 | "cell_type": "code", 443 | "execution_count": null, 444 | "metadata": {}, 445 | "outputs": [], 446 | "source": [ 447 | "d_separation('r', 'y', set(['x', 'p']), G2)" 448 | ] 449 | }, 450 | { 451 | "cell_type": "markdown", 452 | "metadata": {}, 453 | "source": [ 454 | "$y$ and $r$ are not $d$-separated (i.e. 
they are $d$-connected), because $t$, which is a collider (and would originally have blocked the path), has a descendant $p$ that is part of the conditioning set." 455 | ] 456 | }, 457 | { 458 | "cell_type": "markdown", 459 | "metadata": {}, 460 | "source": [ 461 | "So, why is this important? It allows us to state a thing called the \"Markov condition\":\n", 462 | "\n", 463 | "> The joint probability distribution $P$ between two variables $x_i$ and $x_j$ is Markov w.r.t. the graph $G$ if, for the conditioning set $S$:\n", 464 | ">\n", 465 | "> $x_i$ and $x_j$ are $d$-separated by $S$ in $G$.\n", 466 | ">\n", 467 | "> $\\implies$ (implies) $x_i$ is conditionally independent of $x_j$, conditioned on $S$." 468 | ] 469 | } 470 | ], 471 | "metadata": { 472 | "kernelspec": { 473 | "display_name": "Python 3", 474 | "language": "python", 475 | "name": "python3" 476 | }, 477 | "language_info": { 478 | "codemirror_mode": { 479 | "name": "ipython", 480 | "version": 3 481 | }, 482 | "file_extension": ".py", 483 | "mimetype": "text/x-python", 484 | "name": "python", 485 | "nbconvert_exporter": "python", 486 | "pygments_lexer": "ipython3", 487 | "version": "3.8.2-final" 488 | } 489 | }, 490 | "nbformat": 4, 491 | "nbformat_minor": 2 492 | } 493 | -------------------------------------------------------------------------------- /docs/04-finding-confounding-set.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Introduction\n", 8 | "\n", 9 | "Following up on $d$-separation, my colleagues and I chatted about how to find the confounding set of variables in a causal graph. This is another graph search problem. Let's see how this can be applied." 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "from causality_notes import rule1, rule2, rule3, path_nodes\n", 19 | "import networkx as nx\n", 20 | "import matplotlib.pyplot as plt\n", 21 | "\n", 22 | "%load_ext autoreload\n", 23 | "%autoreload 2\n", 24 | "%matplotlib inline\n", 25 | "%config InlineBackend.figure_format = 'retina'" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "From Judea Pearl's book, there is a diagram in chapter 4, `Figure 4.7`. Let's reproduce it here." 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "G = nx.DiGraph()\n", 42 | "edges = [\n", 43 | " ('D', 'A'), ('D', 'C'), ('F', 'C'),\n", 44 | " ('A', 'B'), ('C', 'B'), ('C', 'Y'),\n", 45 | " ('F', 'X'), ('F', 'Y'), ('C', 'E'),\n", 46 | " ('A', 'X'), ('E', 'X'), ('E', 'Y'),\n", 47 | " ('B', 'X'), ('X', 'Y'), ('G', 'X'), \n", 48 | " ('G', 'Y')\n", 49 | "]\n", 50 | "G.add_edges_from(edges)\n", 51 | "pos = {\n", 52 | " 'D': (0, 0),\n", 53 | " 'A': (1, 0.5),\n", 54 | " 'C': (1, -1),\n", 55 | " 'F': (1, -2),\n", 56 | " 'B': (2, -0.3),\n", 57 | " 'E': (2, 1),\n", 58 | " 'X': (4, 0.5),\n", 59 | " 'G': (4.5, -2),\n", 60 | " 'Y': (5, 0.5)\n", 61 | "}\n", 62 | "nx.draw(G, pos=pos, with_labels=True)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "To reveal the answer, the minimum confounding set is $\\{A, B, E, F, G\\}$." 
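    , "\n",
    "As a quick sanity check (a sketch added here, not in the original notebook; it assumes networkx >= 2.8 for `nx.d_separated`), this set should satisfy the backdoor criterion: once the edges leaving $X$ are deleted, it should d-separate $X$ and $Y$:\n",
    "\n",
    "```python\n",
    "H = G.copy()\n",
    "H.remove_edges_from(list(G.out_edges('X')))\n",
    "nx.d_separated(H, {'X'}, {'Y'}, {'A', 'B', 'E', 'F', 'G'})  # expect True\n",
    "```\n"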
70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "What we would like to know is what is the set of confounders that we need to control for in order to correctly estimate the effect of $X$ on $Y$.\n", 77 | "\n", 78 | "To do this, we use the following logic:\n", 79 | "\n", 80 | "1. Find all undirected paths between $X$ and $Y$.\n", 81 | "1. Traverse each node in the undirected paths. \n", 82 | "1. Check to see if, in the directed graph, the node blocks the path between $X$ and $Y$ if it were in the conditioning set.\n", 83 | " 1. If yes, then it should be included as a confounder. Break out and continue on to next path.\n", 84 | " 1. If no, it should not be included as a confounder." 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "Gpath = G.to_undirected()\n", 94 | "\n", 95 | "confounders = set()\n", 96 | "n1 = 'X'\n", 97 | "n2 = 'Y'\n", 98 | "for i, path in enumerate(nx.all_simple_paths(Gpath, n1, n2)):\n", 99 | " for n in path:\n", 100 | " if n is not n1 and n is not n2:\n", 101 | " pass1 = rule1(n, [n], G, path)\n", 102 | " pass2 = rule2(n, [n], G, path)\n", 103 | " pass3 = rule3(n, [], G, path)\n", 104 | " if pass1 or pass2 or pass3:\n", 105 | " confounders.add(n)\n", 106 | " # We break, because as soon as we find a good\n", 107 | " # blocking node, there is no need to continue\n", 108 | " # looking at other nodes.\n", 109 | " break\n", 110 | "confounders" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "We did it!" 118 | ] 119 | } 120 | ], 121 | "metadata": { 122 | "kernelspec": { 123 | "display_name": "causality", 124 | "language": "python", 125 | "name": "causality" 126 | }, 127 | "language_info": { 128 | "codemirror_mode": { 129 | "name": "ipython", 130 | "version": 3 131 | }, 132 | "file_extension": ".py", 133 | "mimetype": "text/x-python", 134 | "name": "python", 135 | "nbconvert_exporter": "python", 136 | "pygments_lexer": "ipython3", 137 | "version": "3.6.6" 138 | } 139 | }, 140 | "nbformat": 4, 141 | "nbformat_minor": 2 142 | } 143 | -------------------------------------------------------------------------------- /docs/05-collider-effect.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Introduction\n", 8 | "\n", 9 | "In this notebook, we will take a quick look at the \"collider\" effect.\n", 10 | "\n", 11 | "Let's say we have the following causal graph:\n", 12 | "\n", 13 | "$$a \\rightarrow b \\leftarrow c$$\n", 14 | "\n", 15 | "Apparently, if we \"condition\" on $b$, then $a$ and $c$ will be correlated, even though they are independent." 
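    , "\n",
    "If your networkx version has `nx.d_separated` (added in networkx 2.8; this check is a sketch appended here, not part of the original notebook), you can confirm that reading of the graph directly:\n",
    "\n",
    "```python\n",
    "import networkx as nx\n",
    "\n",
    "G = nx.DiGraph([('a', 'b'), ('c', 'b')])\n",
    "nx.d_separated(G, {'a'}, {'c'}, set())  # True: a and c are marginally independent\n",
    "nx.d_separated(G, {'a'}, {'c'}, {'b'})  # False: they become dependent given b\n",
    "```\n"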
16 |    ]
17 |   },
18 |   {
19 |    "cell_type": "code",
20 |    "execution_count": null,
21 |    "metadata": {},
22 |    "outputs": [],
23 |    "source": [
24 |     "import numpy as np\n",
25 |     "from causality_notes import noise\n",
26 |     "import pandas as pd\n",
27 |     "import seaborn as sns\n",
28 |     "\n",
29 |     "%load_ext autoreload\n",
30 |     "%autoreload 2\n",
31 |     "%matplotlib inline\n",
32 |     "%config InlineBackend.figure_format = 'retina'"
33 |    ]
34 |   },
35 |   {
36 |    "cell_type": "markdown",
37 |    "metadata": {},
38 |    "source": [
39 |     "# Generate Data\n",
40 |     "\n",
41 |     "Let's assume we have a causal model that follows the equations below:\n",
42 |     "\n",
43 |     "$$a \sim N(0, 1)$$\n",
44 |     "$$c \sim N(0, 1)$$\n",
45 |     "$$b = 20a - 20c + \epsilon, \quad \epsilon \sim N(0, 1)$$\n",
46 |     "\n",
47 |     "This is expressed in the code below."
48 |    ]
49 |   },
50 |   {
51 |    "cell_type": "code",
52 |    "execution_count": null,
53 |    "metadata": {},
54 |    "outputs": [],
55 |    "source": [
56 |     "size = 1000\n",
57 |     "a = noise(size)\n",
58 |     "c = noise(size)\n",
59 |     "b = 20*a - 20*c + noise(size)"
60 |    ]
61 |   },
62 |   {
63 |    "cell_type": "markdown",
64 |    "metadata": {},
65 |    "source": [
66 |     "We now make it into a pandas DataFrame."
67 |    ]
68 |   },
69 |   {
70 |    "cell_type": "code",
71 |    "execution_count": null,
72 |    "metadata": {},
73 |    "outputs": [],
74 |    "source": [
75 |     "df = pd.DataFrame({'a': a, 'b': b, 'c': c})"
76 |    ]
77 |   },
78 |   {
79 |    "cell_type": "markdown",
80 |    "metadata": {},
81 |    "source": [
82 |     "Let's view a pair plot to see the pairwise correlation (dependency) between the variables."
83 |    ]
84 |   },
85 |   {
86 |    "cell_type": "code",
87 |    "execution_count": null,
88 |    "metadata": {},
89 |    "outputs": [],
90 |    "source": [
91 |     "sns.pairplot(df)"
92 |    ]
93 |   },
94 |   {
95 |    "cell_type": "markdown",
96 |    "metadata": {},
97 |    "source": [
98 |     "Ok, as shown in the causal graph, $a$ and $c$ are independent of one another, and so distributionally, there's no trend between them."
99 |    ]
100 |   },
101 |   {
102 |    "cell_type": "markdown",
103 |    "metadata": {},
104 |    "source": [
105 |     "# Conditioning\n",
106 |     "\n",
107 |     "When we \"condition\" on a variable, remember that we are essentially taking a \"slice\" of a variable, and seeing what the distributions for the other variables are. I illustrated this on [my blog](https://ericmjl.github.io/blog/2018/8/7/joint-conditional-and-marginal-probability-distributions/).\n",
108 |     "\n",
109 |     "In our problem, this means that we have to slice out a range of the values of $b$:"
110 |    ]
111 |   },
112 |   {
113 |    "cell_type": "code",
114 |    "execution_count": null,
115 |    "metadata": {},
116 |    "outputs": [],
117 |    "source": [
118 |     "df_new = df[(df['b'] < df['b'].mean()) & (df['b'] > np.percentile(df['b'], 25))]"
119 |    ]
120 |   },
121 |   {
122 |    "cell_type": "markdown",
123 |    "metadata": {},
124 |    "source": [
125 |     "Now, let's visualize the relationship between $a$ and $c$, now conditioned on $b$."
126 |    ]
127 |   },
128 |   {
129 |    "cell_type": "code",
130 |    "execution_count": null,
131 |    "metadata": {},
132 |    "outputs": [],
133 |    "source": [
134 |     "ax = df_new.plot(kind='scatter', x='a', y='c')\n",
135 |     "ax.set_aspect('equal')\n",
136 |     "ax.set_title('conditioned on b')"
137 |    ]
138 |   },
139 |   {
140 |    "cell_type": "markdown",
141 |    "metadata": {},
142 |    "source": [
143 |     "We can also look at the full joint distribution of $a$ and $c$, colouring $b$ to illustrate what would happen if we conditioned on particular values of $b$."
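    , "\n",
    "As a complementary numerical check (a sketch, not in the original notebook), the Pearson correlations before and after conditioning tell the same story:\n",
    "\n",
    "```python\n",
    "# Marginal correlation between a and c: should be near 0.\n",
    "print(df['a'].corr(df['c']))\n",
    "# Correlation within the conditioned slice of b: clearly positive, because\n",
    "# a and c must co-vary to keep b = 20a - 20c inside the slice.\n",
    "print(df_new['a'].corr(df_new['c']))\n",
    "```\n"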
144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "ax = sns.scatterplot(data=df, x='a', y='c', hue='b')\n", 153 | "ax.set_aspect('equal')" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "metadata": {}, 159 | "source": [ 160 | "# Conclusion\n", 161 | "\n", 162 | "Here, we see that in a collider situation, if we condition on the child variable, the parents will be unduly correlated." 163 | ] 164 | } 165 | ], 166 | "metadata": { 167 | "kernelspec": { 168 | "display_name": "Python 3", 169 | "language": "python", 170 | "name": "python3" 171 | }, 172 | "language_info": { 173 | "codemirror_mode": { 174 | "name": "ipython", 175 | "version": 3 176 | }, 177 | "file_extension": ".py", 178 | "mimetype": "text/x-python", 179 | "name": "python", 180 | "nbconvert_exporter": "python", 181 | "pygments_lexer": "ipython3", 182 | "version": "3.7.1" 183 | } 184 | }, 185 | "nbformat": 4, 186 | "nbformat_minor": 2 187 | } 188 | -------------------------------------------------------------------------------- /docs/06-causality-identifiability.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Directional Identifiability\n", 8 | "\n", 9 | "From Jonas Peters' lecture 3 on causality at the Broad.\n", 10 | "\n", 11 | "If we have a presumed causal model of the linear form:\n", 12 | "\n", 13 | "$$Y = \\alpha X + N_y$$\n", 14 | "\n", 15 | "where $N_y$ is i.i.d. noise in $Y$, and $X$ and $N_y$ are both independent and non-Gaussian, then we cannot find\n", 16 | "\n", 17 | "$$X = \\beta Y + N_x$$\n", 18 | "\n", 19 | "where $N_x$ is i.i.d. noise in $X$ that also satisfies the independence constraints. \n", 20 | "\n", 21 | "In simpler words, if we assume that the distributions of $X$ and $N_y$ are non-Gaussian, then we will know that the causal model goes from $X \\rightarrow Y$ and not $Y \\rightarrow X$.\n", 22 | "\n", 23 | "Let's simulate this." 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "import numpy as np\n", 33 | "import matplotlib.pyplot as plt\n", 34 | "\n", 35 | "%load_ext autoreload\n", 36 | "%autoreload 2\n", 37 | "%matplotlib inline\n", 38 | "%config InlineBackend.figure_format = 'retina'" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "Firstly, we will generate non-Gaussian Xs and Ys." 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "X_ng = np.random.uniform(-1, 1, size=1000)\n", 55 | "alpha = 2\n", 56 | "N_y_ng = np.random.uniform(-0.4, 0.4, size=1000)\n", 57 | "y_ng = alpha * X_ng + N_y_ng" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "Now, let's plot Y against X." 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "plt.scatter(X_ng, y_ng)\n", 74 | "plt.ylabel(\"Y\")\n", 75 | "plt.xlabel(\"X\")\n", 76 | "plt.show()" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "Now, let's also simulate the case where $X$ and $N_y$ are Gaussian-distributed and independent." 
84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "X_g = np.random.normal(0, 0.5, size=1000)\n", 93 | "alpha_g = 2\n", 94 | "N_y_g = np.random.normal(0, 1, size=1000)\n", 95 | "y_g = alpha_g * X_g + N_y_g" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [ 104 | "plt.scatter(X_g, y_g)\n", 105 | "plt.xlabel(\"X\")\n", 106 | "plt.ylabel(\"Y\")\n", 107 | "plt.show()" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "We will now fit X as a function of Y, and do a residual analysis to see whether our residuals (i.e. noise) are independent of the input (in this case Y). Remember, we are looking to check whether the inverse model also satisfies the independence constraint." 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": [ 123 | "from sklearn.linear_model import LinearRegression\n", 124 | "lm = LinearRegression()" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": {}, 130 | "source": [ 131 | "Firstly, we fit X as a function of Y and obtain a coefficient." 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "lm.fit(y_g.reshape(-1, 1), X_g)\n", 141 | "coeff_g = lm.coef_" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "We then do the same for the non-Gaussian case." 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "lm.fit(y_ng.reshape(-1, 1), X_ng)\n", 158 | "coeff_ng = lm.coef_" 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": {}, 164 | "source": [ 165 | "Great! Now that we have the coefficients out, let's move on to the analysis of residuals. We will be checking whether the residuals ($X - \\beta Y$) are independent of $Y$." 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "First off, the Gaussian case." 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [ 181 | "residuals_g = X_g - coeff_g * y_g\n", 182 | "plt.scatter(y_g, residuals_g)\n", 183 | "plt.xlabel(\"Y\")\n", 184 | "plt.ylabel(\"residual\")\n", 185 | "plt.title(\"Gaussian\")\n", 186 | "plt.show()" 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": {}, 192 | "source": [ 193 | "We see that there is no trend in the residuals." 194 | ] 195 | }, 196 | { 197 | "cell_type": "markdown", 198 | "metadata": {}, 199 | "source": [ 200 | "Now, the non-Gaussian case."
201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "metadata": {}, 207 | "outputs": [], 208 | "source": [ 209 | "residuals_ng = X_ng - coeff_ng * y_ng\n", 210 | "plt.scatter(y_ng, residuals_ng)\n", 211 | "plt.xlabel(\"Y\")\n", 212 | "plt.ylabel(\"residuals\")\n", 213 | "plt.title(\"non-Gaussian\")\n", 214 | "plt.show()" 215 | ] 216 | }, 217 | { 218 | "cell_type": "markdown", 219 | "metadata": {}, 220 | "source": [ 221 | "We see that there is a clear trend - residual depends on the value of y in the non-Gaussian case, whereas it does not in the Gaussian case.\n", 222 | "\n", 223 | "This empirical simulation illustrates how we cannot recover an inverse model where the noise in X ($N_x$) is independent of the value of $Y$. Hence, we have an **identifiable** model under non-Gaussian assumptions." 224 | ] 225 | } 226 | ], 227 | "metadata": { 228 | "kernelspec": { 229 | "display_name": "Python 3", 230 | "language": "python", 231 | "name": "python3" 232 | }, 233 | "language_info": { 234 | "codemirror_mode": { 235 | "name": "ipython", 236 | "version": 3 237 | }, 238 | "file_extension": ".py", 239 | "mimetype": "text/x-python", 240 | "name": "python", 241 | "nbconvert_exporter": "python", 242 | "pygments_lexer": "ipython3", 243 | "version": "3.7.1" 244 | } 245 | }, 246 | "nbformat": 4, 247 | "nbformat_minor": 2 248 | } 249 | -------------------------------------------------------------------------------- /docs/07-do-operator.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# The Do-Operator and Bayesian Models\n", 8 | "\n", 9 | "The do-operator is a foundational idea in causal methods,\n", 10 | "used to express, in precise mathematical language,\n", 11 | "what counterfactual interventions would look like.\n", 12 | "The do-operator from causal inference has a tight connection to probabilistic modelling.\n", 13 | "But what exactly is that connection?\n", 14 | "\n", 15 | "Having myself been previously confused about the link between graphical models,\n", 16 | "causal structure, and more,\n", 17 | "here's my current understanding of the connection.\n", 18 | "What I hope you'll see here is that the do-operator\n", 19 | "gives us the ability to simulate what an experiment might look like...\n", 20 | "without actually doing the experiment,\n", 21 | "and that at its implementation core,\n", 22 | "it ends up being nothing more than \"set this thing to a particular value\".\n", 23 | "If you're curious now, it's time to read on :)." 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "## Causal models and Bayesian models, in brief\n", 31 | "\n", 32 | "Let’s start first with a brief overview of the connection between \n", 33 | "causal models and Bayesian models. \n", 34 | "In doing [my own study on causality](https://github.com/ericmjl/causality), \n", 35 | "it soon became clear to me that causal models \n", 36 | "can be read off from mathematical equations quite easily. 
\n", 37 | "For a moment, let's assume we have the following set of equations \n", 38 | "that we presume to describe some data that we observe:" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "\n", 46 | "$$b = \\delta d + \\epsilon e + \\sigma_b$$\n", 47 | "\n", 48 | "and \n", 49 | "\n", 50 | "$$a = \\beta b + \\gamma c + \\sigma_a$$\n" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "\n", 58 | "My convention here is that the English letters represent data, \n", 59 | "while the Greek letters represent parameters of the model.\n" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "To make this abstract example a bit more concrete,\n", 67 | "let's connect these equations to something in real life,\n", 68 | "such as scholastic achievement.\n", 69 | "In this highly contrived and fictitious example,\n", 70 | "let's say that $a$ is the scholastic achievement of a student \n", 71 | "as measured by the sum total of subject scores\n", 72 | "across 11 subjects in the O-level examinations \n", 73 | "administered by the University of Cambridge,\n", 74 | "and it is thought to be a function of:\n", 75 | "\n", 76 | "- $b$, the intelligence quotient (IQ) of the student,\n", 77 | "- $c$, the cost of a student's tuition fees in thousands of dollars (private vs. public school), and\n", 78 | "- $\\sigma_a$, the intrinsic variation in student performance as a whole\n", 79 | "\n", 80 | "For the term $b$, we think it is a function of:\n", 81 | "\n", 82 | "- $d$, the crime rate of their neighborhood, \n", 83 | "- $e$, the household income of that student, in thousands of dollars, and\n", 84 | "- $\\sigma_b$, the intrinsic variation in IQ.\n", 85 | "\n", 86 | "The astute reader will immediately see the controversy in this example.\n", 87 | "The causal path we assume here isn't going to be the only plausible model,\n", 88 | "and there are many, many variables we're leaving out here.\n", 89 | "However, I'd ask that you let your mind suspend the controversy for a moment;\n", 90 | "it's always a big brain teaser to try to come up with plausible but fictitious examples." 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "**To read off the causal model from this set of equations, \n", 98 | "everything on the right is causal for the stuff on the left.**\n", 99 | "By those equations, \n", 100 | "we state that the values of $a$ that we observe \n", 101 | "are caused by values of $b$ and $c$ and their Greek coefficients $\\beta$ and $\\gamma$\n", 102 | "plus some noise $\\sigma_a$,\n", 103 | "while the values of $b$ that we observe \n", 104 | "are caused by the values of $d$ and $e$ and their Greek coefficients $\\delta$ and $\\epsilon$\n", 105 | "plus some noise $\\sigma_b$.\n" 106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | "source": [ 112 | "**With Bayesian inference on this model, \n", 113 | "our task here is to estimate the coefficients**, \n", 114 | "$\\beta$, $\\gamma$, $\\delta$, and $\\epsilon$. \n", 115 | "More precisely, we are interested in estimating their expected value and uncertainty. 
" 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "In order to express the model in such a way that reflects its probabilistic in nature, \n", 123 | "we need some syntax to express the idea that \n", 124 | "because $\\delta$, $\\epsilon$ and $\\sigma_b$ are uncertain \n", 125 | "and are modelled by a probability distribution, \n", 126 | "therefore $b$ is uncertain and has a corresponding probability distribution too. \n", 127 | "\n", 128 | "At the same time, \n", 129 | "we need an analogous syntax to express that\n", 130 | "because $\\beta$, $\\gamma$, and $\\sigma_a$ are uncertain\n", 131 | "and are modelled by a probability distribution,\n", 132 | "therefore $a$ is also uncertain and has a corresponding probability distribution too. \n" 133 | ] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "metadata": {}, 138 | "source": [ 139 | "\n", 140 | "Moreover, if we assume that $c$, $d$, $e$ are stochastic \n", 141 | "because they are drawn from a distribution, \n", 142 | "then we have ~~a classic case where everything is unknown and we can’t do anything~~ \n", 143 | "an awesome modelling problem at hand! 🙂 \n", 144 | "In any case, for the first equation, \n", 145 | "our expression for the distribution of $b$ \n", 146 | "conditioned on everything on the right would look something like:\n", 147 | "\n", 148 | "$$P(b | \\delta, \\epsilon, \\sigma_b, d, e)$$\n", 149 | "\n", 150 | "And likewise, the distribution of $a$ conditioned on everything on the right would look something like this:\n", 151 | "\n", 152 | "$$P(a | \\beta, \\gamma, \\sigma_a, b, c)$$\n" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "Now, in Bayesian inference, \n", 160 | "we usually regard $(b, c, d, e)$ as being fixed (and hence sacred), \n", 161 | "because they are the *data that we have observed.* \n", 162 | "By convention, in many probabilistic modelling problems, \n", 163 | "we’re not really concerned about the data generating processes for $(b, c, d, e)$ \n", 164 | "because they aren’t usually the sample-invariant, \n", 165 | "intrinsic parameter of a system we’re interested in,\n", 166 | "unlike the Greek-letter symbols which take on their values\n", 167 | "independent of any given measured sample." 168 | ] 169 | }, 170 | { 171 | "cell_type": "markdown", 172 | "metadata": {}, 173 | "source": [ 174 | "## Causal inference vs Bayesian inference, in brief\n", 175 | "\n", 176 | "I’d now like to address “causal inference” vs. “bayesian inference”; I think the use of a contrast is going to be helpful here.\n", 177 | "\n", 178 | "Bayesian inference asks the question, “given the observed data and a presumed model with parameters, what is the expectation and uncertainty in the parameters that could have generated the observed data?”\n", 179 | "\n", 180 | "Causal inference asks the question, “given the observed data, what are plausible structures of the model that could have generated the observed data?”" 181 | ] 182 | }, 183 | { 184 | "cell_type": "markdown", 185 | "metadata": {}, 186 | "source": [ 187 | "## Structures: in graphics and in equations\n", 188 | "\n", 189 | "By structures, we’re really asking about the relationships between variables. \n", 190 | "Using the above example again, \n", 191 | "if $a$ is caused by $b$ and $c$, \n", 192 | "then in abstract, we'd write that $a = f(b, c)$. 
\n", 193 | "We would then draw the following diagram to express the relationship:\n" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": null, 199 | "metadata": {}, 200 | "outputs": [], 201 | "source": [ 202 | "%config InlineBackend.figure_format='retina'" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": null, 208 | "metadata": {}, 209 | "outputs": [], 210 | "source": [ 211 | "import daft\n", 212 | "\n", 213 | "scale = 2.0\n", 214 | "pgm = daft.PGM()\n", 215 | "pgm.add_node(\"a\", r\"$a$\", 1.5, 1)\n", 216 | "pgm.add_node(\"b\", r\"$b$\", 1, 2)\n", 217 | "pgm.add_node(\"c\", r\"$c$\", 2, 2)\n", 218 | "\n", 219 | "a_color = {\"ec\": \"blue\"}\n", 220 | "\n", 221 | "pgm.add_edge(\"b\", \"a\", plot_params=a_color)\n", 222 | "pgm.add_edge(\"c\", \"a\", plot_params=a_color)\n", 223 | "pgm.render();" 224 | ] 225 | }, 226 | { 227 | "cell_type": "markdown", 228 | "metadata": {}, 229 | "source": [ 230 | "Likewise, since we presume that $b$ is caused by $d$ and $e$, \n", 231 | "then the functional form of the causal relationship will be $b = g(d, e)$. \n", 232 | "We would then draw the following diagram to express the relationship:" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": null, 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [ 241 | "pgm = daft.PGM()\n", 242 | "pgm.add_node(\"b\", r\"$b$\", 1.5, 1)\n", 243 | "pgm.add_node(\"d\", r\"$d$\", 1, 2)\n", 244 | "pgm.add_node(\"e\", r\"$e$\", 2, 2)\n", 245 | "\n", 246 | "b_color = {\"ec\": \"red\"}\n", 247 | "\n", 248 | "pgm.add_edge(\"d\", \"b\", plot_params=b_color)\n", 249 | "pgm.add_edge(\"e\", \"b\", plot_params=b_color)\n", 250 | "pgm.render();" 251 | ] 252 | }, 253 | { 254 | "cell_type": "markdown", 255 | "metadata": {}, 256 | "source": [ 257 | "And taken together, the full model would look like:" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": null, 263 | "metadata": {}, 264 | "outputs": [], 265 | "source": [ 266 | "pgm = daft.PGM()\n", 267 | "pgm.add_node(\"a\", r\"$a$\", 1.5, 1)\n", 268 | "pgm.add_node(\"b\", r\"$b$\", 2, 2)\n", 269 | "pgm.add_node(\"c\", r\"$c$\", 1, 2)\n", 270 | "pgm.add_node(\"d\", r\"$d$\", 1.5, 3)\n", 271 | "pgm.add_node(\"e\", r\"$e$\", 2.5, 3)\n", 272 | "\n", 273 | "pgm.add_edge(\"c\", \"a\", plot_params=a_color)\n", 274 | "pgm.add_edge(\"b\", \"a\", plot_params=a_color)\n", 275 | "pgm.add_edge(\"d\", \"b\", plot_params=b_color)\n", 276 | "pgm.add_edge(\"e\", \"b\", plot_params=b_color)\n", 277 | "\n", 278 | "pgm.add_text(3, 1.25, r\"$a = f(b, c) = \\beta b + \\gamma c + \\sigma_a$\")\n", 279 | "pgm.add_text(3, 2.4, r\"$b = f(d, e) = \\delta d + \\epsilon e + \\sigma_b$\")\n", 280 | "\n", 281 | "pgm.render();" 282 | ] 283 | }, 284 | { 285 | "cell_type": "markdown", 286 | "metadata": {}, 287 | "source": [ 288 | "In economics, the term “structural equation models” \n", 289 | "refers to the system of equations \n", 290 | "that form the econometric model that economists build. \n", 291 | "Usually, those models are linear in nature. \n", 292 | "On occasion, though, additional functional forms might be used \n", 293 | "(sigmoidal, piecewise linear, neural network, etc.), \n", 294 | "if they help model the phenomena at hand. \n", 295 | "Whatever the form of the equation gets encapsulated into $f(v_1, v_2, ..., v_n)$,\n", 296 | "where $v_1... v_n$ refer to the variables like $b$ and $c$ above." 
297 | ] 298 | }, 299 | { 300 | "cell_type": "markdown", 301 | "metadata": {}, 302 | "source": [ 303 | "Structure, then, can refer to both the graphical structure of the model \n", 304 | "and the particular form of equations. \n", 305 | "As far as I have seen, \n", 306 | "most causal models assume some kind of linear equation between variables, \n", 307 | "though there are exceptions; \n", 308 | "in addition, causal inference, as taught, \n", 309 | "is usually concerned with inferring the *graphical* relationship between variables, \n", 310 | "presuming some linear form underneath. \n", 311 | "Inferring the model structure is what we call \"model inference\"." 312 | ] 313 | }, 314 | { 315 | "cell_type": "markdown", 316 | "metadata": {}, 317 | "source": [ 318 | "*A note for deep learners: this is what model inference actually is: \n", 319 | "inferring the structure of a model. \n", 320 | "More generally, if you think about linguistic convention, \n", 321 | "“X inference” usually refers to the tools and processes used in inferring X. \n", 322 | "Calling model inference the forward pass through the model breaks linguistic convention, \n", 323 | "and hence introduces viscosity in communication with others \n", 324 | "who adopt said linguistic convention!*" 325 | ] 326 | }, 327 | { 328 | "cell_type": "markdown", 329 | "metadata": {}, 330 | "source": [ 331 | "## Restating the distinction\n", 332 | "\n", 333 | "At this point, I think it's an appropriate moment to try to re-state clearly\n", 334 | "what the relationship between a \"causal inference\" and a \"Bayesian inference\" is.\n", 335 | "They are **both** concerned with the system of equations that make up our model.\n", 336 | "However, in causal inference, \n", 337 | "we are primarily concerned with the relationship between observed variables, \n", 338 | "expressed as math equations.\n", 339 | "In Bayesian inference, we are primarily concerned with the parameters of those equations\n", 340 | "and their uncertainty." 
341 | ] 342 | }, 343 | { 344 | "cell_type": "markdown", 345 | "metadata": {}, 346 | "source": [ 347 | "## The do-operator\n", 348 | "\n", 349 | "Finally, we reach the point where we can touch the do-operator!\n", 350 | "This is such a key and crucial idea to causal methods,\n", 351 | "as it allows us to make **counterfactual** arguments conditioned on a presumed model.\n", 352 | "To illustrate what we mean by the do-operator,\n", 353 | "I'm going to rely on code, prose, and equations together.\n", 354 | "To start, let's implement the full probabilistic model above in Python code.\n", 355 | "\n" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": null, 361 | "metadata": {}, 362 | "outputs": [], 363 | "source": [ 364 | "import numpy as np \n", 365 | "from scipy.stats import norm\n", 366 | "import pandas as pd \n" 367 | ] 368 | }, 369 | { 370 | "cell_type": "code", 371 | "execution_count": null, 372 | "metadata": {}, 373 | "outputs": [], 374 | "source": [ 375 | "# Equations\n", 376 | "beta = 10\n", 377 | "gamma = 0.3\n", 378 | "delta = -30\n", 379 | "epsilon = 0.1\n", 380 | "scale = 10 # NB: scipy's norm takes the standard deviation (scale), not the variance\n", 381 | "sigma_a_dist = norm(0, scale)\n", 382 | "sigma_b_dist = norm(0, scale)\n", 383 | "\n", 384 | "\n", 385 | "def a_equation(b, c):\n", 386 | " return beta * b + gamma * c + sigma_a_dist.rvs(len(b))\n", 387 | "\n", 388 | "def b_equation(d, e):\n", 389 | " return delta * d + epsilon * e + sigma_b_dist.rvs(len(d))" 390 | ] 391 | }, 392 | { 393 | "cell_type": "markdown", 394 | "metadata": {}, 395 | "source": [ 396 | "To generate the data, we always begin with the nodes\n", 397 | "that have no parental nodes.\n", 398 | "In more complicated networks, \n", 399 | "we would leverage tools from network science, in this case, the _topological sort_,\n", 400 | "to identify the exact order in which we need to simulate observations.\n", 401 | "The observed data that we would end up collecting \n", 402 | "for this system looks like the following:" 403 | ] 404 | }, 405 | { 406 | "cell_type": "code", 407 | "execution_count": null, 408 | "metadata": {}, 409 | "outputs": [], 410 | "source": [ 411 | "N = 1_000\n", 412 | "cc = norm(50, 5).rvs(N)\n", 413 | "dd = norm(-4, 1).rvs(N)\n", 414 | "ee = norm(30, 4).rvs(N)\n", 415 | "\n", 416 | "bb = b_equation(dd, ee)\n", 417 | "aa = a_equation(bb, cc)\n", 418 | "\n", 419 | "\n", 420 | "data = pd.DataFrame({\"a\": aa, \"b\": bb, \"c\": cc, \"d\": dd, \"e\": ee})\n", 421 | "data" 422 | ] 423 | }, 424 | { 425 | "cell_type": "markdown", 426 | "metadata": {}, 427 | "source": [ 428 | "And if we were to visualize the marginal distribution of each of the variables:" 429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "execution_count": null, 434 | "metadata": {}, 435 | "outputs": [], 436 | "source": [ 437 | "import matplotlib.pyplot as plt\n", 438 | "fig, axes = plt.subplots(figsize=(15, 3), ncols=5)\n", 439 | "\n", 440 | "for var, ax in zip([aa, bb, cc, dd, ee], axes.flatten()):\n", 441 | " ax.hist(var)\n", 442 | "\n", 443 | "plt.tight_layout()" 444 | ] 445 | }, 446 | { 447 | "cell_type": "markdown", 448 | "metadata": {}, 449 | "source": [ 450 | "Finally, plotting the joint distributions, we get:" 451 | ] 452 | }, 453 | { 454 | "cell_type": "code", 455 | "execution_count": null, 456 | "metadata": {}, 457 | "outputs": [], 458 | "source": [ 459 | "import seaborn as sns \n", 460 | "\n", 461 | "sns.pairplot(data)" 462 | ] 463 | }, 464 | { 465 | "cell_type": "markdown", 466 | "metadata": {}, 467 | "source": [ 468 | "Now that we have a feel for what kind
of data we'll collect,\n", 469 | "let's explore the three ways that the do-operator can be used." 470 | ] 471 | }, 472 | { 473 | "cell_type": "markdown", 474 | "metadata": {}, 475 | "source": [ 476 | "### The do-operator: local counterfactuals\n", 477 | "\n", 478 | "At an elementary level, the do-operator expresses mathematically the following question:\n", 479 | "“For a given sample, what if one of its variables took on a different value?” \n", 480 | "You can think of this as a \"personalized\" counterfactual for a given sample,\n", 481 | "whether that sample is a patient, student, employee, citizen, or some other thing.\n", 482 | "\n", 483 | "Some concrete examples of this in action are:\n", 484 | "\n", 485 | "1. “For this given patient, what if the treatment given was the alternative treatment?”\n", 486 | "2. “For this given student, what if they came from a wealthier household?”\n", 487 | "3. “For this given tennis player, what if the court they played on were grass instead of clay?”\n" 488 | ] 489 | }, 490 | { 491 | "cell_type": "markdown", 492 | "metadata": {}, 493 | "source": [ 494 | "\n", 495 | "As you can see, the questions we’re asking when posing counterfactuals \n", 496 | "are in fact *personalized* for a given sample.\n", 497 | "\n", 498 | "If we go back to the structural equations above, \n", 499 | "we could isolate a given observational data point $(a, b, c)$ and then ask the question, \n", 500 | "“What if in row $i$, $b$ took on a certain value $B$ instead of $b_i$?” \n", 501 | "This question, when asked, results in our probabilistic structure changing a bit:\n", 502 | "\n", 503 | "1. We are now asking for $P(a_i | do(b_i=B), \\beta, \\gamma, c_i)$, where $i$ refers to the particular sample index.\n", 504 | "2. Our counterfactual question presumes a known value of $b$, and hence no longer requires us to generate it from $(d, e)$. We can effectively cut $(d, e)$ out of the picture.\n", 505 | "\n", 506 | "As long as we preserve uncertainty in the parameter values, \n", 507 | "we can obtain counterfactual uncertainty as well.\n", 508 | "To illustrate how we do personalized counterfactuals in a Bayesian setting,\n", 509 | "let's see how to do it with PyMC." 510 | ] 511 | }, 512 | { 513 | "cell_type": "markdown", 514 | "metadata": {}, 515 | "source": [ 516 | "### Implementation with PyMC\n", 517 | "\n", 518 | "We're going to start first by implementing the models.\n", 519 | "There are no do-operations happening just yet;\n", 520 | "we're just writing down the equations in PyMC first." 521 | ] 522 | }, 523 | { 524 | "cell_type": "code", 525 | "execution_count": null, 526 | "metadata": {}, 527 | "outputs": [], 528 | "source": [ 529 | "import pymc as pm" 530 | ] 531 | }, 532 | { 533 | "cell_type": "markdown", 534 | "metadata": {}, 535 | "source": [ 536 | "We first implement the set of equations."
537 | ] 538 | }, 539 | { 540 | "cell_type": "code", 541 | "execution_count": null, 542 | "metadata": {}, 543 | "outputs": [], 544 | "source": [ 545 | "with pm.Model() as model_a:\n", 546 | " a = pm.Data(\"a\", data[\"a\"])\n", 547 | " b = pm.Data(\"b\", data[\"b\"])\n", 548 | " c = pm.Data(\"c\", data[\"c\"])\n", 549 | " # Priors for model parameters\n", 550 | " beta = pm.Normal(\"beta\", mu=10, sigma=3)\n", 551 | " gamma = pm.Normal(\"gamma\", mu=0, sigma=10)\n", 552 | " sigma_a = pm.Exponential(\"sigma_a\", lam=1)\n", 553 | " pm.Normal(\"like\", mu=beta * b + gamma * c, sigma=sigma_a, observed=a)\n", 554 | "\n", 555 | "\n", 556 | "with pm.Model() as model_b:\n", 557 | " b = pm.Data(\"b\", data[\"b\"])\n", 558 | " d = pm.Data(\"d\", data[\"d\"])\n", 559 | " e = pm.Data(\"e\", data[\"e\"])\n", 560 | " # Priors for model parameters\n", 561 | " delta = pm.Normal(\"delta\", mu=-15, sigma=15)\n", 562 | "\n", 563 | " epsilon = pm.Normal(\"epsilon\", mu=0, sigma=1)\n", 564 | " sigma_b = pm.Exponential(\"sigma_b\", lam=1)\n", 565 | " pm.Normal(\"like\", mu=delta * d + epsilon * e, sigma=sigma_b, observed=b)\n", 566 | "\n", 567 | "\n", 568 | "with pm.Model() as model_joint:\n", 569 | " a = pm.Data(\"a\", data[\"a\"])\n", 570 | " b = pm.Data(\"b\", data[\"b\"])\n", 571 | " c = pm.Data(\"c\", data[\"c\"])\n", 572 | " d = pm.Data(\"d\", data[\"d\"])\n", 573 | " e = pm.Data(\"e\", data[\"e\"])\n", 574 | " # Priors for model parameters\n", 575 | " delta = pm.Normal(\"delta\", mu=-15, sigma=15)\n", 576 | "\n", 577 | " epsilon = pm.Normal(\"epsilon\", mu=0, sigma=1)\n", 578 | " sigma_b = pm.Exponential(\"sigma_b\", lam=1)\n", 579 | " pm.Normal(\"like_b\", mu=delta * d + epsilon * e, sigma=sigma_b, observed=b)\n", 580 | " # Priors for model parameters\n", 581 | " beta = pm.Normal(\"beta\", mu=10, sigma=3)\n", 582 | " gamma = pm.Normal(\"gamma\", mu=0, sigma=10)\n", 583 | " sigma_a = pm.Exponential(\"sigma_a\", lam=1)\n", 584 | " pm.Normal(\"like_a\", mu=beta * b + gamma * c, sigma=sigma_a, observed=a)\n" 585 | ] 586 | }, 587 | { 588 | "cell_type": "markdown", 589 | "metadata": {}, 590 | "source": [ 591 | "Let's now plot the graphical model provided by PyMC." 592 | ] 593 | }, 594 | { 595 | "cell_type": "code", 596 | "execution_count": null, 597 | "metadata": {}, 598 | "outputs": [], 599 | "source": [ 600 | "pm.model_to_graphviz(model_a)" 601 | ] 602 | }, 603 | { 604 | "cell_type": "markdown", 605 | "metadata": {}, 606 | "source": [ 607 | "In this model, we see that $a$, $b$, and $c$ are all observed data,\n", 608 | "nested within the plate representing 1,000 data points.\n", 609 | "$\\beta$, $\\gamma$, and $\\sigma_a$ are the parameters of the model\n", 610 | "that are invariant to any particular data point\n", 611 | "and hence are located outside of the plate.\n", 612 | "Those are our system-level parameters.\n", 613 | "An analogous diagram exists for $b$'s model as well:" 614 | ] 615 | }, 616 | { 617 | "cell_type": "code", 618 | "execution_count": null, 619 | "metadata": {}, 620 | "outputs": [], 621 | "source": [ 622 | "pm.model_to_graphviz(model_b)" 623 | ] 624 | }, 625 | { 626 | "cell_type": "code", 627 | "execution_count": null, 628 | "metadata": {}, 629 | "outputs": [], 630 | "source": [ 631 | "pm.model_to_graphviz(model_joint)" 632 | ] 633 | }, 634 | { 635 | "cell_type": "markdown", 636 | "metadata": {}, 637 | "source": [ 638 | "### Parameter inference\n", 639 | "\n", 640 | "We can now use PyMC's inference machinery, the Inference Button (tm),\n", 641 | "to infer the values of the parameters above."
642 | ] 643 | }, 644 | { 645 | "cell_type": "code", 646 | "execution_count": null, 647 | "metadata": {}, 648 | "outputs": [], 649 | "source": [ 650 | "with model_a:\n", 651 | " idata_a = pm.sample()" 652 | ] 653 | }, 654 | { 655 | "cell_type": "code", 656 | "execution_count": null, 657 | "metadata": {}, 658 | "outputs": [], 659 | "source": [ 660 | "with model_b:\n", 661 | " idata_b = pm.sample()" 662 | ] 663 | }, 664 | { 665 | "cell_type": "code", 666 | "execution_count": null, 667 | "metadata": {}, 668 | "outputs": [], 669 | "source": [ 670 | "with model_joint:\n", 671 | " idata_joint = pm.sample()" 672 | ] 673 | }, 674 | { 675 | "cell_type": "markdown", 676 | "metadata": {}, 677 | "source": [ 678 | "I will be skipping over a rigorous Bayesian workflow here\n", 679 | "because it is not the point of the notebook.\n", 680 | "Nonetheless, here is a set of plots for our posteriors;\n", 681 | "you can check them against the original parameter values above." 682 | ] 683 | }, 684 | { 685 | "cell_type": "code", 686 | "execution_count": null, 687 | "metadata": {}, 688 | "outputs": [], 689 | "source": [ 690 | "import arviz as az\n", 691 | "\n", 692 | "az.plot_posterior(idata_a, ref_val=[10, 0.3, 10])" 693 | ] 694 | }, 695 | { 696 | "cell_type": "code", 697 | "execution_count": null, 698 | "metadata": {}, 699 | "outputs": [], 700 | "source": [ 701 | "az.plot_posterior(idata_b, ref_val=[-30, 0.1, 10])" 702 | ] 703 | }, 704 | { 705 | "cell_type": "code", 706 | "execution_count": null, 707 | "metadata": {}, 708 | "outputs": [], 709 | "source": [ 710 | "az.plot_posterior(idata_joint)" 711 | ] 712 | }, 713 | { 714 | "cell_type": "markdown", 715 | "metadata": {}, 716 | "source": [ 717 | "In all of the cases, we're pretty close, though a bit off. \n", 718 | "As the goal here is to show a personalized counterfactual,\n", 719 | "we're not going to worry too much about the parameter recovery accuracy." 720 | ] 721 | }, 722 | { 723 | "cell_type": "markdown", 724 | "metadata": {}, 725 | "source": [ 726 | "### Personalized counterfactuals in PyMC\n", 727 | "\n", 728 | "Personalized counterfactuals ask the question, \n", 729 | "\"What would have happened for a particular sample,\n", 730 | "i.e. one row in the dataset,\n", 731 | "had one of its observed variables been a different value?\"\n", 732 | "In this case, we're going to take one of the observations \n", 733 | "and ask what would happen if we counterfactually set $c$ to a different value.\n", 734 | "\n", 735 | "We'll start by isolating a sample of interest:" 736 | ] 737 | }, 738 | { 739 | "cell_type": "code", 740 | "execution_count": null, 741 | "metadata": {}, 742 | "outputs": [], 743 | "source": [ 744 | "sample = data.sample(random_state=491) # random state for reproducibility\n", 745 | "sample" 746 | ] 747 | }, 748 | { 749 | "cell_type": "markdown", 750 | "metadata": {}, 751 | "source": [ 752 | "We then simulate what the posterior for that sample would look like\n", 753 | "under the original case vs. the counterfactual case.\n", 754 | "In the first code block below, we set our data variables to be a single sample\n", 755 | "and then leverage the posterior predictive samples to simulate\n", 756 | "what `a` would look like under the original data.\n", 757 | "In the second code block, we set our data variables to be a single sample as well,\n", 758 | "except that we have changed `c` to be a totally different value from what it was before.\n", 759 | "This gives us our counterfactual scenario!"
760 | ] 761 | }, 762 | { 763 | "cell_type": "code", 764 | "execution_count": null, 765 | "metadata": {}, 766 | "outputs": [], 767 | "source": [ 768 | "import numpy as np \n", 769 | "\n", 770 | "with model_a:\n", 771 | " # Simulate posterior predictive under original observed data.\n", 772 | " pm.set_data({\"a\": sample[\"a\"], \"b\": sample[\"b\"], \"c\": sample[\"c\"]})\n", 773 | " ppc_or = pm.sample_posterior_predictive(idata_a)\n", 774 | "\n", 775 | " # Simulate posterior predictive under a counterfactual scenario.\n", 776 | " # ****This is the do-operator in action!****\n", 777 | " pm.set_data({\"a\": sample[\"a\"], \"b\": sample[\"b\"], \"c\": sample[\"c\"] / 10})\n", 778 | " ppc_cf = pm.sample_posterior_predictive(idata_a)" 779 | ] 780 | }, 781 | { 782 | "cell_type": "code", 783 | "execution_count": null, 784 | "metadata": {}, 785 | "outputs": [], 786 | "source": [ 787 | "ppc_or.posterior_predictive" 788 | ] 789 | }, 790 | { 791 | "cell_type": "code", 792 | "execution_count": null, 793 | "metadata": {}, 794 | "outputs": [], 795 | "source": [ 796 | "fig, axes = plt.subplots(figsize=(10, 5), ncols=2)\n", 797 | "az.plot_posterior(ppc_or.posterior_predictive[\"like\"], ax=axes[0])\n", 798 | "axes[0].set_title(\"Original\")\n", 799 | "\n", 800 | "az.plot_posterior(ppc_cf.posterior_predictive[\"like\"], ax=axes[1])\n", 801 | "axes[1].set_title(\"Counterfactual\")" 802 | ] 803 | }, 804 | { 805 | "cell_type": "markdown", 806 | "metadata": {}, 807 | "source": [ 808 | "What's really cool here is that we've done a personalized counterfactual\n", 809 | "for that particular student.\n", 810 | "In this case, we set the cost of the student's tuition fees to 1/10 the original, \n", 811 | "akin to sending them from a private school to a public school.\n", 812 | "Under this particularly flawed model, we would expect their sum of grades to go down,\n", 813 | "though not by a large magnitude.\n", 814 | "\n", 815 | "The key trick to enabling this\n", 816 | "was using `pm.Data()` containers for our data,\n", 817 | "thus registering them with PyMC (and Aesara underneath)\n", 818 | "as being hot-swappable entities with `pm.set_data()`.\n", 819 | "\n", 820 | "Finally, because our _posterior_ distributions \n", 821 | "contain our probabilistic beliefs having seen the data,\n", 822 | "or more colloquially, \"fits of our parameters\",\n", 823 | "we sample from the _posterior predictive_ distribution\n", 824 | "to identify what we would have gotten in a counterfactual situation." 825 | ] 826 | }, 827 | { 828 | "cell_type": "markdown", 829 | "metadata": {}, 830 | "source": [ 831 | "## The do-operator: global counterfactuals\n", 832 | "\n", 833 | "Let’s also think about another possible counterfactual question: \n", 834 | "What if the system parameters were different?\n", 835 | "\n", 836 | "Recall here that “system parameters” refer to the linear coefficients. \n", 837 | "They aren’t properties of any observation (or sample), \n", 838 | "but are properties of the entire system as a whole, \n", 839 | "hence the moniker “global” counterfactuals.\n", 840 | "\n", 841 | "To do global counterfactuals, \n", 842 | "we actually need to condition the coefficients’ values on a particular value, \n", 843 | "just like we did for data on the personalized counterfactual. \n", 844 | "For example, if we conditioned $\\beta$ to be equal to 0,\n", 845 | "in other words severing the relationship between $a$ and $b$, then:\n", 846 | "\n", 847 | "1. We are now asking for $P(a|do(\\beta=0), \\gamma, b, c)$. 
(Notice the omission of the subscript $i$; it’s intentional!)\n", 848 | "2. Our counterfactual question presumes a known value of $\\beta$, but not a presumed known value of any of $(b, c)$. (We aren't asking about any particular sample, after all!)\n" 849 | ] 850 | }, 851 | { 852 | "cell_type": "markdown", 853 | "metadata": {}, 854 | "source": [ 855 | "One way we can implement this do-operation is to reconstruct the model from our posteriors\n", 856 | "while hard-coding the value of $\\beta$ to 0.\n", 857 | "\n", 858 | "Let's see how to make this happen.\n", 859 | "\n", 860 | "Firstly, in order to take advantage of our fitted posteriors,\n", 861 | "we will use a `from_posterior` function implemented in the PyMC how-to guides." 862 | ] 863 | }, 864 | { 865 | "cell_type": "code", 866 | "execution_count": null, 867 | "metadata": {}, 868 | "outputs": [], 869 | "source": [ 870 | "from scipy import stats \n", 871 | "\n", 872 | "# Credit: https://docs.pymc.io/projects/examples/en/latest/pymc3_howto/updating_priors.html\n", 873 | "def from_posterior(param, samples):\n", 874 | " smin, smax = np.min(samples), np.max(samples)\n", 875 | " width = smax - smin\n", 876 | " x = np.linspace(smin, smax, 100)\n", 877 | " y = stats.gaussian_kde(samples)(x)\n", 878 | "\n", 879 | " # what was never sampled should have a small probability but not 0,\n", 880 | " # so we'll extend the domain and use linear approximation of density on it\n", 881 | " x = np.concatenate([[x[0] - 1 * width], x, [x[-1] + 1 * width]])\n", 882 | " y = np.concatenate([[0], y, [0]])\n", 883 | " return pm.Interpolated(param, x, y)" 884 | ] 885 | }, 886 | { 887 | "cell_type": "markdown", 888 | "metadata": {}, 889 | "source": [ 890 | "Then, we'll need to rewrite the model a little bit,\n", 891 | "this time hard-coding one of the variables to a particular value." 892 | ] 893 | }, 894 | { 895 | "cell_type": "code", 896 | "execution_count": null, 897 | "metadata": {}, 898 | "outputs": [], 899 | "source": [ 900 | "with pm.Model() as model_a_modified:\n", 901 | " a = pm.Data(\"a\", data[\"a\"])\n", 902 | " b = pm.Data(\"b\", data[\"b\"])\n", 903 | " c = pm.Data(\"c\", data[\"c\"])\n", 904 | " # Model parameters\n", 905 | " beta = 0 # ****This is the do-operator in action!****\n", 906 | " gamma = from_posterior(\"gamma\", idata_a.posterior[\"gamma\"].values.flatten())\n", 907 | " sigma_a = from_posterior(\"sigma_a\", idata_a.posterior[\"sigma_a\"].values.flatten())\n", 908 | " pm.Normal(\"like\", mu=beta * b + gamma * c, sigma=sigma_a, observed=a)" 909 | ] 910 | }, 911 | { 912 | "cell_type": "markdown", 913 | "metadata": {}, 914 | "source": [ 915 | "Finally, since our posteriors have become priors in this new model,\n", 916 | "we sample from the prior predictive distribution:" 917 | ] 918 | }, 919 | { 920 | "cell_type": "code", 921 | "execution_count": null, 922 | "metadata": {}, 923 | "outputs": [], 924 | "source": [ 925 | "with model_a_modified:\n", 926 | " trace = pm.sample_prior_predictive()" 927 | ] 928 | }, 929 | { 930 | "cell_type": "markdown", 931 | "metadata": {}, 932 | "source": [ 933 | "Now, we're able to examine how our observations have changed\n", 934 | "based on the intervention on the system."
935 | ] 936 | }, 937 | { 938 | "cell_type": "code", 939 | "execution_count": null, 940 | "metadata": {}, 941 | "outputs": [], 942 | "source": [ 943 | "def plot_global_counterfactual(idx: int):\n", 944 | " az.plot_posterior(trace.prior_predictive[\"like\"][0, :, idx])\n", 945 | " plt.gca().set_title(f\"Original value: {data.loc[idx, 'a']:.2f}\")\n", 946 | "\n", 947 | "\n", 948 | "plot_global_counterfactual(2)\n", 949 | "plot_global_counterfactual(3)\n" 950 | ] 951 | }, 952 | { 953 | "cell_type": "markdown", 954 | "metadata": {}, 955 | "source": [ 956 | "We can see here that by severing the connection between the IQ of a student and their grades,\n", 957 | "each student's total sum of grades goes down to 1/20th of their original.\n", 958 | "Those of us smarter than this author would probably have intuited this point without needing to code it up (by examining the magnitude of the slope coefficients),\n", 959 | "but in the case of exotic functional forms with interaction terms (or a neural net structure),\n", 960 | "the value of an explicit, global perturbation of an interpretable parameter is quite evident!" 961 | ] 962 | }, 963 | { 964 | "cell_type": "markdown", 965 | "metadata": {}, 966 | "source": [ 967 | "## The do-operator: combining global and personalized counterfactuals\n", 968 | "\n", 969 | "If we’re thinking logically here, \n", 970 | "we’ll soon realize that it’s also possible \n", 971 | "to combine the two aforementioned counterfactuals together. \n", 972 | "We can ask the question, \n", 973 | "what is $P(a_i | do(\\beta=0, b_i=B), \\gamma, c_i)$? \n", 974 | "(Note again the presence of the index $i$!)\n", 975 | "\n", 976 | "Algorithmically, this question essentially translates to:\n", 977 | "\n", 978 | "1. picking out sample $i$, \n", 979 | "2. setting $b=B$, \n", 980 | "3. setting $\\beta=0$,\n", 981 | "4. and evaluating what $a_i$ would look like under those two conditions.\n", 982 | "\n", 983 | "For the sake of illustration, here it is in PyMC code. One wrinkle: since $\\beta=0$ severs $b$'s influence on $a$, perturbing $b$ would change nothing, so the code below plays out the per-sample intervention on $c$ instead:" 984 | ] 985 | }, 986 | { 987 | "cell_type": "code", 988 | "execution_count": null, 989 | "metadata": {}, 990 | "outputs": [], 991 | "source": [ 992 | "with model_a_modified:\n", 993 | " pm.set_data({\"a\": sample[\"a\"], \"b\": sample[\"b\"], \"c\": sample[\"c\"]})\n", 994 | " ppc_or = pm.sample_prior_predictive()\n", 995 | "\n", 996 | " # cf = \"counterfactual\"\n", 997 | " pm.set_data({\"a\": sample[\"a\"], \"b\": sample[\"b\"], \"c\": sample[\"c\"] * 0.1})\n", 998 | " ppc_cf = pm.sample_prior_predictive()" 999 | ] 1000 | }, 1001 | { 1002 | "cell_type": "code", 1003 | "execution_count": null, 1004 | "metadata": {}, 1005 | "outputs": [], 1006 | "source": [ 1007 | "fig, axes = plt.subplots(figsize=(15, 5), ncols=2)\n", 1008 | "az.plot_posterior(ppc_or.prior_predictive[\"like\"], ax=axes[0])\n", 1009 | "axes[0].set_title(r\"Distribution of $a$ with $\\beta=0$ and $c = c_i$\")\n", 1010 | "\n", 1011 | "az.plot_posterior(ppc_cf.prior_predictive[\"like\"], ax=axes[1])\n", 1012 | "axes[1].set_title(r\"Distribution of $a$ with $\\beta=0$ and $c = \\frac{c_i}{10}$\")" 1013 | ] 1014 | }, 1015 | { 1016 | "cell_type": "markdown", 1017 | "metadata": {}, 1018 | "source": [ 1019 | "## Framework\n", 1020 | "\n", 1021 | "Having worked through this example, we've finally arrived at a framework for thinking through the connection between Bayesian models and causal models. \n", 1022 | "\n", 1023 | "To know which kind of counterfactual we need to employ, we have to be extremely clear on the exact question we’re trying to ask.
Are we trying to ask:\n", 1024 | "\n", 1025 | "1. A personalized question? (”What would have happened to this particular sample had its dependent variable been set to a particular value?”)\n", 1026 | "2. A systems-level question? (”What would happen to all of our observations if a system parameter was set to a particular value?”)\n", 1027 | "3. A hybrid question? (”What would have happened to this particular sample had its dependent variable been set to a particular value *and* the system parameter set to a particular value?”)\n", 1028 | "\n", 1029 | "If we have our variables’ dependencies clearly and explicitly stated, then it becomes easy to ask the do-operator family of questions, which basically are asking, “What happens if we set something in the model to a particular value?”" 1030 | ] 1031 | }, 1032 | { 1033 | "cell_type": "markdown", 1034 | "metadata": {}, 1035 | "source": [ 1036 | "## Summary\n", 1037 | "\n", 1038 | "In this notebook, we have seen how to do causal inference on Bayesian models written in PyMC. \n", 1039 | "We started with the contrast and comparison between causal and Bayesian inference.\n", 1040 | "Then, we went through the logical framework of asking counterfactuals on (1) a per-sample basis, (2) a system-wide basis, and (3) both together.\n", 1041 | "\n", 1042 | "The value of the do-operator, and by extension causal methods,\n", 1043 | "is that they provide us the ability to ask these \"what if\" questions\n", 1044 | "in cases where conducting an experiment would be unethical, cost-prohibitive, or logistically challenging.\n", 1045 | "Combining it with Bayesian models gives us the ability to assess\n", 1046 | "what would have happened in a counterfactual world (causal)\n", 1047 | "while also calculating the full range of possibilities (Bayesian).\n", 1048 | "Blending the two together is nothing more than a matter of logic!" 1049 | ] 1050 | }, 1051 | { 1052 | "cell_type": "markdown", 1053 | "metadata": {}, 1054 | "source": [ 1055 | "## Credits\n", 1056 | "\n", 1057 | "I would like to credit Thomas Wiecki and Ben Vincent for reviewing an early draft of this notebook." 
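To tie the three kinds of questions together in code form, here is a schematic recap (a sketch of my own that reuses the names defined earlier in this notebook; it is not code from the original notebook):

```python
# Schematic recap of the three counterfactual patterns shown above.

# 1. Personalized: keep the fitted parameters, swap one sample's data.
with model_a:
    pm.set_data({"a": sample["a"], "b": sample["b"], "c": sample["c"] / 10})
    ppc_personalized = pm.sample_posterior_predictive(idata_a)

# 2. Global: rebuild the model with a parameter pinned (beta = 0 in
#    model_a_modified), then sample the prior predictive.
with model_a_modified:
    ppc_global = pm.sample_prior_predictive()

# 3. Hybrid: a pinned parameter AND a swapped sample.
with model_a_modified:
    pm.set_data({"a": sample["a"], "b": sample["b"], "c": sample["c"] / 10})
    ppc_hybrid = pm.sample_prior_predictive()
```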
1058 | ] 1059 | } 1060 | ], 1061 | "metadata": { 1062 | "interpreter": { 1063 | "hash": "d4d1e4263499bec80672ea0156c357c1ee493ec2b1c70f0acce89fc37c4a6abe" 1064 | }, 1065 | "kernelspec": { 1066 | "display_name": "Python 3.8.2 64-bit ('base': conda)", 1067 | "language": "python", 1068 | "name": "python3" 1069 | }, 1070 | "language_info": { 1071 | "codemirror_mode": { 1072 | "name": "ipython", 1073 | "version": 3 1074 | }, 1075 | "file_extension": ".py", 1076 | "mimetype": "text/x-python", 1077 | "name": "python", 1078 | "nbconvert_exporter": "python", 1079 | "pygments_lexer": "ipython3", 1080 | "version": "3.9.9" 1081 | } 1082 | }, 1083 | "nbformat": 4, 1084 | "nbformat_minor": 2 1085 | } 1086 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | ../README.md -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: causality 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python=3.9 6 | - black 7 | - conda 8 | - flake8 9 | - ipykernel 10 | - ipython 11 | - jupyter 12 | - jupyter_contrib_nbextensions 13 | - jupyterlab 14 | - matplotlib 15 | - mkdocs 16 | - mkdocs-material=8.1.3 17 | - mkdocstrings 18 | - mypy 19 | - nbstripout 20 | - networkx 21 | - numpy 22 | - pandas 23 | - pip 24 | - pre-commit 25 | - pycodestyle 26 | - pydocstyle 27 | - pymc=4.0 28 | - pytest 29 | - python-graphviz 30 | - scikit-learn 31 | - scipy 32 | - seaborn 33 | - statsmodels 34 | - pip: 35 | - mknotebooks 36 | - daft 37 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: causality 2 | site_url: https://ericmjl.github.io/causality 3 | 4 | theme: 5 | name: "material" 6 | palette: 7 | primary: "blue" 8 | accent: "blue" 9 | icon: 10 | logo: "fontawesome/solid/book" 11 | features: 12 | - instant 13 | # - tabs 14 | language: en 15 | 16 | # We customize the navigation by hand to control the order 17 | # in which pages show up. 
18 | nav: 19 | - causality: 20 | - Welcome: index.md 21 | - Notebooks: 22 | - "01: Linear Simulation": 01-causality-linear-simulation.ipynb 23 | - "02: Instrument Variables": 02-instrument-variables.ipynb 24 | - "03: d-Separation": 03-d-separation.ipynb 25 | - "04: Finding Confounding Set": 04-finding-confounding-set.ipynb 26 | - "05: Collider Effect": 05-collider-effect.ipynb 27 | - "06: Causal Identifiability": 06-causality-identifiability.ipynb 28 | - "07: The Do Operator": 07-do-operator.ipynb 29 | 30 | plugins: 31 | - search 32 | - mknotebooks: 33 | execute: true 34 | write_markdown: true 35 | allow_errors: false 36 | timeout: 1200 37 | binder: true 38 | binder_service_name: "gh" 39 | binder_branch: "master" 40 | binder_ui: "lab" 41 | 42 | # Taken from here: https://squidfunk.github.io/mkdocs-material/extensions/codehilite/ 43 | markdown_extensions: 44 | - codehilite 45 | - admonition 46 | - pymdownx.tabbed 47 | - pymdownx.arithmatex 48 | - pymdownx.details 49 | - pymdownx.superfences 50 | - markdown.extensions.attr_list 51 | 52 | extra_javascript: 53 | - https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS-MML_HTMLorMML 54 | - https://hypothes.is/embed.js 55 | 56 | extra_css: 57 | - css/nb_mods.css 58 | 59 | 60 | extra: 61 | social: 62 | - icon: "fontawesome/brands/github" 63 | link: "https://github.com/ericmjl" 64 | - icon: "fontawesome/brands/twitter" 65 | link: "https://twitter.com/ericmjl" 66 | - icon: "fontawesome/brands/linkedin" 67 | link: "https://linkedin.com/in/ericmjl" 68 | -------------------------------------------------------------------------------- /src/causality_notes/__init__.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import numpy.random as npr 3 | 4 | 5 | def draw_graph(G, edge_weight=None, layout: str = "kamada_kawai"): 6 | pos = nx.kamada_kawai_layout(G) 7 | 8 | if edge_weight: 9 | edge_labels = { 10 | (u, v): d[edge_weight] for u, v, d in G.edges(data=True) 11 | } # noqa: E501 12 | nx.draw_networkx_edge_labels(G, pos=pos, edge_labels=edge_labels) 13 | 14 | nx.draw_networkx_edges(G, pos) 15 | nx.draw_networkx_nodes(G, pos) 16 | nx.draw_networkx_labels(G, pos) 17 | 18 | 19 | 20 | def noise(size): 21 | return npr.normal(loc=0, scale=1, size=size) 22 | 23 | 24 | def rule1(n, S, G, path): 25 | """ 26 | Tells us if a node in the graph G satisfies 27 | blocking rule 1 in the causal path provided. 28 | 29 | Blocking rule 1 is: 30 | 31 | -> n -> 32 | 33 | This is topologically equivalent to: 34 | 35 | <- n <- 36 | 37 | Where n is a member of S. 38 | 39 | :param n: A node in graph G. 40 | :param S: The conditioning node set. 41 | :param G: A NetworkX graph. 42 | :param path: The causal path of interest. 43 | """ 44 | G_sub = path_nodes(G, path) 45 | in_conditioning_set = n in S 46 | has_in_edges = len(list(G_sub.in_edges(n))) == 1 47 | has_out_edges = len(list(G_sub.out_edges(n))) == 1 48 | return in_conditioning_set and has_in_edges and has_out_edges 49 | 50 | 51 | def rule2(n, S, G, path): 52 | """ 53 | Tells us if a node in the graph G satisfies 54 | blocking rule 2 in the causal path provided. 55 | 56 | Blocking rule 2 is: 57 | 58 | <- n -> 59 | 60 | Where n is a member of S. 61 | 62 | :param n: A node in graph G. 63 | :param S: The conditioning node set. 64 | :param G: A NetworkX graph. 65 | :param path: The causal path of interest.
66 | """ 67 | G_sub = path_nodes(G, path) 68 | in_conditioning_set = n in S 69 | has_out_edges = len(list(G_sub.out_edges(n))) == 2 70 | return in_conditioning_set and has_out_edges 71 | 72 | 73 | def rule3(n, S, G, path): 74 | """ 75 | Tells us if a node in the graph G satisfies 76 | blocking rule 3 in the causal path provided. 77 | 78 | Blocking rule 3 is as such: 79 | 80 | If n is a collider: 81 | 82 | -> n <- 83 | 84 | Then it is a blocker, otherwise it is not a blocker. 85 | 86 | However, if n is a member of S, or n has a descendant 87 | that is a member of S, then it is not a blocker. 88 | 89 | :param n: A node in graph G. 90 | :param S: The conditioning node set. 91 | :param G: A NetworkX graph. 92 | :param path: The causal path of interest. 93 | """ 94 | G_sub = path_nodes(G, path) 95 | in_conditioning_set = n in S 96 | is_collider = len(list(G_sub.in_edges(n))) == 2 97 | descendant_in_S = bool(set(G.successors(n)).intersection(S)) 98 | 99 | is_blocker = is_collider 100 | 101 | # We then check to see if the 102 | if n in S or descendant_in_S: 103 | is_blocker = False 104 | return is_blocker 105 | 106 | 107 | def path_nodes(G: nx.DiGraph, path: list): 108 | """ 109 | Returns the causal path as indicated by the path. 110 | 111 | Does not include the other edges, as would G.subgraph do. 112 | 113 | :param G: A NetworkX directed graph. 114 | :param path: A list of nodes denoting an undirected path. 115 | """ 116 | assert isinstance(G, nx.DiGraph), "G must be a directed graph" 117 | G_sub = nx.DiGraph() 118 | for n1, n2 in zip(path, path[1:]): 119 | if G.has_edge(n1, n2): 120 | G_sub.add_edge(n1, n2) 121 | elif G.has_edge(n2, n1): 122 | G_sub.add_edge(n2, n1) 123 | return G_sub 124 | -------------------------------------------------------------------------------- /src/setup.py: -------------------------------------------------------------------------------- 1 | """Setup script.""" 2 | import os 3 | from setuptools import setup, find_packages 4 | 5 | setup( 6 | # mandatory 7 | name="causality_notes", 8 | # mandatory 9 | version="0.1", 10 | # mandatory 11 | author="Eric J. Ma", 12 | description=("Notes on causal inference methods."), 13 | packages=find_packages(), 14 | ) 15 | -------------------------------------------------------------------------------- /src/test_causality.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from causality_notes import rule1, rule2, rule3 4 | 5 | 6 | @pytest.fixture 7 | def G4_7(): 8 | """ 9 | The graph from figure 4.7 in Judea Pearl's "The Book of Why" 10 | """ 11 | G = nx.DiGraph() 12 | edges = [ 13 | ("D", "A"), 14 | ("D", "C"), 15 | ("F", "C"), 16 | ("A", "B"), 17 | ("C", "B"), 18 | ("C", "Y"), 19 | ("F", "X"), 20 | ("F", "Y"), 21 | ("C", "E"), 22 | ("A", "X"), 23 | ("E", "X"), 24 | ("E", "Y"), 25 | ("B", "X"), 26 | ("X", "Y"), 27 | ("G", "X"), 28 | ("G", "Y"), 29 | ] 30 | G.add_edges_from(edges) 31 | return G 32 | 33 | 34 | def G_nb3(): 35 | """ 36 | The graph from notebook 3. 37 | """ 38 | G = nx.DiGraph() 39 | G.add_edge("x2", "x1") 40 | G.add_edge("x3", "x1") 41 | G.add_edge("x4", "x3") 42 | G.add_edge("x4", "x5") 43 | G.add_edge("x1", "x5") 44 | return G 45 | 46 | 47 | def G2_nb3(): 48 | """ 49 | The second graph from notebook 3. 
50 | """ 51 | G2 = nx.DiGraph() 52 | edges = ["xr", "rw", "rs", "st", "tp", "ut", "vu", "vq", "vy"] 53 | edges = [(f"{i[0]}", f"{i[1]}") for i in edges] 54 | G2.add_edges_from(edges) 55 | return G2 56 | 57 | 58 | # @pytest.parameterize(G, [G_47, G_nb3, G2_nb3]) 59 | def test_rule1(): 60 | pass 61 | 62 | 63 | def test_rule2(): 64 | pass 65 | 66 | 67 | def test_rule3(): 68 | pass 69 | --------------------------------------------------------------------------------