├── tests ├── __init__.py ├── deconfounding │ ├── __init__.py │ ├── test_Do.py │ ├── backdoor_files │ │ ├── xi_xj.yml │ │ └── graphs │ │ │ └── pearl-3.4.yml │ └── test_Backdoor.py ├── identification │ ├── __init__.py │ ├── test_Identification.py │ └── test_LatentGraph.py ├── core │ ├── __init__.py │ ├── inference_files │ │ └── xi_xj.yml │ ├── test_Model.py │ ├── test_ConditionalProbabilityTable.py │ ├── test_helpers.py │ ├── test_Variables.py │ ├── test_Inference.py │ ├── test_Expression.py │ └── test_Graph.py └── source.py ├── do ├── identification │ ├── __init__.py │ ├── Exceptions.py │ ├── PExpression.py │ ├── API.py │ ├── LatentGraph.py │ └── Identification.py ├── deconfounding │ ├── __init__.py │ ├── Exceptions.py │ ├── API.py │ ├── Do.py │ └── Backdoor.py ├── __init__.py ├── core │ ├── __init__.py │ ├── API.py │ ├── Exceptions.py │ ├── Types.py │ ├── Expression.py │ ├── helpers.py │ ├── Model.py │ ├── ConditionalProbabilityTable.py │ ├── Variables.py │ ├── Graph.py │ └── Inference.py └── API.py ├── MANIFEST.in ├── requirements.txt ├── archive ├── __init__.py ├── Home.md ├── Exceptions.md ├── README.md ├── Roots.md ├── Sinks.md ├── Resources.md ├── Parents.md ├── Children.md ├── Ancestors.md ├── Descendants.md ├── Literature.md ├── Joint Distribution Table.md ├── Installation.md ├── Standard Paths.md ├── Topology.md ├── _Sidebar.md ├── Conditional Independence.md ├── Loading a Model.md ├── Deconfounding Sets.md ├── Do API.md ├── PyPI.md ├── GitHub.md ├── Backdoor Paths.md ├── Configuration.md ├── Output.md ├── build_wiki.py ├── Markovian Models.md ├── Definitions.md ├── Probability Queries.md └── __init__.md ├── wiki ├── _Sidebar.md ├── Home.md ├── Installation.md ├── API.md ├── PyPI.md └── GitHub.md ├── .gitignore ├── examples ├── 3-latent │ ├── deconfound.py │ └── 3.4-latent.yml ├── 1-basic-backdoor │ ├── deconfound.py │ └── pearl-3.4.yml └── 2-backdoor-paths │ ├── deconfound.py │ └── pearl-3.4.yml ├── .github ├── workflows │ ├── update_wiki.yml │ ├── 
codeql-analysis.yml │ └── test_and_release.yml ├── SECURITY.md └── CODE_OF_CONDUCT.md ├── .releaserc.yml ├── models ├── simulation.json.yml ├── melanoma.yml ├── square-game.yml ├── pearl-3.6.yml ├── abcd.yml ├── m-game.yml ├── pearl-7.5.yml ├── pearl-3.7c.yml ├── fumigants_eelworms.yml └── pearl-3.4.yml ├── setup.cfg ├── setup.py └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /do/identification/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/deconfounding/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/deconfounding/test_Do.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/identification/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include do/ * 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy >= 1.19.4 2 | PyYAML >= 5.4.0 3 | loguru >= 0.5.3 -------------------------------------------------------------------------------- /archive/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = [ 2 | "pages", 3 | "build_wiki" 4 | ] 5 | 
-------------------------------------------------------------------------------- /do/deconfounding/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = [ 2 | "API", 3 | "BackdoorController", 4 | "CausalGraph" 5 | ] 6 | -------------------------------------------------------------------------------- /wiki/_Sidebar.md: -------------------------------------------------------------------------------- 1 | ### [[Home]] 2 | 3 | ### [[Installation]] 4 | * [[PyPI]] 5 | * [[GitHub]] 6 | 7 | ### [[API]] 8 | -------------------------------------------------------------------------------- /tests/core/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = [ 2 | "test_ConditionalProbabilityTable", 3 | "test_Graph", 4 | "test_Variables" 5 | ] 6 | -------------------------------------------------------------------------------- /do/__init__.py: -------------------------------------------------------------------------------- 1 | from .API import API 2 | 3 | from .core.Expression import Expression 4 | from .core.Variables import Intervention, Outcome, Variable 5 | -------------------------------------------------------------------------------- /archive/Home.md: -------------------------------------------------------------------------------- 1 | # do-calculus wiki 2 | 3 | This wiki is *under construction*, and most documentation is still a work in progress. 4 | 5 | See the Sidebar for relevant links. 6 | -------------------------------------------------------------------------------- /archive/Exceptions.md: -------------------------------------------------------------------------------- 1 | Details on the custom exceptions that can be raised when using the [[API|Do API]]. 2 | 3 | Exceptions are stored in ``do/structures/Exceptions``. 
4 | 5 | STUB|exceptions 6 | -------------------------------------------------------------------------------- /archive/README.md: -------------------------------------------------------------------------------- 1 | # Archive 2 | 3 | Contained here is some old code that built a (now-outdated) wiki that I don't have the heart to delete. 4 | 5 | Don't rely on it expecting accurate information. 6 | -------------------------------------------------------------------------------- /do/core/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = [ 2 | "API", 3 | "ConditionalProbabilityTable", 4 | "Exceptions", 5 | "Graph", 6 | "helpers", 7 | "Model", 8 | "Types", 9 | "Variables" 10 | ] 11 | -------------------------------------------------------------------------------- /wiki/Home.md: -------------------------------------------------------------------------------- 1 | # do-calculus wiki 2 | 3 | This is a (fairly minimal) wiki detailing usage of the `do-calculus` library. 4 | 5 | There are some examples in the `examples` subdirectory, and the API is fairly well-documented, to be accompanied by prerequisite domain-specific knowledge. 6 | 7 | See the sidebar for relevant links. 8 | -------------------------------------------------------------------------------- /do/deconfounding/Exceptions.py: -------------------------------------------------------------------------------- 1 | from ..core.Exceptions import ProbabilityException 2 | 3 | class NoDeconfoundingSet(ProbabilityException): 4 | """ 5 | Raised when attempting to perform a query on a set of data for which deconfounding is necessary, but no sufficient 6 | set of variables by which to block backdoor paths is possible. 
7 | """ 8 | pass 9 | -------------------------------------------------------------------------------- /archive/Roots.md: -------------------------------------------------------------------------------- 1 | Get all roots in the graph, where a root is defined as a vertex with no parent. 2 | 3 | STUB|roots 4 | 5 | ### Example 6 | 7 | ```python 8 | from do.API import Do 9 | 10 | model = "models/model1.yml" 11 | do_api = Do(model) 12 | 13 | roots = do_api.roots() 14 | ``` 15 | 16 | **Important** 17 | - The roots are always returned as a collection of vertices. 18 | -------------------------------------------------------------------------------- /archive/Sinks.md: -------------------------------------------------------------------------------- 1 | Get all sinks in the graph, where a sink is defined as vertex with no child. 2 | 3 | STUB|sinks 4 | 5 | ### Example 6 | 7 | ```python 8 | from do.API import Do 9 | 10 | model = "models/model1.yml" 11 | do_api = Do(model) 12 | 13 | sinks = do_api.sinks() 14 | ``` 15 | 16 | **Important** 17 | - The sinks are always returned as a collection of vertices. 
18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore most .files 2 | .* 3 | !.github 4 | !.gitignore 5 | !.releaserc.yml 6 | 7 | # No pycache 8 | */__pycache__/* 9 | /__pycache__/* 10 | *.pyc 11 | 12 | # Log files from operating software 13 | do-calculus/logs/ 14 | 15 | # Config file is generated on run 16 | *config.* 17 | 18 | *.egg* 19 | 20 | # results from building package 21 | build/* 22 | dist/* 23 | 24 | -------------------------------------------------------------------------------- /do/API.py: -------------------------------------------------------------------------------- 1 | from .core.API import API as Core 2 | from .deconfounding.API import API as Deconfounding 3 | from .identification.API import API as Identification 4 | 5 | from .core.Expression import Expression 6 | 7 | class API(Core, Deconfounding, Identification): 8 | 9 | def __init__(self): 10 | Core.__init__(self) 11 | Deconfounding.__init__(self) 12 | Identification.__init__(self) 13 | -------------------------------------------------------------------------------- /examples/3-latent/deconfound.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from do import API, Expression, Intervention, Outcome 4 | 5 | api = API() 6 | 7 | file = Path("3.4-latent.yml") 8 | model = api.instantiate_model(file) 9 | 10 | xj = Outcome("Xj", "xj") 11 | xi = Intervention("Xi", "xi") 12 | e = Expression(xj, [xi]) 13 | 14 | result, proof = api.identification([xj], [xi], model) 15 | print(result) 16 | print(proof) 17 | -------------------------------------------------------------------------------- /archive/Resources.md: -------------------------------------------------------------------------------- 1 | A collection of resources for information the project, or *do-calculus* generally. 
2 | * [[Definitions]]: Details on various definitions and terminology used within the project. 3 | * [[Markovian Models]]: Details on the structure of a Markovian model for use in the package. 4 | * [[Configuration]]: Settings for the project. 5 | * [[Literature]]: Books and papers referenced in the implementation of this project. 6 | -------------------------------------------------------------------------------- /tests/core/inference_files/xi_xj.yml: -------------------------------------------------------------------------------- 1 | graph_filename: pearl-3.4.yml 2 | tests: 3 | - head: Xj=xj 4 | body: Xi=xi 5 | expect: 0.41845 6 | 7 | - head: Xj=xj 8 | body: X1=x1 9 | expect: 0.45597 10 | 11 | - head: Xi=xi 12 | body: Xi=~xi 13 | expect: 0.0 14 | 15 | - head: Xi=xi 16 | body: Xi=xi 17 | expect: 1.0 18 | 19 | - head: Xi=xi 20 | body: Xi=xi, Xj=xj 21 | expect: 1.0 22 | -------------------------------------------------------------------------------- /archive/Parents.md: -------------------------------------------------------------------------------- 1 | Get the parents of some vertex *v*, where a parent is some vertex *p* such that the edge ``(p, v)`` is in the graph. 2 | 3 | STUB|parents 4 | 5 | ### Example 6 | 7 | ```python 8 | from do.API import Do 9 | 10 | model = "models/model1.yml" 11 | do_api = Do(model) 12 | 13 | parents = do_api.parents("x") 14 | ``` 15 | 16 | **Important** 17 | - The parents are always returned as a (possibly **empty**) collection of vertices. 18 | -------------------------------------------------------------------------------- /archive/Children.md: -------------------------------------------------------------------------------- 1 | Get the children of some vertex *v*, where a child is some vertex *c* such that the edge ``(v, c)`` is in the graph. 
2 | 3 | STUB|children 4 | 5 | ### Example 6 | 7 | ```python 8 | from do.API import Do 9 | 10 | model = "models/model1.yml" 11 | do_api = Do(model) 12 | 13 | children = do_api.children("x") 14 | ``` 15 | 16 | **Important** 17 | - The children are always returned as a (possibly **empty**) collection of vertices. 18 | -------------------------------------------------------------------------------- /do/identification/Exceptions.py: -------------------------------------------------------------------------------- 1 | from do.core.Exceptions import ProbabilityException 2 | 3 | 4 | class Fail(ProbabilityException): 5 | """ 6 | Represents a failure for the Identification algorithm to properly 7 | identify a causal effect. This (real) exception is raised as done 8 | in the ID algorithm. 9 | """ 10 | 11 | def __init__(self, s, sp, proof): 12 | super().__init__(s, sp) 13 | self.proof = proof 14 | -------------------------------------------------------------------------------- /tests/core/test_Model.py: -------------------------------------------------------------------------------- 1 | from pytest import raises 2 | 3 | from do.core.Exceptions import MissingVariable 4 | 5 | from ..source import models 6 | model = models["pearl-3.4.yml"] 7 | 8 | 9 | def test_Lookup(): 10 | 11 | # these should raise no issue 12 | assert model.variable("Xj") 13 | assert model.variable("Xi") 14 | 15 | # ensure a latent variable fails to be retrieved... 16 | with raises(MissingVariable): 17 | model.variable("Z") 18 | -------------------------------------------------------------------------------- /archive/Ancestors.md: -------------------------------------------------------------------------------- 1 | Get the ancestors of some vertex *v*, where an ancestor is some vertex *a* such that a directed path ``(a, ..., v)`` is in the graph. 
2 | 3 | STUB|ancestors 4 | 5 | ### Example 6 | 7 | ```python 8 | from do.API import Do 9 | 10 | model = "models/model1.yml" 11 | do_api = Do(model) 12 | 13 | ancestors = do_api.ancestors("x") 14 | ``` 15 | 16 | **Important** 17 | - The ancestors are always returned as a (possibly **empty**) collection of vertices. 18 | -------------------------------------------------------------------------------- /archive/Descendants.md: -------------------------------------------------------------------------------- 1 | Get the descendants of some vertex *v*, where a descendant is some vertex *d* such that a directed path ``(v, ..., d)`` is in the graph. 2 | 3 | STUB|descendants 4 | 5 | ### Example 6 | 7 | ```python 8 | from do.API import Do 9 | 10 | model = "models/model1.yml" 11 | do_api = Do(model) 12 | 13 | descendants = do_api.descendants("x") 14 | ``` 15 | 16 | **Important** 17 | - The descendants are always returned as a (possibly **empty**) collection of vertices. 18 | -------------------------------------------------------------------------------- /tests/source.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from do.API import API 4 | from do.core.Inference import validate 5 | from do.core.Model import from_path 6 | 7 | 8 | api = API() 9 | 10 | # directory of YML files containing valid models for testing purposes 11 | model_path = Path("models") 12 | assert model_path.is_dir() 13 | 14 | models = dict() 15 | for file in model_path.iterdir(): 16 | models[file.name] = from_path(file) 17 | 18 | # verify all the models as correct 19 | #for name, model in models.items(): 20 | # assert validate(model) 21 | -------------------------------------------------------------------------------- /.github/workflows/update_wiki.yml: -------------------------------------------------------------------------------- 1 | name: Deploy Wiki 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | paths: 7 | - 'wiki/**' 8 | 
branches: [ main ] 9 | 10 | jobs: 11 | deploy-wiki: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v2 15 | 16 | - name: Push Wiki Changes 17 | uses: Andrew-Chen-Wang/github-wiki-action@v3 18 | env: 19 | WIKI_DIR: wiki/ 20 | GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} 21 | GH_MAIL: ${{ secrets.EMAIL }} 22 | GH_NAME: ${{ github.repository_owner }} 23 | -------------------------------------------------------------------------------- /examples/1-basic-backdoor/deconfound.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from do import API, Expression, Intervention, Outcome 4 | 5 | api = API() 6 | 7 | file = Path("pearl-3.4.yml") 8 | model = api.instantiate_model(file) 9 | 10 | xj = Outcome("Xj", "xj") 11 | xi = Intervention("Xi", "xi") 12 | e = Expression(xj, [xi]) 13 | 14 | # basic inference won't work! 15 | try: 16 | api.probability(e, model) 17 | print("This cannot happen!") 18 | 19 | except Exception: 20 | e2 = Expression(xj) 21 | result = api.treat(e2, [xi], model) 22 | print(result) 23 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | name: "CodeQL Analysis" 2 | on: 3 | push: 4 | branches: [ main, beta, develop ] 5 | pull_request: 6 | branches: [ main ] 7 | 8 | jobs: 9 | analyze: 10 | name: Analyze 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - name: Checkout repository 15 | uses: actions/checkout@v2 16 | 17 | - name: Initialize CodeQL 18 | uses: github/codeql-action/init@v1 19 | with: 20 | languages: python 21 | 22 | - name: Perform CodeQL Analysis 23 | uses: github/codeql-action/analyze@v1 24 | 25 | -------------------------------------------------------------------------------- /archive/Literature.md: -------------------------------------------------------------------------------- 1 | TODO - References galore to 
backdoor paths, deconfounding, and more! 2 | 3 | ## Books 4 | 5 | * Causality (2nd Edition) - Judea Pearl, 2009 6 | * The Book of Why: The New Science of Cause and Effect - Judea Pearl and Dana Mackenzie, 2018 7 | * Causal Inference in Statistics: A Primer - Judea Pearl, Madelyn Glymour, Nicholas P. Jewell, 2016 8 | 9 | ## Papers 10 | 11 | TODO - Shpitser & Pearl 2004, Thesis, and a few more. 12 | 13 | ## Graph Related 14 | 15 | Kahn, A. B. (1962). Topological sorting of large networks. Communications of the ACM, 16 | 5(11):558–562. 17 | -------------------------------------------------------------------------------- /tests/deconfounding/backdoor_files/xi_xj.yml: -------------------------------------------------------------------------------- 1 | graph_filename: pearl-3.4.yml 2 | tests: 3 | - type: backdoors 4 | src: [ Xi ] 5 | dst: [ Xj ] 6 | expect: [ 7 | [ Xi, X4, Xj ], 8 | [ Xi, X4, X2, X5, Xj ], 9 | [ Xi, X3, X1, X4, Xj ] 10 | ] 11 | exhaustive: true 12 | 13 | - type: treatment 14 | head: Xj=xj 15 | body: do(Xi=xi) 16 | expect: 0.362828 17 | 18 | - type: treatment 19 | head: Xj=xj 20 | body: do(X1=x1) 21 | expect: 0.45597 22 | 23 | - type: treatment 24 | head: Xi=xi 25 | body: do(Xj=xj) 26 | expect: 0.2669 27 | -------------------------------------------------------------------------------- /.releaserc.yml: -------------------------------------------------------------------------------- 1 | # .releaserc file 2 | # Configures the test-and-release workflow used to automatically tag / release on commits to certain branches 3 | 4 | # Only trigger proper release on 5 | branches: 6 | 7 | # "main" branch is not prerelease 8 | - name: main 9 | prerelease: false 10 | 11 | # "beta" branch is prerelease 12 | - name: beta 13 | prerelease: true 14 | 15 | # Plugins; Omit the npm plugin entirely, as we are not publishing on npm 16 | plugins: 17 | - '@semantic-release/commit-analyzer' 18 | - '@semantic-release/release-notes-generator' 19 | - '@semantic-release/github' 20 | 
-------------------------------------------------------------------------------- /archive/Joint Distribution Table.md: -------------------------------------------------------------------------------- 1 | Get a joint distribution table for all possible combination of outcomes for all variables in the model. 2 | 3 | STUB|joint_distribution_table 4 | 5 | ## Example 6 | 7 | ```python 8 | from do.API import Do 9 | 10 | model = "models/model1.yml" 11 | do_api = Do(model) 12 | 13 | table = do_api.joint_distribution_table() 14 | ``` 15 | 16 | **Important** 17 | - This table can be *extremely* computationally intensive if there are many outcomes and/or many variables in the model. 18 | - To improve performance, ensure that [[computation-caching is enabled|Configuration]]. 19 | -------------------------------------------------------------------------------- /archive/Installation.md: -------------------------------------------------------------------------------- 1 | How to install and set up the software. 2 | 3 | ## Table of Contents 4 | 5 | * [Requirements](#requirements) 6 | * [Options](#options) 7 | 8 | ## Requirements 9 | 10 | Setup requirements for the project are: 11 | - **[Python 3.8+](https://www.python.org/)** 12 | - [**pip**](https://pip.pypa.io/en/stable/) is used to install required packages. 13 | 14 | **Note**: `pip` will already be installed with any installation of **Python 3.4+**. 15 | 16 | ## Options 17 | 18 | There are **two** main ways to install the package: 19 | - [[Install from PyPI|PyPI]] 20 | - [[Install from source|GitHub]] 21 | -------------------------------------------------------------------------------- /archive/Standard Paths.md: -------------------------------------------------------------------------------- 1 | Get traditional, directed paths from the *DAG* encoded in the model. 2 | 3 | This includes all standard, directed paths as well-defined in graph terminology, and explicitly does **not** include any backdoor paths. 
4 | 5 | STUB|standard_paths 6 | 7 | ## Example 8 | 9 | ```python 10 | from do.API import Do 11 | 12 | do_api = Do("models/model1.yml") 13 | 14 | paths = do_api.standard_paths({"x", "y"}, {"z"}) 15 | ``` 16 | 17 | **Important** 18 | - Since collections of vertices are provided, any path from some vertex in ``src`` to some vertex in ``dst`` is included in the returned collection. 19 | -------------------------------------------------------------------------------- /wiki/Installation.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | How to install and set up the software. 4 | 5 | ## Table of Contents 6 | 7 | * [Requirements](#requirements) 8 | * [Options](#options) 9 | 10 | ## Requirements 11 | 12 | Setup requirements for the project are: 13 | - **[Python 3.8+](https://www.python.org/)** 14 | - [**pip**](https://pip.pypa.io/en/stable/) is used to install required packages. 15 | 16 | **Note**: `pip` will already be installed with any installation of **Python 3.4+**. 
17 | 18 | ## Options 19 | 20 | There are **two** main ways to install the package: 21 | - [[Install from PyPI|PyPI]] 22 | - [[Install from source|GitHub]] 23 | -------------------------------------------------------------------------------- /tests/core/test_ConditionalProbabilityTable.py: -------------------------------------------------------------------------------- 1 | from pytest import raises 2 | 3 | from do.API import API 4 | from do.core.Exceptions import MissingTableRow 5 | from do.core.Variables import Outcome 6 | 7 | from ..source import models 8 | 9 | model = models["pearl-3.4.yml"] 10 | table = model.table("Xj") 11 | priors = [Outcome("X6", "x6"), Outcome("X4", "x4"), Outcome("X5", "x5")] 12 | 13 | api = API() 14 | 15 | 16 | def test_ValidLookup(): 17 | table.probability_lookup(Outcome("Xj", "xj"), priors) 18 | 19 | 20 | def test_InvalidLookup(): 21 | with raises(MissingTableRow): 22 | table.probability_lookup(Outcome("Xj", "foo"), priors) 23 | -------------------------------------------------------------------------------- /models/simulation.json.yml: -------------------------------------------------------------------------------- 1 | name: Simulation Test 2 | endogenous: 3 | A: 4 | outcomes: 5 | - a 6 | - ~a 7 | parents: 8 | - B 9 | - C 10 | table: [ 11 | [a, b, c, 0.6], 12 | [a, b, ~c, 0.3], 13 | [a, ~b, c, 0.4], 14 | [a, ~b, ~c, 0.9], 15 | [~a, b, c, 0.4], 16 | [~a, b, ~c, 0.7], 17 | [~a, ~b, c, 0.6], 18 | [~a, ~b, ~c, 0.1] 19 | ] 20 | B: 21 | outcomes: 22 | - b 23 | - ~b 24 | parents: [] 25 | table: [ 26 | [b, 0.4], 27 | [~b, 0.6] 28 | ] 29 | C: 30 | outcomes: 31 | - c 32 | - ~c 33 | parents: [] 34 | table: [ 35 | [c, 0.7], 36 | [~c, 0.3] 37 | ] 38 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | exclude = 3 | .git, 4 | .github 5 | __pycache__ 6 | debug 7 | do/probability/do_calculus 8 | do/shpitser 9 | 
tests/ 10 | wiki/ 11 | 12 | [coverage:run] 13 | relative_files = True 14 | source = 15 | do/ 16 | tests/ 17 | omit = 18 | do/API.py 19 | do/__main__.py 20 | do/config/generate_config_docs.py 21 | do/config/config_manager.py 22 | do/shpitser/* 23 | do/util/OutputLogger.py 24 | do/probability 25 | 26 | [coverage:report] 27 | exclude_lines = 28 | def __str__ 29 | coverage: skip 30 | 31 | [tool:pytest] 32 | minversion = 6.0 33 | norecursedirs = 34 | build 35 | debug 36 | dist 37 | old_tests -------------------------------------------------------------------------------- /examples/2-backdoor-paths/deconfound.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from do import API 4 | 5 | api = API() 6 | 7 | file = Path("pearl-3.4.yml") 8 | model = api.instantiate_model(file) 9 | 10 | backdoors = api.backdoors({"Xi"}, {"Xj"}, model.graph()) 11 | assert len(backdoors) > 0, "No backdoor paths detected!" 12 | assert not api.blocks({"Xi"}, {"Xj"}, model.graph(), set()), "This should not block all paths!" 13 | 14 | for path in backdoors: 15 | print("Path:", path) 16 | 17 | backdoors = api.backdoors({"Xi"}, {"Xj"}, model.graph(), {"X1", "X4"}) 18 | assert len(backdoors) == 0, "Expected this to block!" 19 | assert api.blocks({"Xi"}, {"Xj"}, model.graph(), {"X1", "X4"}), "This should block!" 
20 | -------------------------------------------------------------------------------- /do/core/API.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Union 3 | 4 | from .Expression import Expression 5 | from .Inference import inference, validate 6 | from .Model import Model, from_dict, from_path 7 | 8 | 9 | class API: 10 | 11 | def validate(self, model: Model) -> bool: 12 | return validate(model) 13 | 14 | def probability(self, query: Expression, model: Model) -> float: 15 | return inference(query, model) 16 | 17 | def instantiate_model(self, model_target: Union[str, Path, dict]) -> Model: 18 | 19 | if isinstance(model_target, dict): 20 | return from_dict(model_target) 21 | 22 | return from_path(Path(model_target) if isinstance(model_target, str) else model_target) 23 | -------------------------------------------------------------------------------- /wiki/API.md: -------------------------------------------------------------------------------- 1 | # API 2 | 3 | Details on the [API](https://en.wikipedia.org/wiki/API) provided in the project. 4 | 5 | This assumes the steps in the [[Installation]] section have been followed, and the project is set up. 6 | 7 | **Note**: For simplicity of import-statements, any examples will *assume* the project was installed as [PyPI](https://pypi.org/project/do-calculus/) package. 8 | 9 | ## Importing 10 | 11 | To import the package: 12 | 13 | ```python 14 | import do 15 | ``` 16 | 17 | **Important**: 18 | - The package name on [PyPI](https://pypi.org/) is [do-calculus](https://pypi.org/project/do-calculus/), but the module to import is called ``do``. 19 | 20 |
21 | 22 | To create an instance of the API: 23 | 24 | ```python 25 | from do import API 26 | 27 | api = API() 28 | ``` 29 | -------------------------------------------------------------------------------- /models/melanoma.yml: -------------------------------------------------------------------------------- 1 | name: "Melanoma" 2 | endogenous: 3 | Y: 4 | outcomes: 5 | - "y" 6 | - "~y" 7 | parents: 8 | - "X" 9 | - "Z" 10 | table: [ 11 | [ "y", "x", "z", 0.3 ], 12 | [ "y", "x", "~z", 0.6 ], 13 | [ "y", "~x", "z", 0.5 ], 14 | [ "y", "~x", "~z", 0.8 ], 15 | [ "~y", "x", "z", 0.7 ], 16 | [ "~y", "x", "~z", 0.4 ], 17 | [ "~y", "~x", "z", 0.5 ], 18 | [ "~y", "~x", "~z", 0.2 ] 19 | ] 20 | X: 21 | outcomes: 22 | - "x" 23 | - "~x" 24 | parents: 25 | - "Z" 26 | table: [ 27 | [ "x", "z", 0.8 ], 28 | [ "x", "~z", 0.4 ], 29 | [ "~x", "z", 0.2 ], 30 | [ "~x", "~z", 0.6 ] 31 | ] 32 | Z: 33 | outcomes: 34 | - "z" 35 | - "~z" 36 | table: [ 37 | [ "z", 0.7 ], 38 | [ "~z", 0.3 ] 39 | ] 40 | -------------------------------------------------------------------------------- /.github/SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Version Support 4 | 5 | Version support always extends only to the current MINOR version number (the second number under semantic versioning, following the pattern MAJOR.MINOR.PATH). There are no plans for any breaking changes to the API moving forward, only feature *additions*, so it seems a reasonable expectation for a dependant of this package to be able to upgrade with no problems. 6 | 7 | ## Reporting a Vulnerability 8 | 9 | Should any vulnerabilities ever be discovered, please contact me directly by email at [braden.dubois@usask.ca](mailto:braden.dubois@usask.ca). Depending on the severity, a fix should be made available within one week. 
While the vulnerabilities possible are expected to be exceptionally minor, given the capabilities of this project, acknowledgement of the vulnerability reporter will be added to either the main README of the project, or an ACKNOWLEDGEMENTS page in the project. 10 | 11 | -------------------------------------------------------------------------------- /models/square-game.yml: -------------------------------------------------------------------------------- 1 | name: Square-Game 2 | endogenous: 3 | T: 4 | outcomes: 5 | - t 6 | - ~t 7 | parents: [] 8 | table: [ 9 | [t, 0.3], 10 | [~t, 0.7] 11 | ] 12 | W: 13 | outcomes: 14 | - w 15 | - ~w 16 | parents: 17 | - T 18 | table: [ 19 | [w, t, 0.2], 20 | [w, ~t, 0.1], 21 | [~w, t, 0.8], 22 | [~w, ~t, 0.9] 23 | ] 24 | X: 25 | outcomes: 26 | - x 27 | - ~x 28 | parents: 29 | - W 30 | table: [ 31 | [x, w, 0.4], 32 | [x, ~w, 0.8], 33 | [~x, w, 0.6], 34 | [~x, ~w, 0.2] 35 | ] 36 | Y: 37 | outcomes: 38 | - y 39 | - ~y 40 | parents: 41 | - X 42 | - T 43 | table: [ 44 | [y, x, t, 0.3], 45 | [y, x, ~t, 0.6], 46 | [y, ~x, t, 0.5], 47 | [y, ~x, ~t, 0.8], 48 | [~y, x, t, 0.7], 49 | [~y, x, ~t, 0.4], 50 | [~y, ~x, t, 0.5], 51 | [~y, ~x, ~t, 0.2] 52 | ] 53 | -------------------------------------------------------------------------------- /models/pearl-3.6.yml: -------------------------------------------------------------------------------- 1 | endogenous: 2 | U: 3 | outcomes: 4 | - u 5 | - ~u 6 | parents: [] 7 | table: [ 8 | [u, 0.3], 9 | [~u, 0.7] 10 | ] 11 | X: 12 | outcomes: 13 | - x 14 | - ~x 15 | parents: 16 | - U 17 | table: [ 18 | [x, u, 0.2], 19 | [x, ~u, 0.1], 20 | [~x, u, 0.8], 21 | [~x, ~u, 0.9] 22 | ] 23 | Y: 24 | outcomes: 25 | - y 26 | - ~y 27 | parents: 28 | - U 29 | - Z 30 | table: [ 31 | [y, u, z, 0.3], 32 | [y, u, ~z, 0.6], 33 | [y, ~u, z, 0.5], 34 | [y, ~u, ~z, 0.8], 35 | [~y, u, z, 0.7], 36 | [~y, u, ~z, 0.4], 37 | [~y, ~u, z, 0.5], 38 | [~y, ~u, ~z, 0.2] 39 | ] 40 | Z: 41 | outcomes: 42 | - z 43 | - ~z 44 | parents: 45 | 
- X 46 | table: [ 47 | [z, x, 0.4], 48 | [z, ~x, 0.8], 49 | [~z, x, 0.6], 50 | [~z, ~x, 0.2] 51 | ] 52 | name: 'Pearl: Figure 3.6' 53 | -------------------------------------------------------------------------------- /archive/Topology.md: -------------------------------------------------------------------------------- 1 | Details on getting the topology of the model. 2 | 3 | See details in [[Definitions]] for information on the topology-ordering. 4 | 5 | ## Topology 6 | 7 | Getting a topological ordering of the model. 8 | 9 | STUB|topology 10 | 11 | ### Example 12 | 13 | ```python 14 | from do.API import Do 15 | 16 | do_api = Do("models/model1.yml") 17 | 18 | topology = do_api.topology() 19 | 20 | for v in topology: 21 | print(v) 22 | ``` 23 | 24 | **Important** 25 | - A sequence of *N* vertices is returned. 26 | 27 |
28 | 29 | ## Topology Position 30 | 31 | Get the position of some vertex in the model in its topological ordering. 32 | 33 | STUB|topology_position 34 | 35 | ```python 36 | from do.API import Do 37 | 38 | do_api = Do("models/model1.yml") 39 | 40 | position = do_api.topology_position("x") 41 | print(position) 42 | ``` 43 | 44 | **Important** 45 | - The topological ordering begins at V1, so the value returned for a graph of N vertices is in the range \[1, N\]. 46 | -------------------------------------------------------------------------------- /archive/_Sidebar.md: -------------------------------------------------------------------------------- 1 | ### [[Home]] 2 | 3 | ### [[Installation]] 4 | * [[PyPI]] 5 | * [[GitHub]] 6 | 7 | ### [[Resources]] 8 | * [[Definitions]] 9 | * [[Markovian Models]] 10 | * [[Configuration]] 11 | * [[Literature]] 12 | 13 | ### [[Do API]] 14 | * [[Do.\_\_init\_\_|\_\_init\_\_]] 15 | * [[Do.load_model|Loading a Model]] 16 | * [[Do.p|Probability Queries]] 17 | * [[Do.joint_distribution_table|Joint Distribution Table]] 18 | * [[Do.backdoor_paths|Backdoor Paths]] 19 | * [[Do.standard_paths|Standard Paths]] 20 | * [[Do.deconfounding_sets|Deconfounding Sets]] 21 | * [[Do.independent|Conditional Independence]] 22 | * [[Do.roots|Roots]] 23 | * [[Do.sinks|Sinks]] 24 | * [[Do.parents|Parents]] 25 | * [[Do.children|Children]] 26 | * [[Do.ancestors|Ancestors]] 27 | * [[Do.descendants|Descendants]] 28 | * [[Do.topology|Topology]] 29 | * [[Do.topology_position|Topology]] 30 | * [[Do.set_print_result|Output]] 31 | * [[Do.set_print_detail|Output]] 32 | * [[Do.set_logging|Output]] 33 | * [[Do.set_log_fd|Output]] 34 | * [[Exceptions]] 35 | -------------------------------------------------------------------------------- /archive/Conditional Independence.md: -------------------------------------------------------------------------------- 1 | Determine if two sets of variables in the model are conditionally independent.
2 | 3 | STUB|independent 4 | 5 | ## Independent 6 | 7 | ```python 8 | from do.API import Do 9 | 10 | # Assume this were a detailed model conforming to the above graph... 11 | model = dict() 12 | 13 | do_api = Do(model) 14 | 15 | independent = do_api.independent({"x"}, {"y"}) 16 | 17 | independent_2 = do_api.independent({"x"}, {"y"}, {"z"}) 18 | 19 | independent_3 = do_api.independent({"y"}, {"z"}, dcf=None) 20 | 21 | if independent: 22 | print("Independent!") 23 | else: 24 | print("Not independent!") 25 | ``` 26 | 27 | A boolean for whether the two sets are conditionally independent, given some optional deconfounding set, is returned. 28 | 29 | **Important** 30 | - The third parameter, a set of deconfounding variables, can be given, to block backdoor paths from ``s`` to ``t``. 31 | - If there are no deconfounding variables, an empty collection can be provided, *omitted entirely*, or explicitly set to ``None``. 32 | -------------------------------------------------------------------------------- /tests/core/test_helpers.py: -------------------------------------------------------------------------------- 1 | from do.core.helpers import disjoint, minimal_sets, power_set 2 | 3 | 4 | def test_disjoint(): 5 | d1 = {0, 1, 2, 3, 4} 6 | d2 = {3, 4, 5, 6, 7} 7 | d3 = {6, 7, 8, 9, 10} 8 | assert not disjoint(d1, d2) 9 | assert not disjoint(d2, d3) 10 | assert not disjoint(d1, d2, d3) 11 | assert disjoint(d1, d3) 12 | 13 | 14 | def test_minimal_sets(): 15 | s1 = {1, 2, 3} 16 | s2 = {1, 2, 3, 4} 17 | s3 = {0, 1, 2, 3, 4} 18 | s4 = {5, 6, 7} 19 | s5 = {0, 1, 2, 3, 4, 5, 6, 7} 20 | 21 | minimums = minimal_sets(s1, s2, s3, s4, s5) 22 | assert minimums == [s1, s4] 23 | 24 | assert minimal_sets(s1) == [s1] 25 | assert minimal_sets(s1, s2) == [s1] 26 | assert minimal_sets(s1, s4) == [s1, s4] 27 | 28 | 29 | def test_power_set(): 30 | data = [1, 2, 3, 4] 31 | with_empty = power_set(data, allow_empty_set=True) 32 | without_empty = power_set(data, allow_empty_set=False) 33 | assert 
from typing import Collection, Optional

from ..core.Expression import Expression
from ..core.Graph import Graph
from ..core.Model import Model
from ..core.Types import Vertex, Path
from ..core.Variables import Intervention

from .Backdoor import backdoors, deconfound
from .Do import treat


class API:
    """
    Deconfounding entry points: interventional (treatment) queries, backdoor-path
    discovery, blocking checks, and deconfounding-set computation.
    """

    def treat(self, expression: Expression, interventions: Collection[Intervention], model: Model) -> float:
        """Evaluate ``expression`` in ``model`` under the given interventions."""
        return treat(expression, interventions, model)

    def backdoors(self, x: Collection[Vertex], y: Collection[Vertex], graph: Graph, z: Optional[Collection[Vertex]] = None) -> Collection[Path]:
        """Return backdoor paths from ``x`` to ``y`` in ``graph``, given an optional deconfounding set ``z``."""
        return backdoors(x, y, graph, z)

    def blocks(self, x: Collection[Vertex], y: Collection[Vertex], graph: Graph, z: Collection[Vertex]) -> bool:
        """True iff ``z`` blocks every backdoor path from ``x`` to ``y`` in ``graph``."""
        remaining = backdoors(x, y, graph, z)
        return len(remaining) == 0

    def deconfound(self, x: Collection[Vertex], y: Collection[Vertex], graph: Graph) -> Collection[Collection[Vertex]]:
        """Return every vertex set sufficient to deconfound ``x`` from ``y`` in ``graph``."""
        return deconfound(x, y, graph)
from do.core.Variables import Outcome, Intervention


# Shared fixtures: two outcomes / interventions on X, one on Y.
o1 = Outcome("X", "x")
o2 = Outcome("X", "~x")
o3 = Outcome("Y", "y")

t1 = Intervention("X", "x")
t2 = Intervention("X", "~x")
t3 = Intervention("Y", "y")


def test_Outcome():
    # An Outcome compares equal to its variable's name.
    for outcome, name in ((o1, "X"), (o2, "X"), (o3, "Y")):
        assert outcome == name


def test_Intervention():
    # An Intervention also compares equal to its variable's name.
    for treatment, name in ((t1, "X"), (t2, "X"), (t3, "Y")):
        assert treatment == name


def test_OutcomesEquality():
    # Same variable, different outcome value -> unequal.
    assert o1.name == o2.name and o1 != o2
    assert o2 != o3
    assert o1 != o3
    # A copy is equal but is a distinct object.
    duplicate = o1.copy()
    assert o1 == duplicate
    assert o1 is not duplicate


def test_InterventionEquality():
    # Same variable, different treatment value -> unequal.
    assert t1.name == t2.name and t1 != t2
    assert t2 != t3
    assert t1 != t3
    # A copy is equal but is a distinct object.
    duplicate = t1.copy()
    assert t1 == duplicate
    assert t1 is not duplicate


def test_OutcomesInterventionEquality():
    # An Outcome never equals an Intervention, even on the same variable/value.
    for outcome, treatment in zip((o1, o2, o3), (t1, t2, t3)):
        assert outcome.name == treatment.name
        assert outcome != treatment
-------------------------------------------------------------------------------- 1 | How to load a model into an instance of the API. 2 | 3 | All examples will be using the model from [[Markovian Models]]. 4 | 5 | STUB|load_model 6 | 7 | As shown in [[\_\_init\_\_|\_\_init\_\_]], the following forms of models are acceptable: 8 | - a Python dictionary 9 | - a string path to a file 10 | - a [pathlib.Path](https://docs.python.org/3/library/pathlib.html#pathlib.Path) object 11 | 12 | One can have instantiated a **Do**, and wish to replace the model loaded, or one may have deferred providing a model at the time of instantiation, and wish to provide one now. 13 | 14 | ## Examples 15 | 16 | ### Swapping a Model 17 | 18 | ```python 19 | from do.API import Do 20 | from pathlib import Path 21 | 22 | model_1 = "data/graph1.yml" 23 | 24 | do_api = Do(model=model_1) 25 | 26 | model_2 = Path("data/graph2.yml") 27 | 28 | do_api.load_model(model_2) 29 | ``` 30 | 31 | **Important**: 32 | - One can mix and match the model argument provided when swapping models; a dictionary could be given, then a path, or vice versa. 
from os.path import dirname, abspath
from pathlib import Path
from yaml import safe_load

from do.API import API
from do.core.Model import from_dict
from do.core.Expression import Expression
from do.core.Variables import parse_outcomes_and_interventions

from do.core.helpers import within_precision

from ..source import api, models

# Directory of YAML-defined inference cases, resolved relative to this file.
test_file_directory = Path(dirname(abspath(__file__))) / "inference_files"


def test_Inference():
    """Run every YAML-defined inference case against its named model."""

    yml_files = sorted(p for p in test_file_directory.iterdir() if p.suffix.lower() == ".yml")
    assert len(yml_files) > 0, "Inference test files not found"

    for yml_file in yml_files:

        with yml_file.open("r") as handle:
            data = safe_load(handle)

        # Each test file names the model it exercises.
        model = models[data["graph_filename"]]

        for case in data["tests"]:

            head = parse_outcomes_and_interventions(case["head"])
            # A missing "body" key means an unconditional query.
            body = parse_outcomes_and_interventions(case["body"]) if "body" in case else set()

            computed = api.probability(Expression(head, body), model)
            assert within_precision(computed, case["expect"])
2 | 3 | STUB|deconfounding_sets 4 | 5 | ## Basic Example 6 | 7 | Assuming the basic 3-vertex graph from [[Backdoor Paths]], **G = (V, E)** where: 8 | - **V** = ``{x, y, z}`` 9 | - **E** = ``{(x, y), (z, x), (z, y)}`` 10 | 11 | ```python 12 | from do.API import Do 13 | 14 | # Assume this were a detailed model conforming to the above graph... 15 | model = dict() 16 | 17 | do_api = Do(model) 18 | 19 | dcf = do_api.deconfounding_sets({"x"}, {"y"}) 20 | 21 | for deconfounding_set in dcf: 22 | print(f"Deconfounding set for x->y!: {deconfounding_set}") 23 | ``` 24 | 25 | **Important**: 26 | - ``deconfounding_sets`` takes a *source* set of variables, and a *destination/target* set of variables. 27 | - A list of sets is returned, where each set consists of one possible set by which to block all deconfounding paths. 28 | 29 | ## Usage of Deconfounding Sets 30 | 31 | Finding a deconfounding set can be helpful, but any [[probability queries involving interventions|Probability Queries]] automatically handle deconfounding. 
class ProbabilityException(Exception):
    """
    Base class for all probability-code-related exceptions: catch this to handle
    any error raised by this package, while letting unrelated exceptions crash.

    Fix: this previously derived from BaseException, which Python reserves for
    interpreter-exit signals (SystemExit, KeyboardInterrupt); deriving from
    Exception makes these behave as ordinary library errors, so generic
    ``except Exception`` boundaries no longer miss them.
    """
    pass


class ProbabilityIndeterminableException(ProbabilityException):
    """
    A slightly more specialized Exception for indicating a failure to compute a probability, and inability to
    continue/no further options. This should never occur with a consistent model.
    """
    pass


class MissingTableRow(ProbabilityException):
    """
    Raised when a row is missing from a table, but was expected. Can occur during probability queries.
    """
    pass


class InvalidOutcome(ProbabilityException):
    """
    Raised when attempting to evaluate some query, where a given Outcome or Intervention has been assigned an outcome
    that is not possible for that respective variable.
    """
    pass


class IntersectingSets(ProbabilityException):
    """
    Raised when attempting any backdoor-path related searches, where the source, destination, and/or optional deconfounding
    set of vertices intersect.
    """
    pass


class MissingVariable(ProbabilityException):
    """
    Presumably raised when a query references a variable not defined in the
    model (inferred from the name; raise sites are outside this module).
    """
    pass


class EmptyExpressionHead(ProbabilityException):
    """
    Raised by Expression when constructed without a head (see core/Expression.py).
    """
    pass


class ExogenousNonRoot(ProbabilityException):
    """
    Presumably raised when a variable declared exogenous has parents, i.e. is
    not a root of the graph (inferred from the name; raise sites are outside
    this module).
    """
    pass
Intervention 4 | 5 | # Variable-related 6 | 7 | VClass = Union[Variable, Outcome, Intervention] 8 | """ 9 | A VClass is any of a Variable, Outcome, or Intervention. 10 | 11 | Usage of VClass for type-hinting is useful when the variable need not have 12 | a known value / measurement. For example, VClass might be used in graph-related 13 | components in which the 'name' of the variable is the important feature, whether 14 | the variable be provided as any of the Variable, Outcome, or Intervention class types. 15 | """ 16 | 17 | 18 | VMeasured = Union[Outcome, Intervention] 19 | """ 20 | A VMeasured is either of an Outcome or Intervention. 21 | 22 | Usage of VMeasured is useful in querying distributions on a model; a variable on its 23 | own does not indicate much, but a discrete value represents a measurement/observation (Outcome) 24 | or a treatment (Intervention). 25 | """ 26 | 27 | 28 | # Graph-related 29 | 30 | Vertex = Union[VClass, str] 31 | """ 32 | A Vertex is any of a Variable, Outcome, Intervention, or string. 33 | 34 | It may happen that one has any of (or any combination of) the above, and some path-finding 35 | must be done. The Graph class itself stores only strings as vertices, and we can treat 36 | a string as the name of vertex, or the 'name' field of any of the Variable, Outcome, or Intervention 37 | as corresponding to a named vertex. 38 | """ 39 | 40 | 41 | Path = Sequence[str] 42 | """ 43 | A Path may be yielded in deconfounding (see: Backdoor/Frontdoor Criterions) and is represented 44 | as a sequence of string vertex labels. 
45 | """ 46 | -------------------------------------------------------------------------------- /models/pearl-3.7c.yml: -------------------------------------------------------------------------------- 1 | endogenous: 2 | U1: 3 | outcomes: 4 | - u1 5 | - ~u1 6 | parents: [] 7 | table: [ 8 | [u1, 0.3], 9 | [~u1, 0.7] 10 | ] 11 | U2: 12 | outcomes: 13 | - u2 14 | - ~u2 15 | parents: [] 16 | table: [ 17 | [u2, 0.3], 18 | [~u2, 0.7] 19 | ] 20 | X: 21 | outcomes: 22 | - x 23 | - ~x 24 | parents: 25 | - U1 26 | table: [ 27 | [x, u1, 0.2], 28 | [x, ~u1, 0.1], 29 | [~x, u1, 0.8], 30 | [~x, ~u1, 0.9] 31 | ] 32 | Y: 33 | outcomes: 34 | - y 35 | - ~y 36 | parents: 37 | - Z1 38 | - Z2 39 | table: [ 40 | [y, z1, z2, 0.3], 41 | [y, z1, ~z2, 0.6], 42 | [y, ~z1, z2, 0.5], 43 | [y, ~z1, ~z2, 0.8], 44 | [~y, z1, z2, 0.7], 45 | [~y, z1, ~z2, 0.4], 46 | [~y, ~z1, z2, 0.5], 47 | [~y, ~z1, ~z2, 0.2] 48 | ] 49 | Z1: 50 | outcomes: 51 | - z1 52 | - ~z1 53 | parents: 54 | - U2 55 | - X 56 | table: [ 57 | [z1, u2, x, 0.3], 58 | [z1, u2, ~x, 0.6], 59 | [z1, ~u2, x, 0.5], 60 | [z1, ~u2, ~x, 0.8], 61 | [~z1, u2, x, 0.7], 62 | [~z1, u2, ~x, 0.4], 63 | [~z1, ~u2, x, 0.5], 64 | [~z1, ~u2, ~x, 0.2] 65 | ] 66 | Z2: 67 | outcomes: 68 | - z2 69 | - ~z2 70 | parents: 71 | - U1 72 | - U2 73 | table: [ 74 | [z2, u1, u2, 0.3], 75 | [z2, u1, ~u2, 0.6], 76 | [z2, ~u1, u2, 0.5], 77 | [z2, ~u1, ~u2, 0.8], 78 | [~z2, u1, u2, 0.7], 79 | [~z2, u1, ~u2, 0.4], 80 | [~z2, ~u1, u2, 0.5], 81 | [~z2, ~u1, ~u2, 0.2] 82 | ] 83 | name: 'Pearl: Figure 3.7c' 84 | -------------------------------------------------------------------------------- /archive/Do API.md: -------------------------------------------------------------------------------- 1 | Details on the [API](https://en.wikipedia.org/wiki/API) provided in the project. 2 | 3 | This assumes the steps in the [[Installation]] section have been followed, and the project is set up. 
4 | 5 | **Note**: For simplicity of import-statements, any examples will *assume* the project was installed as [PyPI](https://pypi.org/project/do-calculus/) package. 6 | 7 | ## Importing 8 | 9 | To import the package: 10 | 11 | ```python 12 | import do 13 | ``` 14 | 15 | **Important**: 16 | - The package name on [PyPI](https://pypi.org/) is [do-calculus](https://pypi.org/project/do-calculus/), but the module to import is called ``do``. 17 | 18 |
19 | 20 | To import *just* the API: 21 | 22 | ```python 23 | from do.API import Do 24 | ``` 25 | 26 | **Important**: 27 | - The API, represented as a Python class, is called **Do**. 28 | - **Do** is stored in the file ``API``, so it can be imported from ``do.API``. 29 | 30 | ## Further 31 | 32 | See any of the specific pages on API functions provided: 33 | * [[Do.\_\_init\_\_|\_\_init\_\_]] 34 | * [[Do.load_model|Loading a Model]] 35 | * [[Do.p|Probability Queries]] 36 | * [[Do.joint_distribution_table|Joint Distribution Table]] 37 | * [[Do.backdoor_paths|Backdoor Paths]] 38 | * [[Do.standard_paths|Standard Paths]] 39 | * [[Do.deconfounding_sets|Deconfounding Sets]] 40 | * [[Do.independent|Conditional Independence]] 41 | * [[Do.roots|Roots]] 42 | * [[Do.sinks|Sinks]] 43 | * [[Do.parents|Parents]] 44 | * [[Do.children|Children]] 45 | * [[Do.ancestors|Ancestors]] 46 | * [[Do.descendants|Descendants]] 47 | * [[Do.topology|Topology]] 48 | * [[Do.topology_position|Topology]] 49 | * [[Do.set_print_result|Output]] 50 | * [[Do.set_print_detail|Output]] 51 | * [[Do.set_logging|Output]] 52 | * [[Do.set_log_fd|Output]] 53 | * [[Exceptions]] 54 | -------------------------------------------------------------------------------- /wiki/PyPI.md: -------------------------------------------------------------------------------- 1 | Instructions for installing the package through its [PyPI distribution](https://pypi.org/project/do-calculus/). 2 | 3 | ## PyPI Package 4 | 5 | The package is published on [PyPI](https://pypi.org/) as [do-calculus](https://pypi.org/project/do-calculus/). 
6 | 7 | To install from [PyPI](https://pypi.org/) as a package: 8 | 9 | ```shell 10 | pip install do-calculus 11 | ``` 12 | 13 | ## Upgrade 14 | 15 | To upgrade a local installation of the project (such as when a new version is released), add the ``-U`` flag: 16 | 17 | ```shell 18 | pip install -U do-calculus 19 | ``` 20 | 21 | ## PyPI Release Cycle 22 | 23 | By default, a new package will be automatically uploaded to PyPI on a new [semantically-versioned](https://semver.org/) [release](https://github.com/bradendubois/do-calculus/releases) which is automatically handled by [semantic-release](https://github.com/semantic-release/semantic-release) in a [workflow](https://github.com/bradendubois/do-calculus/actions). 24 | 25 | Releases are generated by [semantic-release](https://github.com/semantic-release/semantic-release) on pushes or merges to the [main](https://github.com/bradendubois/do-calculus/tree/main) and [beta](https://github.com/bradendubois/do-calculus/tree/beta) branches of the project. 26 | 27 | *Only* releases produced from [main](https://github.com/bradendubois/do-calculus/tree/main) will be uploaded to the [PyPI](https://pypi.org/project/do-calculus/) distribution. All development on the project will eventually work its way up to the [PyPI](https://pypi.org/project/do-calculus/) distribution, though it may lag behind [GitHub releases](https://github.com/bradendubois/do-calculus/releases) by anywhere between minutes to a few days. 28 | 29 | See the [[API]] page for importing and using the package once installed. 30 | -------------------------------------------------------------------------------- /archive/PyPI.md: -------------------------------------------------------------------------------- 1 | Instructions for installing the package through its [PyPI distribution](https://pypi.org/project/do-calculus/). 2 | 3 | ## PyPI Package 4 | 5 | The package is published on [PyPI](https://pypi.org/) as [do-calculus](https://pypi.org/project/do-calculus/). 
6 | 7 | To install from [PyPI](https://pypi.org/) as a package: 8 | 9 | ```shell 10 | pip install do-calculus 11 | ``` 12 | 13 | ## Upgrade 14 | 15 | To upgrade a local installation of the project (such as when a new version is released), add the ``-U`` flag: 16 | 17 | ```shell 18 | pip install -U do-calculus 19 | ``` 20 | 21 | ## PyPI Release Cycle 22 | 23 | By default, a new package will be automatically uploaded to PyPI on a new [semantically-versioned](https://semver.org/) [release](https://github.com/bradendubois/do-calculus/releases) which is automatically handled by [semantic-release](https://github.com/semantic-release/semantic-release) in a [workflow](https://github.com/bradendubois/do-calculus/actions). 24 | 25 | Releases are generated by [semantic-release](https://github.com/semantic-release/semantic-release) on pushes or merges to the [main](https://github.com/bradendubois/do-calculus/tree/main) and [beta](https://github.com/bradendubois/do-calculus/tree/beta) branches of the project. 26 | 27 | *Only* releases produced from [main](https://github.com/bradendubois/do-calculus/tree/main) will be uploaded to the [PyPI](https://pypi.org/project/do-calculus/) distribution. All development on the project will eventually work its way up to the [PyPI](https://pypi.org/project/do-calculus/) distribution, though it may lag behind [GitHub releases](https://github.com/bradendubois/do-calculus/releases) by anywhere between minutes to a few days. 28 | 29 | See the [[API|Do API]] page for importing and using the package once installed. 
from do.API import API
from do.core.Expression import Expression
from do.core.Variables import Intervention, Outcome
from do.core.helpers import within_precision

# Pre-loaded shared models (see tests/source.py).
from ..source import models
melanoma = models["melanoma.yml"]
pearl34 = models["pearl-3.4.yml"]

api = API()

##################################################################################
"""
Some tests comparing the resulting value computed by ID as being the same as the
value computed by the standard inference and deconfounding modules. Some queries
don't require deconfounding, and ID should correctly handle these; some queries
require substantial deconfounding.
"""

def test_NoDeconfounding_Pearl34():
    # Non-interventional query: ID with no interventions must match plain inference.
    assert within_precision(api.probability(Expression(Outcome("Xj", "xj")), pearl34), api.identification({Outcome("Xj", "xj")}, [], pearl34, False))


def test_NoDeconfounding_Melanoma():
    # Non-interventional query on a second model: ID must match plain inference.
    assert within_precision(api.probability(Expression(Outcome("Y", "y")), melanoma), api.identification({Outcome("Y", "y")}, [], melanoma, False))


def test_p34():
    # Interventional query: ID must agree with the deconfounding (treat) module.
    assert within_precision(api.identification({Outcome("Xj", "xj")}, {Intervention("Xi", "xi")}, pearl34, False), api.treat(Expression(Outcome("Xj", "xj")), [Intervention("Xi", "xi")], pearl34))


def test_melanoma():
    # Interventional query on the melanoma model: ID must agree with treat.
    assert within_precision(api.identification({Outcome("Y", "y")}, {Intervention("X", "x")}, melanoma, False), api.treat(Expression(Outcome("Y", "y")), [Intervention("X", "x")], melanoma))

def test_proof():
    # NOTE(review): no assertion here — this only verifies proof() does not
    # raise; consider asserting on the returned proof's content.
    print(api.proof({Outcome("Y", "y")}, {Intervention("X", "x")}, melanoma))

##################################################################################
from typing import Collection, Optional, Union

from .Exceptions import EmptyExpressionHead
from .Types import Outcome


class Expression:
    """
    A conditional-probability expression P(head | body), with head and body
    stored as sets of Outcomes. An empty body represents a marginal P(head).
    """

    def __init__(self, head: Union[Outcome, Collection[Outcome]], body: Optional[Collection[Outcome]] = None):
        """
        @param head: a single Outcome, or a non-empty collection of Outcomes
        @param body: an optional collection of Outcomes to condition on; None
            is treated as an empty body
        @raise EmptyExpressionHead: if head is None or an empty collection
        """
        # Fix: previously only ``head is None`` raised, so an empty collection
        # slipped through and built a meaningless P() query despite the
        # exception's name. Both cases now raise.
        if head is None or (not isinstance(head, Outcome) and len(head) == 0):
            raise EmptyExpressionHead

        if body is None:
            body = []

        # Normalize to sets; a bare Outcome becomes a singleton set.
        self._head = set(head) if not isinstance(head, Outcome) else {head}
        self._body = set(body) if not isinstance(body, Outcome) else {body}

    def __str__(self) -> str:
        if len(self._body) == 0:
            return f'P({", ".join(map(str, self._head))})'

        return f'P({", ".join(map(str, self._head))} | {", ".join(map(str, self._body))})'

    # getters
    def head_contains(self, outcome: Outcome) -> bool:
        """True iff the given outcome appears in the head."""
        return outcome in self._head

    def body_contains(self, outcome: Outcome) -> bool:
        """True iff the given outcome appears in the body."""
        return outcome in self._body

    def head(self) -> Collection[Outcome]:
        """Return a defensive copy of the head set."""
        return self._head.copy()

    def body(self) -> Collection[Outcome]:
        """Return a defensive copy of the body set."""
        return self._body.copy()

    # setters: each returns True if the expression was modified, False if the
    # outcome was already present (add) or already absent (remove)
    def add_to_head(self, outcome: Outcome) -> bool:
        if outcome in self._head:
            return False
        self._head.add(outcome)
        return True

    def add_to_body(self, outcome: Outcome) -> bool:
        if outcome in self._body:
            return False
        self._body.add(outcome)
        return True

    def remove_from_head(self, outcome: Outcome) -> bool:
        if outcome not in self._head:
            return False
        self._head.remove(outcome)
        return True

    def remove_from_body(self, outcome: Outcome) -> bool:
        if outcome not in self._body:
            return False
        self._body.remove(outcome)
        return True
def deconfounding_validation(model: Model, tests: Collection[Mapping]):
    """Run a collection of deconfounding test cases against the given model.

    Each test mapping carries a "type" key selecting the validation:
    - "backdoors": check the deconfounding sets found between src and dst
    - "treatment": check a computed interventional probability
    Raises ValueError on an unrecognized test type.
    """
    for test in tests:

        expect = test["expect"]
        v = test["type"]

        if v == "backdoors":
            src = test["src"]
            dst = test["dst"]
            dcf = test.get("dcf", [])  # deconfounding set is optional
            result = api.backdoors(src, dst, model.graph(), dcf)
            assert all(x in expect for x in result)
            # an exhaustive test additionally requires no expected set is missed
            if test["exhaustive"]:
                assert len(result) == len(expect)

        elif v == "treatment":
            head = parse_outcomes_and_interventions(test["head"])
            body = parse_outcomes_and_interventions(test["body"])

            # split the body into plain observations and do()-interventions
            o = [x for x in body if not isinstance(x, Intervention)]
            i = [x for x in body if isinstance(x, Intervention)]

            assert within_precision(api.treat(Expression(head, o), i, model), expect)

        else:
            raise ValueError(f"unexpected test type: {v}")


def test_Deconfounding():
    """Validate every .yml backdoor-module test file in backdoor_files."""
    files = sorted(p for p in test_file_directory.iterdir() if p.suffix.lower() == ".yml")
    assert len(files) > 0, "Found no backdoor module tests"

    for file in files:

        with file.open("r") as f:
            data = safe_load(f)

        model = models[data["graph_filename"]]

        deconfounding_validation(model, data["tests"])
def minimal_sets(*sets) -> list:
    """
    Take an arbitrary number of sets, and return only the minimal sets
    @param sets: An arbitrary number of sets, each set containing strings
    @return: A list of minimal sets; that is, every given set of which no other
        given set is a subset. Duplicate sets are reduced to a single copy.
    """
    # Processing smallest-first guarantees that any subset of s has already
    # been considered (and kept) before s itself is examined.
    sorted_sets = sorted(map(set, sets), key=len)
    minimal_subsets = []
    for s in sorted_sets:
        if not any(kept.issubset(s) for kept in minimal_subsets):
            minimal_subsets.append(s)
    return minimal_subsets
def within_precision(a: float, b: float, digits: int = 5) -> bool:
    """
    Check whether two values differ by an amount less than some number of digits of precision
    @param a: The first value
    @param b: The second value
    @param digits: How many digits of precision the values must agree to (default: 5,
        matching the historical fixed threshold of 1e-5)
    @return: True if the values are within the margin of error acceptable, False otherwise
    """
    return abs(a - b) < 10 ** -digits
class TemplateExpression:
    """A symbolic conditional-probability term [head | given] used when rendering
    a PExpression; it names variables rather than holding concrete outcomes."""

    def __init__(self, head: str, given: Sequence[str]) -> None:
        """
        @param head: The name of the variable this term is a distribution over
        @param given: The names of the variables conditioned on
        """
        self.head = head
        self.given = given

    def copy(self):
        # list(...) accepts any Sequence; the previous self.given.copy() crashed
        # on tuples, which have no .copy() method.
        return TemplateExpression(self.head, list(self.given))

    def __str__(self) -> str:
        if len(self.given) == 0:
            return f"[{self.head}]"
        return f"[{self.head}|{','.join(self.given)}]"
Both **.zip** and **.tar.gz** archives are available. 29 | 30 | **Releases**: [https://github.com/bradendubois/do-calculus/releases](https://github.com/bradendubois/do-calculus/releases) 31 | 32 | Releases are automatically created, tagged, and versioned using [semantic-release](https://github.com/semantic-release/semantic-release). 33 | 34 | ## CLI 35 | 36 | To clone with the [GitHub CLI](https://cli.github.com/). 37 | 38 | ```shell 39 | gh repo clone bradendubois/do-calculus 40 | ``` 41 | 42 | ## Extra Dependencies 43 | 44 | After acquiring a copy from any of the above steps: 45 | 46 | ```shell 47 | pip install -r requirements.txt 48 | ``` 49 | 50 | The above command will install all dependencies listed in ``requirements.txt``. 51 | 52 | ## Further 53 | 54 | An [API](https://en.wikipedia.org/wiki/API) is available and [[details can be found here|Do API]]. 55 | -------------------------------------------------------------------------------- /archive/GitHub.md: -------------------------------------------------------------------------------- 1 | Instructions for installing the project from the [source code](https://github.com/bradendubois/do-calculus/wiki). 2 | 3 | ## Acquiring a Copy 4 | 5 | To acquire a copy of the source code, one can [**clone the repository**](#clone), [**download a release**](#release), or use the [**GitHub CLI**](#cli). 6 | 7 | After a copy has been acquired, [install the extra dependencies](#extra-dependencies). 8 | 9 | ## Clone 10 | 11 | In order to clone the repository, you must have [git](https://git-scm.com/) installed; if you are on [macOS](https://www.apple.com/ca/macos/) or [Linux](https://www.linux.org/), you almost certainly already have this installed. 12 | 13 | You can clone the repository using either the **HTTPS** or **SSH** URL. If you do not know which to choose, or do not intend to commit to the project, use **HTTPS**. 
14 | 15 | To clone with the **HTTPS** URL: 16 | 17 | ```shell 18 | git clone https://github.com/bradendubois/do-calculus.git 19 | ``` 20 | 21 | To clone with the **SSH** URL: 22 | ```shell 23 | git clone git@github.com:bradendubois/do-calculus.git 24 | ``` 25 | 26 | ## Release 27 | 28 | The project's [releases page](https://github.com/bradendubois/do-calculus/releases) shows all tagged version of the project, according to [semantic versioning](https://semver.org/). Both **.zip** and **.tar.gz** archives are available. 29 | 30 | **Releases**: [https://github.com/bradendubois/do-calculus/releases](https://github.com/bradendubois/do-calculus/releases) 31 | 32 | Releases are automatically created, tagged, and versioned using [semantic-release](https://github.com/semantic-release/semantic-release). 33 | 34 | ## CLI 35 | 36 | To clone with the [GitHub CLI](https://cli.github.com/). 37 | 38 | ```shell 39 | gh repo clone bradendubois/do-calculus 40 | ``` 41 | 42 | ## Extra Dependencies 43 | 44 | After acquiring a copy from any of the above steps: 45 | 46 | ```shell 47 | pip install -r requirements.txt 48 | ``` 49 | 50 | The above command will install all dependencies listed in ``requirements.txt``. 51 | 52 | ## Further 53 | 54 | An [API](https://en.wikipedia.org/wiki/API) is available and [[details can be found here|Do API]]. 55 | -------------------------------------------------------------------------------- /archive/Backdoor Paths.md: -------------------------------------------------------------------------------- 1 | How to discover backdoor paths between two sets of variables in a given [[Markovian model|Markovian Models]]. 2 | 3 | STUB|backdoor_paths 4 | 5 | ## Basic Backdoor Paths 6 | 7 | Assume the following model uses the graph **G = (V, E)**, where: 8 | - **V** = ``{x, y, z}`` 9 | - **E** = ``{(x, y), (z, x), (z, y)}`` 10 | 11 | ```python 12 | from do.API import Do 13 | 14 | # Assume this were a detailed model conforming to the above graph... 
15 | model = dict() 16 | 17 | do_api = Do(model) 18 | 19 | backdoor_paths = do_api.backdoor_paths({"x"}, {"y"}) 20 | 21 | for path in backdoor_paths: 22 | print(f"Backdoor path from x->y!: {path}") 23 | ``` 24 | 25 | ``backdoor_paths`` returns a collection of paths, in which each path consists of the vertices (end-points included) connecting some vertex in the ``src`` collection to some vertex in the ``dst`` collection. 26 | - In this example, the return value would be ``[["x", "z", "y"]]``, as this denotes the singular backdoor path ``x <- z -> y``. 27 | 28 | **Important** 29 | - The first parameter is the collection of source variables from which the pathfinding begins. 30 | - The second parameter is the collection of destination variables to which the pathfinding attempts to reach. 31 | - Each path, a backdoor path, is ordered such that the path order is correctly maintained. 32 | 33 | ## Blocking Backdoor Paths 34 | 35 | Assuming the same graph as defined [above](#basic-backdoor-paths)... 36 | 37 | ```python 38 | from do.API import Do 39 | 40 | # Assume this were a detailed model conforming to the above graph... 41 | model = dict() 42 | 43 | do_api = Do(model) 44 | 45 | backdoor_paths = do_api.backdoor_paths({"x"}, {"y"}) 46 | 47 | for path in backdoor_paths: 48 | print(f"Backdoor path from x->y!: {path}") 49 | 50 | blocked = do_api.backdoor_paths({"x"}, {"y"}, {"z"}) 51 | 52 | assert len(blocked) == 0 53 | ``` 54 | 55 | **Important** 56 | - A third parameter is a collection of *deconfounding* variables by which to "block" backdoor paths. 57 | - To represent that there are no deconfounding variables, an *empty* collection of vertices can be given, explicitly set as ``None``, or *omitted entirely*. 58 | - If all backdoor paths are successfully blocked, an **empty list** is returned. 
59 | -------------------------------------------------------------------------------- /models/fumigants_eelworms.yml: -------------------------------------------------------------------------------- 1 | name: Fumigants / Eelworms 2 | endogenous: 3 | B: 4 | outcomes: 5 | - b 6 | - ~b 7 | parents: 8 | - Z0 9 | table: [ 10 | [b, z0, 0.1], 11 | [b, ~z0, 0.85], 12 | [~b, z0, 0.9], 13 | [~b, ~z0, 0.15] 14 | ] 15 | X: 16 | outcomes: 17 | - x 18 | - ~x 19 | parents: 20 | - Z0 21 | table: [ 22 | [x, z0, 0.45], 23 | [x, ~z0, 0.9], 24 | [~x, z0, 0.55], 25 | [~x, ~z0, 0.1]] 26 | Y: 27 | outcomes: 28 | - y 29 | - ~y 30 | parents: 31 | - X 32 | - Z2 33 | - Z3 34 | table: [ 35 | [y, x, z2, z3, 0.7], 36 | [y, x, z2, ~z3, 0.65], 37 | [y, x, ~z2, z3, 0.4], 38 | [y, x, ~z2, ~z3, 0.9], 39 | [y, ~x, z2, z3, 0.02], 40 | [y, ~x, z2, ~z3, 0.15], 41 | [y, ~x, ~z2, z3, 0.22], 42 | [y, ~x, ~z2, ~z3, 0.56], 43 | [~y, x, z2, z3, 0.3], 44 | [~y, x, z2, ~z3, 0.35], 45 | [~y, x, ~z2, z3, 0.6], 46 | [~y, x, ~z2, ~z3, 0.1], 47 | [~y, ~x, z2, z3, 0.98], 48 | [~y, ~x, z2, ~z3, 0.85], 49 | [~y, ~x, ~z2, z3, 0.78], 50 | [~y, ~x, ~z2, ~z3, 0.44]] 51 | Z0: 52 | outcomes: 53 | - z0 54 | - ~z0 55 | parents: [] 56 | table: [ 57 | [z0, 0.75], 58 | [~z0, 0.25] 59 | ] 60 | Z1: 61 | outcomes: 62 | - z1 63 | - ~z1 64 | parents: 65 | - Z0 66 | table: [ 67 | [z1, z0, 0.2], 68 | [z1, ~z0, 0.24], 69 | [~z1, z0, 0.8], 70 | [~z1, ~z0, 0.76] 71 | ] 72 | Z2: 73 | outcomes: 74 | - z2 75 | - ~z2 76 | parents: 77 | - X 78 | - Z1 79 | table: [ 80 | [z2, x, z1, 0.8], 81 | [z2, x, ~z1, 0.6], 82 | [z2, ~x, z1, 0.5], 83 | [z2, ~x, ~z1, 0.7], 84 | [~z2, x, z1, 0.2], 85 | [~z2, x, ~z1, 0.4], 86 | [~z2, ~x, z1, 0.5], 87 | [~z2, ~x, ~z1, 0.3] 88 | ] 89 | Z3: 90 | outcomes: 91 | - z3 92 | - ~z3 93 | parents: 94 | - B 95 | - Z2 96 | table: [ 97 | [z3, b, z2, 0.75], 98 | [z3, b, ~z2, 0.65], 99 | [z3, ~b, z2, 0.4], 100 | [z3, ~b, ~z2, 0.9], 101 | [~z3, b, z2, 0.25], 102 | [~z3, b, ~z2, 0.35], 103 | [~z3, ~b, z2, 0.6], 104 | [~z3, ~b, 
~z2, 0.1] 105 | ] 106 | -------------------------------------------------------------------------------- /archive/Configuration.md: -------------------------------------------------------------------------------- 1 | Settings for the project are stored in ``config.yml`` in the same directory as the Python file that imports ``Do``. 2 | - **Note**: This file will be created if it does not exist, when the project is run. 3 | 4 | ## Output Control 5 | 6 | Control what information is output; the computational steps of queries or regression tests, on launch, whether to minimize acceptable sets Z in backdoor paths. 7 | 8 | #### Output Levels of Precision 9 | 10 | How many digits of precision to output a result to. 11 | 12 | | Setting Name | Options | Default Value | 13 | |:-:|:-:|:-:| 14 | | ``output_levels_of_precision`` | any positive integer | 5 | 15 | 16 | #### Minimize Backdoor Sets 17 | 18 | If enabled, when sets X and Y are given, and all feasible sets Z to ensure causal independence are created, only minimal sets will be shown. 19 | 20 | | Setting Name | Options | Default Value | 21 | |:-:|:-:|:-:| 22 | | ``minimize_backdoor_sets`` | [True, False] | True | 23 | 24 | ## Accuracy / Formatting / Precision Rules 25 | 26 | Regards settings on the accuracy/settings of regression tests, computation caching, and noisein function evaluations. 27 | 28 | #### Cache Computation Results 29 | 30 | If enabled, any time a specific query is computed, its results will be cached; if the same query is required in any subsequent queries, its cached result will be reused instead of computing the same result from scratch. This can yield a large performance increase in larger causal graphs. 31 | 32 | | Setting Name | Options | Default Value | 33 | |:-:|:-:|:-:| 34 | | ``cache_computation_results`` | [True, False] | True | 35 | 36 | #### Topological Sort Variables 37 | 38 | If enabled, to avoid Bayes rule as much as possible, the head and body of queries can be topologically sorted. 
39 | 40 | | Setting Name | Options | Default Value | 41 | |:-:|:-:|:-:| 42 | | ``topological_sort_variables`` | [True, False] | True | 43 | 44 | #### Regression Test Result Precision 45 | 46 | In a regression test (see: ``Regression Tests``) where an 'expected value' is provided, this is how many digits of precision the computed value must meet within. Higher requires more accuracy, but also a longer/more detailed hand-computed 'expected result'. 47 | 48 | | Setting Name | Options | Default Value | 49 | |:-:|:-:|:-:| 50 | | ``regression_levels_of_precision`` | any positive integer | 5 | 51 | 52 | -------------------------------------------------------------------------------- /archive/Output.md: -------------------------------------------------------------------------------- 1 | Control over the output that is printed to standard output from usage of the [[API|Do API]]. 2 | 3 | Here, we will make clear *two* categorizations of output: 4 | 1. **Result**: the final result returned from some computation 5 | 2. **Detail**: any intermediate information involved in some computation 6 | 7 | ## Print Result 8 | 9 | Set whether to print the result of an API call to standard output. 10 | 11 | STUB|set_print_result 12 | 13 | ### Example 14 | 15 | ```python 16 | from do.API import Do 17 | 18 | do_api = Do("models/model1.yml") 19 | 20 | do_api.set_print_result(True) 21 | 22 | # queries here... 23 | ``` 24 | 25 |
26 | 27 | ## Print Detail 28 | 29 | Set whether to print the detail of an API call to standard output. 30 | 31 | STUB|set_print_detail 32 | 33 | ### Example 34 | 35 | ```python 36 | from do.API import Do 37 | 38 | do_api = Do("models/model1.yml") 39 | do_api.set_print_detail(True) 40 | 41 | # queries here... 42 | ``` 43 | 44 |
45 | 46 | ## Set Logging 47 | 48 | Set whether to log results and details to some file descriptor. 49 | 50 | Requires a file descriptor to have been set when [[instantiating the API|Loading a Model]], or [explicitly set](#set-log-fd). 51 | 52 | STUB|set_logging 53 | 54 | ### Example 55 | 56 | ```python 57 | from pathlib import Path 58 | from do.API import Do 59 | 60 | file = Path("output/model1-output") 61 | f = file.open("w") 62 | 63 | do_api = Do("models/model1.yml", log_fd=f) 64 | 65 | do_api.set_logging(True) 66 | 67 | # queries here... 68 | 69 | f.close() 70 | ``` 71 | 72 | **Important** 73 | - If logging is enabled, What is written to the file descriptor set will be all results and details will be written to the file, regardless of settings for whether to *print* results and/or details. 74 | 75 |
76 | 77 | ## Set Log FD 78 | 79 | Set an open file descriptor as the file descriptor to write to. 80 | 81 | STUB|set_log_fd 82 | 83 | ### Example 84 | 85 | ```python 86 | from pathlib import Path 87 | from do.API import Do 88 | 89 | do_api = Do("models/model1.yml") 90 | 91 | file = Path("output/model1-output") 92 | f = file.open("w") 93 | 94 | do_api.set_log_fd(f) 95 | 96 | # queries here... 97 | 98 | f.close() 99 | ``` 100 | 101 | **Important** 102 | - For this, *any* open file descriptor can be given, as long as the file descriptor object given *has write permission*, and supports a ``.write()`` method that **takes a string as input**. 103 | -------------------------------------------------------------------------------- /tests/core/test_Expression.py: -------------------------------------------------------------------------------- 1 | from pytest import raises 2 | 3 | from do.core.Exceptions import EmptyExpressionHead 4 | from do.core.Expression import Expression 5 | from do.core.Variables import Outcome 6 | 7 | 8 | def test_SequenceSequence(): 9 | x = Expression([Outcome("Y", "y"), Outcome("X", "x")], [Outcome("Z", "z")]) 10 | assert str(x) == "P(Y = y, X = x | Z = z)" or str(x) == "P(X = x, Y = y | Z = z)" 11 | 12 | def test_SequenceSet(): 13 | x = Expression([Outcome("Y", "y"), Outcome("X", "x")], {Outcome("Z", "z")}) 14 | assert str(x) == "P(Y = y, X = x | Z = z)" or str(x) == "P(X = x, Y = y | Z = z)" 15 | 16 | def test_SequenceSingle(): 17 | x = Expression([Outcome("Y", "y"), Outcome("X", "x")], Outcome("Z", "z")) 18 | assert str(x) == "P(Y = y, X = x | Z = z)" or str(x) == "P(X = x, Y = y | Z = z)" 19 | 20 | def test_SequenceNone(): 21 | x = Expression([Outcome("Y", "y"), Outcome("X", "x")]) 22 | assert str(x) == "P(Y = y, X = x)" or str(x) == "P(X = x, Y = y)" 23 | 24 | 25 | def test_SetSequence(): 26 | x = Expression({Outcome("Y", "y"), Outcome("X", "x")}, [Outcome("Z", "z")]) 27 | print(str(x)) 28 | assert str(x) == "P(Y = y, X = x | Z = z)" or str(x) 
== "P(X = x, Y = y | Z = z)" 29 | 30 | def test_SetSet(): 31 | x = Expression({Outcome("Y", "y"), Outcome("X", "x")}, {Outcome("Z", "z")}) 32 | print(str(x)) 33 | assert str(x) == "P(Y = y, X = x | Z = z)" or str(x) == "P(X = x, Y = y | Z = z)" 34 | 35 | def test_SetSingle(): 36 | x = Expression({Outcome("Y", "y"), Outcome("X", "x")}, Outcome("Z", "z")) 37 | assert str(x) == "P(Y = y, X = x | Z = z)" or str(x) == "P(X = x, Y = y | Z = z)" 38 | 39 | def test_SetNone(): 40 | x = Expression({Outcome("Y", "y"), Outcome("X", "x")}) 41 | assert str(x) == "P(Y = y, X = x)" or str(x) == "P(X = x, Y = y)" 42 | 43 | 44 | def test_SingleSequence(): 45 | x = Expression(Outcome("Y", "y"), [Outcome("Z", "z")]) 46 | assert str(x) == "P(Y = y | Z = z)" 47 | 48 | def test_SingleSet(): 49 | x = Expression(Outcome("Y", "y"), {Outcome("Z", "z")}) 50 | assert str(x) == "P(Y = y | Z = z)" 51 | 52 | def test_SingleSingle(): 53 | x = Expression(Outcome("Y", "y"), Outcome("Z", "z")) 54 | assert str(x) == "P(Y = y | Z = z)" 55 | 56 | def test_SingleNone(): 57 | x = Expression(Outcome("Y", "y")) 58 | assert str(x) == "P(Y = y)" 59 | 60 | 61 | def test_NoneSequence(): 62 | with raises(EmptyExpressionHead): 63 | Expression(None, [Outcome("Z", "z")]) 64 | 65 | def test_NoneNone(): 66 | with raises(EmptyExpressionHead): 67 | Expression(None) 68 | -------------------------------------------------------------------------------- /models/pearl-3.4.yml: -------------------------------------------------------------------------------- 1 | name: 'Pearl: Figure 3.4' 2 | endogenous: 3 | X1: 4 | outcomes: 5 | - x1 6 | - ~x1 7 | parents: [] 8 | table: [ 9 | [x1, 0.4], 10 | [~x1, 0.6] 11 | ] 12 | X2: 13 | outcomes: 14 | - x2 15 | - ~x2 16 | parents: [] 17 | table: [ 18 | [x2, 0.15], 19 | [~x2, 0.85] 20 | ] 21 | X3: 22 | outcomes: 23 | - x3 24 | - ~x3 25 | parents: 26 | - X1 27 | table: [ 28 | [x3, x1, 0.1], 29 | [x3, ~x1, 0.3], 30 | [~x3, x1, 0.9], 31 | [~x3, ~x1, 0.7] 32 | ] 33 | X4: 34 | outcomes: 35 | - 
x4 36 | - ~x4 37 | parents: 38 | - X1 39 | - X2 40 | table: [ 41 | [x4, x1, x2, 0.7], 42 | [x4, x1, ~x2, 0.9], 43 | [x4, ~x1, x2, 0.55], 44 | [x4, ~x1, ~x2, 0.15], 45 | [~x4, x1, x2, 0.3], 46 | [~x4, x1, ~x2, 0.1], 47 | [~x4, ~x1, x2, 0.45], 48 | [~x4, ~x1, ~x2, 0.85] 49 | ] 50 | X5: 51 | outcomes: 52 | - x5 53 | - ~x5 54 | parents: 55 | - X2 56 | table: [ 57 | [x5, x2, 0.8], 58 | [x5, ~x2, 0.25], 59 | [~x5, x2, 0.2], 60 | [~x5, ~x2, 0.75] 61 | ] 62 | X6: 63 | outcomes: 64 | - x6 65 | - ~x6 66 | parents: 67 | - Xi 68 | table: [ 69 | [x6, xi, 0.9], 70 | [x6, ~xi, 0.25], 71 | [~x6, xi, 0.1], 72 | [~x6, ~xi, 0.75] 73 | ] 74 | Xi: 75 | outcomes: 76 | - xi 77 | - ~xi 78 | parents: 79 | - X3 80 | - X4 81 | table: [ 82 | [xi, x3, x4, 0.5], 83 | [xi, x3, ~x4, 0.65], 84 | [xi, ~x3, x4, 0.1], 85 | [xi, ~x3, ~x4, 0.25], 86 | [~xi, x3, x4, 0.5], 87 | [~xi, x3, ~x4, 0.35], 88 | [~xi, ~x3, x4, 0.9], 89 | [~xi, ~x3, ~x4, 0.75] 90 | ] 91 | Xj: 92 | outcomes: 93 | - xj 94 | - ~xj 95 | parents: 96 | - X6 97 | - X4 98 | - X5 99 | table: [ 100 | [xj, x6, x4, x5, 0.0], 101 | [xj, x6, x4, ~x5, 0.25], 102 | [xj, x6, ~x4, x5, 0.7], 103 | [xj, x6, ~x4, ~x5, 0.45], 104 | [xj, ~x6, x4, x5, 0.15], 105 | [xj, ~x6, x4, ~x5, 0.8], 106 | [xj, ~x6, ~x4, x5, 0.95], 107 | [xj, ~x6, ~x4, ~x5, 0.05], 108 | [~xj, x6, x4, x5, 1.0], 109 | [~xj, x6, x4, ~x5, 0.75], 110 | [~xj, x6, ~x4, x5, 0.3], 111 | [~xj, x6, ~x4, ~x5, 0.55], 112 | [~xj, ~x6, x4, x5, 0.85], 113 | [~xj, ~x6, x4, ~x5, 0.2], 114 | [~xj, ~x6, ~x4, x5, 0.05], 115 | [~xj, ~x6, ~x4, ~x5, 0.95] 116 | ] 117 | -------------------------------------------------------------------------------- /examples/1-basic-backdoor/pearl-3.4.yml: -------------------------------------------------------------------------------- 1 | name: 'Pearl: Figure 3.4' 2 | endogenous: 3 | X1: 4 | outcomes: 5 | - x1 6 | - ~x1 7 | parents: [] 8 | table: [ 9 | [x1, 0.4], 10 | [~x1, 0.6] 11 | ] 12 | X2: 13 | outcomes: 14 | - x2 15 | - ~x2 16 | parents: [] 17 | table: [ 18 | [x2, 
0.15], 19 | [~x2, 0.85] 20 | ] 21 | X3: 22 | outcomes: 23 | - x3 24 | - ~x3 25 | parents: 26 | - X1 27 | table: [ 28 | [x3, x1, 0.1], 29 | [x3, ~x1, 0.3], 30 | [~x3, x1, 0.9], 31 | [~x3, ~x1, 0.7] 32 | ] 33 | X4: 34 | outcomes: 35 | - x4 36 | - ~x4 37 | parents: 38 | - X1 39 | - X2 40 | table: [ 41 | [x4, x1, x2, 0.7], 42 | [x4, x1, ~x2, 0.9], 43 | [x4, ~x1, x2, 0.55], 44 | [x4, ~x1, ~x2, 0.15], 45 | [~x4, x1, x2, 0.3], 46 | [~x4, x1, ~x2, 0.1], 47 | [~x4, ~x1, x2, 0.45], 48 | [~x4, ~x1, ~x2, 0.85] 49 | ] 50 | X5: 51 | outcomes: 52 | - x5 53 | - ~x5 54 | parents: 55 | - X2 56 | table: [ 57 | [x5, x2, 0.8], 58 | [x5, ~x2, 0.25], 59 | [~x5, x2, 0.2], 60 | [~x5, ~x2, 0.75] 61 | ] 62 | X6: 63 | outcomes: 64 | - x6 65 | - ~x6 66 | parents: 67 | - Xi 68 | table: [ 69 | [x6, xi, 0.9], 70 | [x6, ~xi, 0.25], 71 | [~x6, xi, 0.1], 72 | [~x6, ~xi, 0.75] 73 | ] 74 | Xi: 75 | outcomes: 76 | - xi 77 | - ~xi 78 | parents: 79 | - X3 80 | - X4 81 | table: [ 82 | [xi, x3, x4, 0.5], 83 | [xi, x3, ~x4, 0.65], 84 | [xi, ~x3, x4, 0.1], 85 | [xi, ~x3, ~x4, 0.25], 86 | [~xi, x3, x4, 0.5], 87 | [~xi, x3, ~x4, 0.35], 88 | [~xi, ~x3, x4, 0.9], 89 | [~xi, ~x3, ~x4, 0.75] 90 | ] 91 | Xj: 92 | outcomes: 93 | - xj 94 | - ~xj 95 | parents: 96 | - X6 97 | - X4 98 | - X5 99 | table: [ 100 | [xj, x6, x4, x5, 0.0], 101 | [xj, x6, x4, ~x5, 0.25], 102 | [xj, x6, ~x4, x5, 0.7], 103 | [xj, x6, ~x4, ~x5, 0.45], 104 | [xj, ~x6, x4, x5, 0.15], 105 | [xj, ~x6, x4, ~x5, 0.8], 106 | [xj, ~x6, ~x4, x5, 0.95], 107 | [xj, ~x6, ~x4, ~x5, 0.05], 108 | [~xj, x6, x4, x5, 1.0], 109 | [~xj, x6, x4, ~x5, 0.75], 110 | [~xj, x6, ~x4, x5, 0.3], 111 | [~xj, x6, ~x4, ~x5, 0.55], 112 | [~xj, ~x6, x4, x5, 0.85], 113 | [~xj, ~x6, x4, ~x5, 0.2], 114 | [~xj, ~x6, ~x4, x5, 0.05], 115 | [~xj, ~x6, ~x4, ~x5, 0.95] 116 | ] 117 | -------------------------------------------------------------------------------- /examples/2-backdoor-paths/pearl-3.4.yml: -------------------------------------------------------------------------------- 1 | 
name: 'Pearl: Figure 3.4' 2 | endogenous: 3 | X1: 4 | outcomes: 5 | - x1 6 | - ~x1 7 | parents: [] 8 | table: [ 9 | [x1, 0.4], 10 | [~x1, 0.6] 11 | ] 12 | X2: 13 | outcomes: 14 | - x2 15 | - ~x2 16 | parents: [] 17 | table: [ 18 | [x2, 0.15], 19 | [~x2, 0.85] 20 | ] 21 | X3: 22 | outcomes: 23 | - x3 24 | - ~x3 25 | parents: 26 | - X1 27 | table: [ 28 | [x3, x1, 0.1], 29 | [x3, ~x1, 0.3], 30 | [~x3, x1, 0.9], 31 | [~x3, ~x1, 0.7] 32 | ] 33 | X4: 34 | outcomes: 35 | - x4 36 | - ~x4 37 | parents: 38 | - X1 39 | - X2 40 | table: [ 41 | [x4, x1, x2, 0.7], 42 | [x4, x1, ~x2, 0.9], 43 | [x4, ~x1, x2, 0.55], 44 | [x4, ~x1, ~x2, 0.15], 45 | [~x4, x1, x2, 0.3], 46 | [~x4, x1, ~x2, 0.1], 47 | [~x4, ~x1, x2, 0.45], 48 | [~x4, ~x1, ~x2, 0.85] 49 | ] 50 | X5: 51 | outcomes: 52 | - x5 53 | - ~x5 54 | parents: 55 | - X2 56 | table: [ 57 | [x5, x2, 0.8], 58 | [x5, ~x2, 0.25], 59 | [~x5, x2, 0.2], 60 | [~x5, ~x2, 0.75] 61 | ] 62 | X6: 63 | outcomes: 64 | - x6 65 | - ~x6 66 | parents: 67 | - Xi 68 | table: [ 69 | [x6, xi, 0.9], 70 | [x6, ~xi, 0.25], 71 | [~x6, xi, 0.1], 72 | [~x6, ~xi, 0.75] 73 | ] 74 | Xi: 75 | outcomes: 76 | - xi 77 | - ~xi 78 | parents: 79 | - X3 80 | - X4 81 | table: [ 82 | [xi, x3, x4, 0.5], 83 | [xi, x3, ~x4, 0.65], 84 | [xi, ~x3, x4, 0.1], 85 | [xi, ~x3, ~x4, 0.25], 86 | [~xi, x3, x4, 0.5], 87 | [~xi, x3, ~x4, 0.35], 88 | [~xi, ~x3, x4, 0.9], 89 | [~xi, ~x3, ~x4, 0.75] 90 | ] 91 | Xj: 92 | outcomes: 93 | - xj 94 | - ~xj 95 | parents: 96 | - X6 97 | - X4 98 | - X5 99 | table: [ 100 | [xj, x6, x4, x5, 0.0], 101 | [xj, x6, x4, ~x5, 0.25], 102 | [xj, x6, ~x4, x5, 0.7], 103 | [xj, x6, ~x4, ~x5, 0.45], 104 | [xj, ~x6, x4, x5, 0.15], 105 | [xj, ~x6, x4, ~x5, 0.8], 106 | [xj, ~x6, ~x4, x5, 0.95], 107 | [xj, ~x6, ~x4, ~x5, 0.05], 108 | [~xj, x6, x4, x5, 1.0], 109 | [~xj, x6, x4, ~x5, 0.75], 110 | [~xj, x6, ~x4, x5, 0.3], 111 | [~xj, x6, ~x4, ~x5, 0.55], 112 | [~xj, ~x6, x4, x5, 0.85], 113 | [~xj, ~x6, x4, ~x5, 0.2], 114 | [~xj, ~x6, ~x4, x5, 0.05], 115 | [~xj, ~x6, 
~x4, ~x5, 0.95] 116 | ] 117 | -------------------------------------------------------------------------------- /tests/deconfounding/backdoor_files/graphs/pearl-3.4.yml: -------------------------------------------------------------------------------- 1 | name: 'Pearl: Figure 3.4' 2 | endogenous: 3 | X1: 4 | outcomes: 5 | - x1 6 | - ~x1 7 | parents: [] 8 | table: [ 9 | [x1, 0.4], 10 | [~x1, 0.6] 11 | ] 12 | X2: 13 | outcomes: 14 | - x2 15 | - ~x2 16 | parents: [] 17 | table: [ 18 | [x2, 0.15], 19 | [~x2, 0.85] 20 | ] 21 | X3: 22 | outcomes: 23 | - x3 24 | - ~x3 25 | parents: 26 | - X1 27 | table: [ 28 | [x3, x1, 0.1], 29 | [x3, ~x1, 0.3], 30 | [~x3, x1, 0.9], 31 | [~x3, ~x1, 0.7] 32 | ] 33 | X4: 34 | outcomes: 35 | - x4 36 | - ~x4 37 | parents: 38 | - X1 39 | - X2 40 | table: [ 41 | [x4, x1, x2, 0.7], 42 | [x4, x1, ~x2, 0.9], 43 | [x4, ~x1, x2, 0.55], 44 | [x4, ~x1, ~x2, 0.15], 45 | [~x4, x1, x2, 0.3], 46 | [~x4, x1, ~x2, 0.1], 47 | [~x4, ~x1, x2, 0.45], 48 | [~x4, ~x1, ~x2, 0.85] 49 | ] 50 | X5: 51 | outcomes: 52 | - x5 53 | - ~x5 54 | parents: 55 | - X2 56 | table: [ 57 | [x5, x2, 0.8], 58 | [x5, ~x2, 0.25], 59 | [~x5, x2, 0.2], 60 | [~x5, ~x2, 0.75] 61 | ] 62 | X6: 63 | outcomes: 64 | - x6 65 | - ~x6 66 | parents: 67 | - Xi 68 | table: [ 69 | [x6, xi, 0.9], 70 | [x6, ~xi, 0.25], 71 | [~x6, xi, 0.1], 72 | [~x6, ~xi, 0.75] 73 | ] 74 | Xi: 75 | outcomes: 76 | - xi 77 | - ~xi 78 | parents: 79 | - X3 80 | - X4 81 | table: [ 82 | [xi, x3, x4, 0.5], 83 | [xi, x3, ~x4, 0.65], 84 | [xi, ~x3, x4, 0.1], 85 | [xi, ~x3, ~x4, 0.25], 86 | [~xi, x3, x4, 0.5], 87 | [~xi, x3, ~x4, 0.35], 88 | [~xi, ~x3, x4, 0.9], 89 | [~xi, ~x3, ~x4, 0.75] 90 | ] 91 | Xj: 92 | outcomes: 93 | - xj 94 | - ~xj 95 | parents: 96 | - X6 97 | - X4 98 | - X5 99 | table: [ 100 | [xj, x6, x4, x5, 0.0], 101 | [xj, x6, x4, ~x5, 0.25], 102 | [xj, x6, ~x4, x5, 0.7], 103 | [xj, x6, ~x4, ~x5, 0.45], 104 | [xj, ~x6, x4, x5, 0.15], 105 | [xj, ~x6, x4, ~x5, 0.8], 106 | [xj, ~x6, ~x4, x5, 0.95], 107 | [xj, ~x6, ~x4, 
~x5, 0.05], 108 | [~xj, x6, x4, x5, 1.0], 109 | [~xj, x6, x4, ~x5, 0.75], 110 | [~xj, x6, ~x4, x5, 0.3], 111 | [~xj, x6, ~x4, ~x5, 0.55], 112 | [~xj, ~x6, x4, x5, 0.85], 113 | [~xj, ~x6, x4, ~x5, 0.2], 114 | [~xj, ~x6, ~x4, x5, 0.05], 115 | [~xj, ~x6, ~x4, ~x5, 0.95] 116 | ] 117 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

do-calculus

2 |

A Python implementation of the do-calculus of Judea Pearl et al.

3 |

4 | 5 | Test Workflows 6 | 7 | 8 | Coverage Status 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | semantic-release 18 | 19 |

20 | 21 | ## Resources 22 | 23 | * **Documentation / Wiki**: [github.com/bradendubois/do-calculus/wiki](https://github.com/bradendubois/do-calculus/wiki) 24 | * **Source Code**: [github.com/bradendubois/do-calculus](https://github.com/bradendubois/do-calculus) 25 | * **PyPI**: [pypi.org/project/do-calculus/](https://pypi.org/project/do-calculus/) 26 | * **Releases**: [github.com/bradendubois/do-calculus/releases](https://github.com/bradendubois/do-calculus/releases) 27 | * **Bug reports**: [github.com/bradendubois/do-calculus/issues](https://github.com/bradendubois/do-calculus/issues) 28 | * **Contact**: [braden.dubois@usask.ca](mailto:braden.dubois@usask.ca) 29 | 30 | See the [wiki](https://github.com/bradendubois/do-calculus/wiki) to get started. 31 | 32 | 33 | ## Development Status 34 | 35 | A full overhaul has been completed, and marks important milestones in my life and (consequently) this project's. Development on this project is halted until further notice, barring further changes to my own life, or necessary bug fixes and/or security fixes. 36 | 37 | ## Acknowledgements 38 | 39 | This project represents approximately two years of part- and full-time work as part of an indescribably fulfilling undergraduate research project. This project was done under the supervision of Dr. Eric Neufeld from approximately Spring 2020 - Winter 2022. Without Dr. Neufeld's support, guidance, patience, expertise (and of course, funding), this project would never have been started, let alone completed. I cannot overstate my appreciation - you've changed my academic and professional path, and provided me with so many wonderful experiences and memories that I will never forget. Thanks, Eric. 
from inspect import getmembers, getdoc, isclass, ismethod, signature, Signature
from os import chdir
from os.path import abspath, dirname
from pathlib import Path

from do.API import Do
import do.structures.Exceptions


def api_docstring_description(function_name):
    """Build a markdown section documenting the signature of one Do API method.

    The section contains a title, the ``def`` header, one entry per parameter
    (with its annotation), and the return annotation.
    """

    def parameter_signature(parameter_item):
        # One "#### name / annotation" entry per (name, Parameter) pair
        parameter_key, parameter_value = parameter_item
        return f"#### {parameter_key}\n```py\n{parameter_value.annotation}\n```"

    name = str(function_name.__name__)
    function_signature = signature(function_name, follow_wrapped=True)

    title = f"## Function Signature - Do.{name}\n"
    header = f"### Header\n\n```py\ndef {function_signature}\n```\n"

    if len(function_signature.parameters) == 0:
        parameters = "### Parameters\n\n**None**\n"
    else:
        parameters = "### Parameters\n\n" + "\n".join(map(parameter_signature, function_signature.parameters.items()))

    # Signature.empty means "no annotation given"; present that as None
    if function_signature.return_annotation is not Signature.empty:
        return_annotation = function_signature.return_annotation
    else:
        return_annotation = "None"

    return_value = f"### Return Value\n\n```py\n{return_annotation}\n```\n"

    sections = [title, header, parameters, return_value]

    return "\n".join(sections) + "\n\n"


def exception_description(exception_name):
    """Build a markdown section for one exception class, quoting its docstring."""
    return f"## {exception_name}\n\n> {getdoc(exception_name)}\n\n"


def populate_wiki_stubs():
    """Replace ``STUB|<name>`` placeholder lines in the wiki pages with generated markdown.

    A stub naming a Do API method is replaced with its generated signature section;
    the special name ``exceptions`` is replaced with all exception descriptions.
    Files are only rewritten when at least one stub was actually replaced.
    """

    # Resolve the "pages" directory relative to this script, not the caller's CWD
    chdir(dirname(abspath(__file__)))

    api_signatures = {name: api_docstring_description(method) for (name, method) in
                      getmembers(Do(model=None), predicate=ismethod)}

    exceptions = {name: exception_description(exception) for (name, exception) in
                  getmembers(do.structures.Exceptions, predicate=isclass)}

    wiki_dir = Path("pages")

    for file in wiki_dir.iterdir():
        if not file.is_file():
            continue

        text = file.read_text().splitlines()

        found = False
        for line, content in enumerate(text):
            if content.startswith("STUB"):
                stub, replace = content.split("|")
                if replace in api_signatures:
                    text[line] = api_signatures[replace]
                    found = True
                elif replace == "exceptions":
                    text[line] = "\n\n".join(exceptions.values())
                    found = True

        # Only touch the file on disk if something changed
        if found:
            file.write_text("\n".join(text))


if __name__ == "__main__":
    populate_wiki_stubs()
11 | - ``parents``: parent variables (also defined in the model) of the current variable, represented as a list. 12 | - If the variable is a root - that is, there are no parents - the list can be left empty, or this key can be absent from this variable entirely. 13 | - If there are *unobservable parents* - that is, *latent variables* - they must be listed *after* all observable parents, but in any particular order. 14 | - ``table``: a list of lists, representing the probability distribution of the variable. Each sub-list is one unique combination of outcomes of the given variable and each of its parents, along with a probability between 0 and 1. 15 | - The order of the observable parent variables must correspond to the order given in the ``parents`` entry, if there are any. 16 | - to represent a latent variable, **omit** this key. 17 | 18 | Additionally, a key ``name`` can be given, corresponding to an arbitrary name for the model. 19 | 20 | ## Files 21 | 22 | Models can be stored in ``json`` or ``yml`` files, and must have either ``.json``, ``.yml``, or ``.yaml`` file extensions. 23 | - A handful of models are stored in ``do/graphs``. 24 | 25 | ## Dictionaries 26 | 27 | A model can also be stored as a Python dictionary directly, and loaded into an instance of the [[API|Do API]]. 28 | 29 | ### Example 30 | 31 | Here is an example of a very simple model in **yml**: 32 | 33 | ```yaml 34 | name: Simple Model 35 | model: 36 | Y: 37 | outcomes: 38 | - y 39 | - ~y 40 | table: [ 41 | [ y, 0.7 ], 42 | [ ~y, 0.3 ] 43 | ] 44 | X: 45 | outcomes: 46 | - x 47 | - ~x 48 | parents: [ Y ] 49 | table: [ 50 | [ x, y, 0.9 ], 51 | [ x, ~y, 0.75 ], 52 | [ ~x, y, 0.1 ], 53 | [ ~x, ~y, 0.25 ] 54 | ] 55 | ``` 56 | 57 | This represents the basic graph of a single edge, (Y, X). 58 | - ``Y`` has no parents, it is a root. 59 | 60 | #### Dictionary 61 | 62 | Here is the [above example](#example), represented as a Python dictionary. 
63 | 64 | ```py 65 | m = { 66 | "name": "Simple Model", 67 | "model": { 68 | "Y": { 69 | "outcomes": ["y", "~y"], 70 | "table": [ 71 | ["y", 0.7], 72 | ["~y", 0.3] 73 | ] 74 | }, 75 | "X": { 76 | "outcomes": ["x", "~x" ], 77 | "parents": [ "Y" ], 78 | "table": [ 79 | ["x", "y", 0.9], 80 | ["x", "~y", 0.75], 81 | ["~x", "y", 0.1], 82 | ["~x", "~y", 0.25] 83 | ] 84 | } 85 | } 86 | } 87 | ``` 88 | 89 | Both representations can be used in the [[API|Do API]]; if a string path to a file is given, an attempt will be made to load and parse it. 90 | -------------------------------------------------------------------------------- /archive/Definitions.md: -------------------------------------------------------------------------------- 1 | WORK IN PROGRESS (while we iron out who says what!) 2 | 3 | - Vertices 4 | - Edges 5 | - Path 6 | - Backdoor Path 7 | - Confounding / Deconfounding 8 | - Markovian Model / semi-Markovian model / causal Bayesian network 9 | - Parents(V) 10 | 11 | ## Tian & Pearl, 2004 12 | 13 | > The most common such representation involves a Markovian model (also known as a causal Bayesian network). A Markovian model consists of a DAG G over a set V = {V1, ..., Vn } of variables, called a *causal graph*. 14 | 15 | - Tian & Pearl, 2004, p. 562 16 | 17 | > The probabilistic interpretation views *G* as representing conditional independence assertions: Each variable is independent of all its non-descendants given its direct parents in the graph. These assertions imply that the joint probability function *P(v) = P(v_1, ..., v_n)* factorizes according to the product *P(v) = Π_{i} P(V_i | pa_i)* where *pa_i* are (values of) the parents of variable *V_i* in the graph. 18 | 19 | - Tian & Pearl, 2004, p. 562 20 | 21 | *pa_i* is **exclusive**. 22 | 23 | > Let *V* and *U* stand for the sets of observed and unobserved variables, respectively. In this paper, we assume that no *U* variable is a descendant of any *V* variable (called a semi-Markovian model). 
Then the observed probability distribution, *P(v)*, becomes a mixture of products: *P(v) = Σ_{u} Π_{i} P(v_{i} | pa_i, u^i) P(u)* where *Pa_i* and *U^i* stand for the sets of the observed and unobserved parents of *V_i*, and the summation ranges over all the *U* variables. 24 | 25 | - Tian & Pearl, 2004, p. 562 26 | 27 | ## Santu Tikku, 2018 28 | 29 | > For a directed graph G = (V, E) and a set of vertices W ⊆ V the sets Pa(W)_G , Ch(W)_G, An(W)_G and De(W)_G denote a set that 30 | contains W in addition to its parents, children, ancestors and descendants in G, respectively. 31 | - Santtu Tikku, Improving Identification Algorithms in Causal Inference, 2018, p. 8 32 | 33 | Inclusive with given set. 34 | 35 | > Contrary to usual graph theoretic conventions, we call a vertex without any descendants a root (typically referred to as sink). The root set of G is the set of all roots of G, which is {X ∈ V | De(X)G \ {X} = ∅}. The reason for this reversal of the names of sinks and roots is to retain consistency with relevant literature (e.g. Shpitser and Pearl, 2006b) and other 36 | important definitions. 37 | 38 | - Santtu Tikku, Improving Identification Algorithms in Causal Inference, 2018, p. 8 39 | 40 | > When a DAG is considered, we can relate an ordering of its vertices to its topological structure. This is useful especially when a causal interpretation is associated with the graph. A topological ordering π of a DAG G = (V, E) is an ordering of its vertices, such that if X is an ancestor of Y in G then X < Y in π. The subset of vertices that are less than V_j in π is denoted by V_π^{j-1}. 41 | 42 | - Santtu Tikku, Improving Identification Algorithms in Causal Inference, 2018, p. 8 43 | 44 | > An algorithm by Kahn (1962) can be used to derive a topological ordering for any DAG. First, we add the vertices without ancestors to the ordering in any order. At the next stage, we add all vertices such that their parents are already contained in the ordering. 
This is repeated until every vertex has been included. It should be noted that a DAG may have more than one ordering. 45 | 46 | - Santtu Tikku, Improving Identification Algorithms in Causal Inference, 2018, p. 8 47 | 48 | - Backdoor Paths 49 | - Definitions 50 | - Sorting for ordering 51 | -------------------------------------------------------------------------------- /archive/Probability Queries.md: -------------------------------------------------------------------------------- 1 | How to measure probabilities using the **Do** API. 2 | 3 | STUB|p 4 | 5 | ## Basic Query 6 | 7 | For this, we will query a standard probability through the **Do** API. 8 | 9 | This will use the [[simple model from Markovian Models|Markovian Models]]. 10 | 11 | ```python 12 | from do.API import Do 13 | from do.structures.VariableStructures import Outcome 14 | 15 | m = { 16 | "name": "Simple Model", 17 | "model": { 18 | "Y": { 19 | "outcomes": ["y", "~y"], 20 | "table": [ 21 | ["y", 0.7], 22 | ["~y", 0.3] 23 | ] 24 | }, 25 | "X": { 26 | "outcomes": ["x", "~x" ], 27 | "parents": [ "Y" ], 28 | "table": [ 29 | ["x", "y", 0.9], 30 | ["x", "~y", 0.75], 31 | ["~x", "y", 0.1], 32 | ["~x", "~y", 0.25] 33 | ] 34 | } 35 | } 36 | } 37 | 38 | do_api = Do(m) 39 | 40 | x = Outcome("X", "x") 41 | y = Outcome("Y", "y") 42 | 43 | x_alone = do_api.p({x}) 44 | print(f"The probability of X=x, P(X=x) = {x_alone:5}") 45 | 46 | x_if_y = do_api.p({x}, {y}) 47 | print(f"The probability of P(X=x | Y=y) = {x_if_y:5}") 48 | 49 | x_and_y = do_api.p({x, y}, set()) 50 | print(f"The probability of P(X=x, Y=y) = {x_and_y:5}") 51 | ``` 52 | 53 | **Important**: 54 | - The representation of a variable in the model having some *observed* value is implemented as an **Outcome** object. 55 | - The creation of an Outcome object is to supply the *name* of the variable, and *some outcome of this variable*. 56 | - The Outcome class is located at ``do.structures.VariableStructures``. 
57 | - The API function provided in **Do** to query a probability is the ``p`` function. 58 | - **Do.p** takes *two* arguments, a *Collection of outcome outcomes*, and a *Collection of "given" outcomes*. 59 | - **Do.p** can take an *empty collection* if there are no "given" outcomes. 60 | - **Do.p** can *completely omit* the "given" collection of outcomes if there are none. 61 | - **Do.p** can have its "given" collection of outcomes explicitly set to ``None`` if there are none. 62 | - **Do.p** returns a *float*, between [0, 1]. 63 | 64 | ## Querying an Interventional Measurement 65 | 66 | Assume the existence of some more complicated model, ``m_confounded``, in which multiple variables are susceptible to *backdoor paths* or *confounding*, but a sufficient *deconfounding set* can block all backdoor paths. 67 | - See [[Literature]] for more details on *backdoor paths* and *deconfounding*. 68 | 69 | ```python 70 | from do.API import Do 71 | from do.structures.VariableStructures import Outcome, Intervention 72 | 73 | # Assume this were some more complicated model... 74 | m_confounding = dict() 75 | 76 | do_api = Do(m_confounding) 77 | 78 | x = Outcome("X", "x") 79 | 80 | y_outcome = Outcome("Y", "y") 81 | y_intervention = Intervention("Y", "y") 82 | 83 | x_y = do_api.p({x}, {y_outcome}) 84 | x_do_y = do_api.p({x}, {y_intervention}) 85 | 86 | if x_y != x_do_y: 87 | print(f"P(X=x | Y=y) ({x_y:5}) != P(X=x | do(Y=y)) ({x_do_y:5}): Y shows causal influence over X!") 88 | ``` 89 | 90 | **Important**: 91 | - A *treatment* or *intervention* is represented by the **Intervention** object. 92 | - The Intervention class is located at ``do.structures.VariableStructures``, the same as the Outcome class. 93 | - The Intervention class takes the same arguments as the Outcome class. 94 | - Queries involving interventions use **Do.p** just as standard queries do. 95 | - The "given" / body of a query is a *Collection* of Outcomes and Interventions. 
96 | -------------------------------------------------------------------------------- /archive/__init__.md: -------------------------------------------------------------------------------- 1 | How to instantiate the **Do** API. 2 | 3 | STUB|__init__ 4 | 5 | ## Examples 6 | 7 | One can provide a model, and specify what details and results to print and/or log to a file. 8 | 9 | ```python 10 | from pathlib import Path 11 | from do.API import Do 12 | 13 | file = Path("output/model1.yml") 14 | f = file.open("w") 15 | 16 | do_api = Do( 17 | model=m, 18 | print_detail=False, 19 | print_result=True, 20 | log=True, 21 | log_fd=f 22 | ) 23 | ``` 24 | 25 | **Note**: Here, ``m`` is not defined, but multiple examples will follow, detailing acceptable forms of ``m``. 26 | 27 | **Important**: 28 | - Since **Do** is a class, multiple instances of **Do** - each with their own model - can be instantiated in one project at a time. 29 | - Various parameters of outputting and logging details can be [[tweaked|Output]]. 30 | 31 |
32 | 33 | ### Model: Python dictionary 34 | 35 | One can have a model represented as a dictionary, and pass this as a *constructor argument* to instantiate **Do**. 36 | 37 | ```python 38 | from pathlib import Path 39 | from do.API import Do 40 | 41 | m = { 42 | "name": "Simple Model", 43 | "model": { 44 | "Y": { 45 | "outcomes": ["y", "~y"], 46 | "table": [ 47 | ["y", 0.7], 48 | ["~y", 0.3] 49 | ] 50 | }, 51 | "X": { 52 | "outcomes": ["x", "~x" ], 53 | "parents": [ "Y" ], 54 | "table": [ 55 | ["x", "y", 0.9], 56 | ["x", "~y", 0.75], 57 | ["~x", "y", 0.1], 58 | ["~x", "~y", 0.25] 59 | ] 60 | } 61 | } 62 | } 63 | 64 | file = Path("output/model1.yml") 65 | f = file.open("w") 66 | 67 | do_api = Do( 68 | model=m, 69 | print_detail=False, 70 | print_result=True, 71 | log=True, 72 | log_fd=f 73 | ) 74 | ``` 75 | 76 | **Important** 77 | - A regular Python dictionary representation of a [[Markovian model|Markovian Models]] is valid input to **Do**. 78 | 79 |
80 | 81 | ### Model: string path to a file 82 | 83 | One can also have a file contain a valid model, and pass the *path* to the file as input as well. 84 | 85 | ```python 86 | from do.API import Do 87 | 88 | model_path = "data/graph1.yml" 89 | do_api = Do(model_path) # All good! 90 | 91 | fake_path = "does/not/exist.file" 92 | do_api_2 = Do(fake_path) # This will raise an exception! 93 | ``` 94 | 95 | **Important**: 96 | - A *string path* is valid to pass to **Do**. 97 | - If the file cannot be found or parsed, an exception will be raised. 98 | 99 |
100 | 101 | ### Model: pathlib.Path 102 | 103 | One can also provide a [Path](https://docs.python.org/3/library/pathlib.html#pathlib.Path) object, as part of the [pathlib library](https://docs.python.org/3/library/pathlib.html). 104 | - **Trivia**: Providing a [string path to a file](#model-string-path-to-a-file) works by attempting to create a [Path](https://docs.python.org/3/library/pathlib.html#pathlib.Path) from the string path. 105 | 106 | ```python 107 | from pathlib import Path 108 | from do.API import Do 109 | 110 | model_path = Path("graph2.yml") 111 | do_api = Do(model_path) 112 | ``` 113 | 114 |
name: Test and Release

on:
  workflow_dispatch:
  push:
    branches: [ main, beta, develop ]
  pull_request:
    branches: [ main ]

jobs:
  test:
    strategy:
      matrix:
        os:
          - ubuntu-latest
          - macos-latest
          - windows-latest
        python-version: [ '3.9', '3.10' ]
        report-coverage: [ false ]

        # Special matrix job to report coverage only once
        include:
          - os: ubuntu-latest
            python-version: '3.10'
            report-coverage: true

    runs-on: ${{ matrix.os }}

    steps:
      - uses: actions/checkout@v2

      - name: Setup Python
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install flake8 pytest coverage coveralls
          pip install -r requirements.txt

      - name: Lint with flake8
        run: |
          # stop the build if there are Python syntax errors or undefined names
          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
          # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
          flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics

      - name: Test with pytest / coverage
        run: |
          coverage run -m pytest

      - name: Report Coverage w/Coveralls
        uses: AndreMiras/coveralls-python-action@develop
        if: ${{ matrix.report-coverage }}

  # Tag and create a new release - handled by semantic-release
  tag:
    runs-on: ubuntu-latest
    needs: test  # Only consider creating a release if all tests pass

    # Only create a release on main / beta
    if: github.ref == 'refs/heads/main' || github.ref == 'refs/heads/beta'
    steps:
      - uses: actions/checkout@v2
      - name: Create Release
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: npx semantic-release

  # Package and upload to PyPI
  pypi:
    runs-on: ubuntu-latest
    needs: tag  # Need to get the latest version number, which may change based on semantic-release

    # Only upload to PyPI on main
    if: github.ref == 'refs/heads/main'
    steps:
      - name: Checkout
        uses: actions/checkout@v2

      - name: Setup Python
        uses: actions/setup-python@v2
        with:
          # This job has no strategy matrix, so `matrix.python-version` would
          # evaluate to empty here; pin the release build to a concrete version.
          python-version: '3.10'

      - name: Install dependencies
        run: |
          # wheel is required for `setup.py bdist_wheel` below
          python -m pip install --upgrade pip setuptools wheel
          python -m pip install flake8 pytest coverage coveralls
          pip install -r requirements.txt

      - name: Get Version Number
        uses: oprypin/find-latest-tag@v1
        id: tag
        with:
          repository: ${{ github.repository }}
          releases-only: true

      - name: Build PyPI Package
        run: |
          python setup.py sdist bdist_wheel
        env:
          SEMANTIC_VERSION: ${{ steps.tag.outputs.tag }}

      - name: PyPI Publish
        uses: pypa/gh-action-pypi-publish@v1.4.2
        with:
          user: __token__
          password: ${{ secrets.PYPI_API_TOKEN }}
from json import loads as json_loads
from pathlib import Path
from typing import Collection, Mapping
from loguru import logger
from yaml import safe_load as yaml_load

from .ConditionalProbabilityTable import ConditionalProbabilityTable
from .Exceptions import MissingVariable
from .Graph import Graph
from .Variables import Variable


class Model:
    """A causal model: a Graph, its endogenous Variables, and their distributions."""

    def __init__(self, graph: Graph, variables: Mapping[str, Variable], distribution: Mapping[Variable, ConditionalProbabilityTable]):
        """
        @param graph: the Graph of the model; a copy is stored
        @param variables: maps a variable's string name to its Variable object
        @param distribution: maps a variable (by name and/or Variable) to its ConditionalProbabilityTable
        """
        # Defensive copies: later mutation of the arguments cannot affect the model
        self._g = graph.copy()
        self._v = dict(variables)
        self._d = dict(distribution)

    def graph(self) -> Graph:
        """Return the model's Graph (the stored instance, not a copy)."""
        return self._g

    def variable(self, key: str) -> Variable:
        """Return the Variable named ``key``.

        @raise MissingVariable: if ``key`` is not an endogenous variable of the model.
        """
        if key not in self._v:
            logger.error(f"unknown variable: {key}")
            raise MissingVariable(key)
        return self._v[key]

    def table(self, key: str) -> ConditionalProbabilityTable:
        """Return the ConditionalProbabilityTable for the variable named ``key``.

        @raise MissingVariable: if ``key`` is not an endogenous variable of the model.
        """
        if key not in self._v:
            logger.error(f"unknown variable: {key}")
            raise MissingVariable(key)
        return self._d[key]

    def all_variables(self) -> Collection[Variable]:
        """Return all endogenous Variable objects of the model."""
        return self._v.values()


def from_dict(data: dict) -> Model:
    """Construct a Model from an already-parsed dictionary representation."""
    return parse_model(data)


def from_path(p: Path) -> Model:
    """Load and parse a model from a ``.json`` / ``.yml`` / ``.yaml`` file.

    @raise FileNotFoundError: if ``p`` does not exist or is not a regular file.
    @raise Exception: if the file extension is not recognized.
    """
    if not p.exists() or not p.is_file():
        raise FileNotFoundError(str(p))

    if p.suffix == ".json":
        # NOTE: json.loads (string input) — json.load expects a file object and
        # would fail on the str returned by read_text().
        return parse_model(json_loads(p.read_text()))

    elif p.suffix in [".yml", ".yaml"]:
        return parse_model(yaml_load(p.read_text()))

    else:
        raise Exception(f"Unknown extension for {p}")


def parse_model(data: dict) -> Model:
    """
    Build a Model from its dictionary representation.

    ``data["endogenous"]`` maps each variable name to its outcomes, parents, and
    probability table; the optional ``data["exogenous"]`` maps each latent
    variable to the list of its children.

    variables: maps string name to the Variable object instantiated
    tables: maps strings/Variables to corresponding ConditionalProbabilityTables
    """
    variables = dict()
    tables = dict()

    for name, detail in data["endogenous"].items():

        # Load the relevant data to construct a Variable; both keys are optional
        v_outcomes = detail.get("outcomes", [])
        v_parents = detail.get("parents", [])

        # Create a Variable object, looked up by its name
        variable = Variable(name, v_outcomes, v_parents)
        variables[name] = variable

        # Load in the table and construct a CPT; map both the name and the
        # Variable object to it, for ease of access
        cpt = ConditionalProbabilityTable(variable, v_parents, detail["table"])
        tables[name] = cpt
        tables[variable] = cpt

    # Vertices and edges of the graph: one edge per (parent, child) pair
    v = set(variables.keys())
    e = set()

    for child in variables:
        e.update((parent, child) for parent in variables[child].parents)

    # Exogenous (latent) variables contribute vertices and edges only; they have
    # no distribution of their own
    if "exogenous" in data:
        for variable, children in data["exogenous"].items():
            v.add(variable)
            for c in children:
                v.add(c)
                e.add((variable, c))

    graph = Graph(v, e)

    return Model(graph, variables, tables)
class API:

    @staticmethod
    def _identify(y: Set[Outcome], x: Set[Intervention], model: Model, include_proof: bool) -> PExpression:
        """
        Shared setup for `identification` and `proof`: project the exogenous
        variables out into a latent graph and run the ID algorithm over it.

        Uses the Model's public accessors (graph() / all_variables()) rather
        than reaching into its private attributes.
        """
        # Exogenous vertices are those in the graph with no endogenous Variable
        endogenous = {variable.name for variable in model.all_variables()}
        exogenous = model.graph().v - endogenous

        latent = latent_transform(model.graph().copy(), exogenous)

        # Seed expression: the factorization P(v) = Π_i P(v_i | parents(v_i))
        p = PExpression([], [TemplateExpression(vertex, list(latent.parents(vertex))) for vertex in latent.v])
        return Identification({v.name for v in y}, {v.name for v in x}, p, latent, include_proof)

    def identification(self, y: Set[Outcome], x: Set[Intervention], model: Model, include_proof: bool = True) -> Union[float, Tuple[float, str]]:
        """
        The Identification algorithm presented in Shpitser & Pearl, 2007.

        Args:
            y (Set[Outcome]): A set of (outcome) variables.
            x (Set[Intervention]): A set of (treatment) variables.
            model (Model): The given model, which may include exogenous variables.
            include_proof (bool, optional): Controls whether a proof should be generated along
                with the expression and returned. Defaults to True.

        Raises:
            Fail: Raises a Fail exception if the effect cannot be identified, containing the hedge
                causing the unidentifiability.

        Returns:
            Union[float, Tuple[float, str]]: The result is represented as a float in the range [0, 1],
                representing the resulting effect. Returns this float if include_proof is False. Returns
                a tuple (result, proof) if include_proof is True, where proof is a string.
        """

        expression = self._identify(y, x, model, include_proof)

        def _process(current: Union[PExpression, TemplateExpression], known: Mapping[str, str]):
            """Recursively evaluate the identified expression under the bindings in ``known``."""

            # Leaf: a single conditional probability P(head | parents), looked up directly
            if isinstance(current, TemplateExpression):
                t = model.table(current.head)
                return t.probability_lookup(Outcome(current.head, known[current.head]), [Outcome(v, known[v]) for v in model.variable(current.head).parents])

            # Product with no summation variables
            elif len(current.sigma) == 0:
                i = 1
                for term in current.terms:
                    i *= _process(term, known)
                return i

            # Sum over every combination of outcomes of the sigma variables
            else:
                t = 0
                for values in product(*[model.variable(v).outcomes for v in current.sigma]):
                    i = 1
                    for term in current.terms:
                        i *= _process(term, known | dict(zip(current.sigma, values)))
                    t += i
                return t

        result = _process(expression, {v.name: v.outcome for v in y} | {v.name: v.outcome for v in x})
        return (result, expression.proof()) if include_proof else result

    def proof(self, y: Set[Outcome], x: Set[Intervention], model: Model) -> str:
        """
        Generates a proof for the effects of a given expression, as identified by ID (Shpitser & Pearl, 2007).

        Args:
            y (Set[Outcome]): A set of (outcome) variables.
            x (Set[Intervention]): A set of (treatment) variables.
            model (Model): The given model, which may include exogenous variables.

        Raises:
            Fail: Raises a Fail exception if the effect cannot be identified, containing the hedge
                causing the unidentifiability.

        Returns:
            str: A string proof for the effect identified.
        """
        return self._identify(y, x, model, True).proof()
representing the parents for the variable given 15 | @param table_rows: A list of rows in the table, each formatted as [, [", ...],

] 16 | """ 17 | 18 | # Padding units on the left/right sides of each cell 19 | padding = 1 20 | 21 | def __init__(self, variable: Variable, parents: List[str], table_rows: List): 22 | self.variable = variable # The LHS of the table, single-variable only 23 | self.parents = parents # The RHS/body of the table 24 | 25 | self.table_rows = [] 26 | 27 | latent = len(parents) - (len(table_rows) - 2) 28 | 29 | # Clean up the rows; Each is formatted as: [outcome of variable, parent_1, parent_2, ..., probability] 30 | for row in table_rows: 31 | outcome = Outcome(variable.name, row[0]) 32 | p = row[1:-1] 33 | 34 | self.table_rows.append([outcome, [Outcome(v, x) for v, x in zip(parents[:-latent], p)], row[-1]]) 35 | 36 | def __str__(self) -> str: 37 | """ 38 | String builtin for a ConditionalProbabilityTable 39 | @return: A string representation of the table. 40 | """ 41 | 42 | # Create a snazzy numpy table 43 | # Rows: 1 for a header + 1 for each row; Columns: 1 for variable, 1 for each given var, 1 for the probability 44 | rows = 1 + len(self.table_rows) 45 | columns = 1 + len(self.parents) + 1 46 | 47 | # dtype declaration is better than "str", as str only allows one character in each cell 48 | table = empty((rows, columns), dtype=' float: 87 | """ 88 | Directly lookup the probability for the row corresponding to the queried outcome and given data 89 | @param outcome: The specific outcome to lookup 90 | @param given: A list of Outcome objects 91 | @return: A probability corresponding to the respective row. Raises an Exception otherwise. 92 | """ 93 | for row_outcome, row_given, row_p in self.table_rows: 94 | # If the outcome for this row matches, and each outcome for the given data matches... 
def treat(expression: Expression, interventions: Collection[Intervention], model: Model) -> float:
    """
    Compute the probability of ``expression`` under any number of interventions (treatments).

    @param expression: The query; a head of Outcomes and a (possibly empty) body of Outcomes.
    @param interventions: A collection of Intervention objects, the treatments applied.
    @param model: The Model to evaluate the query against.
    @return: The probability of the query, a float in [0, 1].
    @raise NoDeconfoundingSet: If backdoor paths exist but no deconfounding set
        disjoint from the query body blocks them.
    """

    head = set(expression.head())
    body = set(expression.body())

    # If there are no Interventions, we can compute a standard query
    if len(interventions) == 0:
        return inference(expression, model)

    # There are interventions; may need to find some valid Z to compute
    paths = backdoors(interventions, head, model.graph(), body)

    # No backdoor paths; augment graph space and compute
    if len(paths) == 0:
        logger.info("no backdoor paths; translating into standard inference query")
        expression_transform = Expression(expression.head(), set(expression.body()) | set(Outcome(x.name, x.outcome) for x in interventions))
        logger.info(f"translated expression: {expression_transform}")
        logger.info(f"disabling incoming edges on graph: {[x.name for x in interventions]}")
        model.graph().disable_incoming(*interventions)
        p = inference(expression_transform, model)
        logger.info("resetting edge transformations")
        model.graph().reset_disabled()
        return p

    # Backdoor paths found; find deconfounding set to compute
    # Find all possible deconfounding sets, and use possible subsets
    logger.info("computing deconfounding sets")
    deconfounding_sets = deconfound(interventions, head, model.graph())
    logger.info(f"resulting deconfounding sets: {deconfounding_sets}")

    # Filter down the deconfounding sets not overlapping with our query body
    vertex_dcf = list(filter(lambda s: len(set(s) & {x.name for x in body}) == 0, deconfounding_sets))
    if len(vertex_dcf) == 0:
        raise NoDeconfoundingSet

    # Compute with every possible deconfounding set as a safety measure; ensuring they all match
    probability = None  # Sentinel value
    for z_set in vertex_dcf:

        result = _marginalize_query(expression, interventions, z_set, model)
        if probability is None:  # Storing first result
            probability = result

        # If results do NOT match; error
        assert abs(result - probability) < 0.00000001, f"Error: Distinct results: {probability} vs {result}"

    logger.info("{0} = {1:.5f}".format(Expression(head, set(body) | set(interventions)), probability))
    return result


def _marginalize_query(expression: Expression, interventions: Collection[Intervention], deconfound: Collection[str], model: Model) -> float:
    """
    Handle the modified query where we require a deconfounding set due to Interventions / treatments.

    @param expression: The query; a head of Outcomes and a body of Outcomes.
    @param interventions: The Intervention objects (treatments) applied to the query.
    @param deconfound: A collection of (string) variable names serving as a deconfounding set,
        blocking all backdoor paths between the head and the interventions.
    @param model: The Model to evaluate the query against.
    @return: The probability of the query, marginalized over the deconfounding set.
    """

    head = set(expression.head())
    body = set(expression.body())

    # Augment graph (isolating interventions as roots) and create engine
    model.graph().disable_incoming(*interventions)
    as_outcomes = {Outcome(x.name, x.outcome) for x in interventions}

    probability = 0.0

    # We take every possible combination of outcomes of Z and compute each probability separately
    for cross in product(*[model.variable(var).outcomes for var in deconfound]):

        # Construct the respective Outcome list of each Z outcome cross product
        z_outcomes = {Outcome(x, cross[i]) for i, x in enumerate(deconfound)}

        # First, we do P(Y | do(X), Z)
        ex1 = Expression(head, body | as_outcomes | z_outcomes)
        logger.info(f"computing sub-query: {ex1}")
        p_y_x_z = inference(ex1, model)

        # Second, P(Z)
        ex2 = Expression(z_outcomes, body | as_outcomes)
        logger.info(f"computing sub-query: {ex2}")
        p_z = inference(ex2, model)

        probability += p_y_x_z * p_z

    model.graph().reset_disabled()
    return probability
class Variable:
    """
    Represents a basic "Variable", as part of a Conditional Probability Table or the like.
    Has a name, list of potential outcomes, and some list of parent variables.
    """

    def __init__(self, name: str, outcomes: list, parents: list, descendants=None, topological_order=0):
        """
        A basic Variable for use in a CPT or Causal Graph
        @param name: The name of the Variable, "X"
        @param outcomes: A list of all potential outcomes of the variable: ["x", "~x"]
        @param parents: A list of strings representing the names of all the parents of this Variable
        @param descendants: An optional set of Variables which are reachable from this Variable
        @param topological_order: Used in the ordering of Variables as defined by a topological sort
        """
        self.name = name.strip()
        self.outcomes = [outcome.strip() for outcome in outcomes]
        self.parents = [parent.strip() for parent in parents]
        self.topological_order = topological_order

        # A fresh set per instance; a mutable default argument would be shared across calls
        if descendants is None:
            descendants = set()
        self.descendants = descendants

    def __str__(self) -> str:
        return self.name + ": <" + ",".join(self.outcomes) + ">, <-- " + ",".join(self.parents)

    def __hash__(self) -> int:
        # __eq__ compares outcomes/parents as *sets* (order-insensitive), so the hash
        # must be order-insensitive too; hashing the raw list order would let two
        # equal Variables hash differently, breaking the eq/hash contract.
        return hash((self.name, frozenset(self.outcomes), frozenset(self.parents)))

    def __eq__(self, other) -> bool:
        # Comparing against a bare string matches on name only
        if isinstance(other, str):
            return self.name == other

        return self.name == other.name and \
            set(self.outcomes) == set(other.outcomes) and \
            set(self.parents) == set(other.parents)

    def __copy__(self):
        return Variable(self.name, self.outcomes.copy(), self.parents.copy(), descendants=self.descendants.copy())

    def copy(self):
        return self.__copy__()
Convert the outcome and intervention strings into the specific Outcome and Intervention classes 122 | outcomes = [Outcome(item.split("=")[0].strip(), item.split("=")[1].strip()) for item in outcomes] 123 | interventions = [Intervention(item.split("=")[0].strip(), item.split("=")[1].strip()) for item in interventions] 124 | 125 | together = [] 126 | together.extend(outcomes) 127 | together.extend(interventions) 128 | 129 | return set(together) 130 | -------------------------------------------------------------------------------- /tests/core/test_Graph.py: -------------------------------------------------------------------------------- 1 | from do.core.Variables import Outcome, Intervention, Variable 2 | from do.core.Graph import to_label 3 | 4 | from ..source import models 5 | graph = models["pearl-3.4.yml"]._g 6 | 7 | 8 | def test_roots(): 9 | assert sum(map(lambda v: len(graph.parents(v)), graph.roots())) == 0 10 | 11 | 12 | def test_descendants(): 13 | assert sum(map(lambda v: len(graph.children(v)), graph.sinks())) == 0 14 | 15 | 16 | def test_parents(): 17 | graph.reset_disabled() 18 | roots = graph.roots() 19 | for vertex in graph.v: 20 | parents = graph.parents(vertex) 21 | for parent in parents: 22 | assert (parent, vertex) in graph.e 23 | 24 | if vertex in roots: 25 | assert len(parents) == 0 26 | else: 27 | assert len(parents) > 0 28 | 29 | 30 | def test_children(): 31 | graph.reset_disabled() 32 | for vertex in graph.v: 33 | children = graph.children(vertex) 34 | for child in children: 35 | assert (vertex, child) in graph.e 36 | 37 | for child in children: 38 | assert vertex in graph.parents(child) 39 | 40 | 41 | def test_ancestors(): 42 | graph.reset_disabled() 43 | for vertex in graph.v: 44 | ancestors = graph.ancestors(vertex) 45 | for ancestor in ancestors: 46 | assert vertex in graph.descendants(ancestor) 47 | 48 | 49 | def test_reach(): 50 | graph.reset_disabled() 51 | for vertex in graph.v: 52 | descendants = graph.descendants(vertex) 53 | for 
def test_disable_outgoing():

    graph.reset_disabled()

    for vertex in graph.v:
        prior_children = graph.children(vertex)
        prior_descendants = graph.descendants(vertex)

        graph.disable_outgoing(vertex)

        # With outgoing edges severed, nothing should be reachable below this vertex
        assert len(graph.children(vertex)) == 0
        assert len(graph.descendants(vertex)) == 0
        for child in prior_children:
            assert vertex not in graph.parents(child)
        for descendant in prior_descendants:
            assert vertex not in graph.ancestors(descendant)

    graph.reset_disabled()


def test_disable_incoming():

    graph.reset_disabled()

    for vertex in graph.v:
        prior_parents = graph.parents(vertex)
        prior_ancestors = graph.ancestors(vertex)

        graph.disable_incoming(vertex)

        # With incoming edges severed, nothing should be reachable above this vertex
        assert len(graph.parents(vertex)) == 0
        assert len(graph.ancestors(vertex)) == 0
        for parent in prior_parents:
            assert vertex not in graph.children(parent)
        for ancestor in prior_ancestors:
            assert vertex not in graph.descendants(ancestor)

    graph.reset_disabled()


def test_topology_sort():

    topology = graph.topology_sort()

    print(topology)

    for position, vertex in enumerate(topology):
        # Nothing earlier in the ordering may descend from this vertex...
        for earlier in topology[:position]:
            assert earlier not in graph.descendants(vertex)

        # ...and nothing at or after it may be one of its ancestors
        for later in topology[position:]:
            assert later not in graph.ancestors(vertex)


def test_graph_copy():

    duplicate = graph.copy()

    assert len(graph.v) == len(duplicate.v)
    assert len(graph.e) == len(duplicate.e)

    # Distinct containers, identical contents
    assert graph.v is not duplicate.v
    assert graph.e is not duplicate.e

    for vertex in graph.v:
        assert vertex in duplicate.v

    for vertex in duplicate.v:
        assert vertex in graph.v

    for edge in graph.e:
        assert edge in duplicate.e

    for edge in duplicate.e:
        assert edge in graph.e


def test_without_incoming_edges():

    g = graph.copy()

    roots = g.roots()
    root_children = set().union(*[g.children(x) for x in roots])

    nop = g.without_incoming_edges(roots)          # roots have no incoming; should change nothing
    op = g.without_incoming_edges(root_children)   # sever initial roots

    # The no-op case leaves the graph untouched
    assert g.v == nop.v and g.e == nop.e

    # Severing the roots' children drops edges and promotes those children to roots
    assert g.v == op.v
    assert g.e != op.e
    assert len(g.e) > len(op.e)
    assert len(op.roots()) > len(g.roots())
    assert op.roots() == set(g.roots()) | root_children


def test_without_outgoing_edges():

    g = graph.copy()

    sinks = g.sinks()
    sink_parents = set().union(*[g.parents(x) for x in sinks])

    nop = g.without_outgoing_edges(sinks)         # sinks have no outgoing; should change nothing
    op = g.without_outgoing_edges(sink_parents)   # sever initial sinks

    # The no-op case leaves the graph untouched
    assert g.v == nop.v and g.e == nop.e

    # Severing the sinks' parents drops edges and demotes those parents to sinks
    assert g.v == op.v
    assert g.e != op.e
    assert len(g.e) > len(op.e)
    assert len(op.sinks()) > len(g.sinks())
    assert op.sinks() == set(g.sinks()) | sink_parents


def test_to_label():
    # to_label must resolve each representation back to its plain vertex name
    outcome = Outcome("Xj", "xj")
    intervention = Intervention("Xj", "xj")
    variable = Variable("Xj", [], [])

    for item in (outcome, intervention, variable):
        assert to_label(item) == item.name
appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 
50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | braden.dubois@usask.ca. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. 
Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | https://www.contributor-covenant.org/faq. Translations are available at 128 | https://www.contributor-covenant.org/translations. 
class Graph:

    """A basic graph, with edge control."""

    def __init__(self, v: Set[str], e: Set[Tuple[str, str]], topology: Optional[Sequence[Union[str, VClass]]] = None):
        """
        Initializer for a basic Graph.
        @param v: A set of vertices
        @param e: A set of edges, each edge being (source, target)
        @param topology: An optional sequence of vertices defining the topological ordering of the graph
        """

        self.v = v
        self.e = {(s.strip(), t.strip()) for s, t in e}

        # Declare the keys (which are vertices)
        self.incoming = {vertex.strip(): set() for vertex in v}
        self.outgoing = {vertex.strip(): set() for vertex in v}

        for s, t in e:
            self.outgoing[s].add(t)
            self.incoming[t].add(s)

        self.outgoing_disabled = set()
        self.incoming_disabled = set()

        if not topology:
            topology = self.topology_sort()
        else:
            # Drop topology entries that are not actually vertices of this graph
            topology = list(filter(lambda x: x in v, topology))

        self.topology_map = {vertex: index for index, vertex in enumerate(topology, start=1)}

    def __str__(self) -> str:
        """
        String builtin for the Graph class
        @return: A string representation of the given Graph instance
        """
        msg = "Vertices: " + ", ".join(sorted(i for i in self.v)) + "\n"
        msg += "Edges:\n" + "\n".join(" -> ".join(i for i in edge) for edge in self.e)
        return msg

    def roots(self) -> Collection[str]:
        """
        Get the roots of the graph G.
        @return: A set of vertices (strings) in G that have no parents.
        """
        return set([x for x in self.v if len(self.parents(x)) == 0])

    def sinks(self) -> Collection[str]:
        """
        Get the sinks of the graph G.
        @return: A collection of string vertices in G that have no descendants.
        """
        return set([x for x in self.v if len(self.children(x)) == 0])

    def parents(self, v: Vertex) -> Collection[Vertex]:
        """
        Get the parents of v, which may actually be currently controlled
        @param v: A variable in our graph
        @return: All parents reachable (which would be none if being controlled)
        """
        label = to_label(v)
        if label in self.incoming_disabled:
            return set()

        return {p for p in self.incoming[label] if p not in self.outgoing_disabled and p not in self.outgoing[label]}

    def children(self, v: Vertex) -> Collection[Vertex]:
        """
        Get the children of v, which may actually be currently controlled
        @param v: A variable in our graph
        @return: All children reachable (which would be none if being controlled)
        """
        label = to_label(v)
        if label in self.outgoing_disabled:
            return set()

        return {c for c in self.outgoing[label] if c not in self.incoming_disabled and c not in self.incoming[label]}

    def ancestors(self, v: Vertex) -> Collection[Vertex]:
        """
        Get the ancestors of v, accounting for disabled vertices
        @param v: The vertex to find all ancestors of
        @return: A set of reachable ancestors of v
        """

        ancestors = set()
        queue = list(self.parents(v))

        # BFS with a visited check: without it, a vertex reachable along many
        # paths is re-expanded once per path, which blows up on dense DAGs.
        # An index cursor avoids the O(n) cost of list.pop(0).
        i = 0
        while i < len(queue):
            current = queue[i]
            i += 1
            if current in ancestors:
                continue
            ancestors.add(current)
            queue.extend(self.parents(current))

        return ancestors

    def descendants(self, v: Vertex) -> Collection[Vertex]:
        """
        Get the reach of v, accounting for disabled vertices
        @param v: The vertex to find all descendants of
        @return: A set of reachable descendants of v
        """

        descendants = set()
        queue = list(self.children(v))

        # Same visited-checked BFS as ancestors(), walking child edges instead
        i = 0
        while i < len(queue):
            current = queue[i]
            i += 1
            if current in descendants:
                continue
            descendants.add(current)
            queue.extend(self.children(current))

        return descendants

    def disable_outgoing(self, *disable: Vertex):
        """
        Disable the given vertices' outgoing edges
        @param disable: Any number of vertices to disable
        """
        for v in disable:
            self.outgoing_disabled.add(to_label(v))

    def disable_incoming(self, *disable: Vertex):
        """
        Disable the given vertices' incoming edges
        @param disable: Any number of vertices to disable
        """
        for v in disable:
            self.incoming_disabled.add(to_label(v))

    def reset_disabled(self):
        """
        Clear and reset all the disabled edges, restoring the graph
        """
        self.outgoing_disabled.clear()
        self.incoming_disabled.clear()

    def get_topology(self, v: Vertex) -> int:
        """
        Determine the "depth" a given Variable is at in a topological sort of the graph
        @param v: The variable to determine the depth of
        @return: Some non-negative integer representing the depth of this variable
        """
        return self.topology_map[to_label(v)]

    def copy(self):
        """
        Public copy method; copies v, e, and the disabled sets
        @return: A copied Graph
        """
        return self.__copy__()

    def __copy__(self):
        """
        Copy builtin allowing the Graph to be copied
        @return: A copied Graph
        """
        copied = Graph(self.v.copy(), set(self.e.copy()))
        copied.incoming_disabled = self.incoming_disabled.copy()
        copied.outgoing_disabled = self.outgoing_disabled.copy()
        return copied

    def __getitem__(self, v: set):
        """
        Compute a subset V of some Graph G.
        :param v: A set of variables in G.
        :return: A Graph representing the subgraph G[V].
        """
        return Graph({s for s in self.v if s in v}, {s for s in self.e if s[0] in v and s[1] in v})

    def descendant_first_sort(self, variables: Collection[Vertex]) -> Sequence[Vertex]:
        """
        A helper function to "sort" a list of Variables/Outcomes/Interventions such that no element has a
        "parent"/"ancestor" to its left
        @param variables: A list of any number of Variable/Outcome/Intervention instances
        @return: A sorted list, such that any instance has no ancestor earlier in the list
        """
        return sorted(variables, key=lambda v: self.get_topology(v))

    def topology_sort(self) -> Sequence[str]:
        """
        Compute a topological ordering of the graph's vertices (Kahn-style: repeatedly
        strip current roots). Ties are broken alphabetically for determinism.
        @return: A list of vertex names in topological order
        """

        topology = []
        v = self.v.copy()
        e = self.e.copy()

        while len(v) > 0:

            roots = set(filter(lambda t: not any((s, t) in e for s in v), v))
            assert len(roots) > 0

            topology.extend(sorted(list(roots)))
            v -= roots
            e -= set(filter(lambda edge: edge[0] in roots, e))

        return topology

    def without_incoming_edges(self, x: Collection[Vertex]):
        """
        Build a new Graph in which every edge into a vertex of x has been removed.
        @param x: A collection of vertices to sever incoming edges of
        @return: A new Graph instance; self is unmodified
        """

        v = self.v.copy()
        e = {(s, t) for (s, t) in self.e if t not in x}

        return Graph(v, e)

    def without_outgoing_edges(self, x: Collection[Vertex]):
        """
        Build a new Graph in which every edge out of a vertex of x has been removed.
        @param x: A collection of vertices to sever outgoing edges of
        @return: A new Graph instance; self is unmodified
        """

        v = self.v.copy()
        e = {(s, t) for (s, t) in self.e if s not in x}

        return Graph(v, e)


def to_label(item: VClass) -> str:
    """
    Convert a variable to its string name, if not already provided as such
    @param item: The item to convert, either a string (done) or some Variable
    @return: A string name of the given item, if not already provided as a string
    """
    return item.strip("'") if isinstance(item, str) else item.name.strip("'")
itertools import product 2 | from typing import List, Iterable, Set, Tuple 3 | 4 | from ..core.Graph import Graph 5 | 6 | 7 | class LatentGraph(Graph): 8 | 9 | def __init__(self, vertices: Set[str], edges: Set[Tuple[str, str]], e_bidirected: Set[Tuple[str, str]], fixed_topology: List[str] = None): 10 | super().__init__(vertices, edges, fixed_topology) 11 | self.e_bidirected = e_bidirected.copy() 12 | self.V = vertices 13 | self.C = self.make_components() 14 | 15 | # Allows passing a topology down to a subgraph 16 | if fixed_topology: 17 | 18 | # filter any vertices from the given topology that don't exist as vertices in the graph 19 | filtered_topology = [x for x in fixed_topology if x in vertices] 20 | 21 | # ensure topology fully represents the graph 22 | assert all(x in vertices for x in filtered_topology), "vertex in the given topology is not in the graph!" 23 | assert all(x in filtered_topology for x in vertices), "vertex in the graph is not in the given topology!" 24 | 25 | self.v_Pi = filtered_topology 26 | 27 | # Otherwise, generate it 28 | else: 29 | self.v_Pi = self.__kahns() 30 | 31 | def __str__(self): 32 | return f"Graph: V = {', '.join(self.v)}, E = {', '.join(list(map(str, self.e)))}, E (Bidirected) = {', '.join(list(map(str, self.e_bidirected)))}" 33 | 34 | def __getitem__(self, v: Set[str]): 35 | e = {(s, t) for (s, t) in self.e if s in v and t in v} 36 | e_bidirected = {(s, t) for (s, t) in self.e_bidirected if s in v and t in v} 37 | return LatentGraph(self.v & v, e, e_bidirected, self.v_Pi) 38 | 39 | def __eq__(self, other): 40 | if not isinstance(other, LatentGraph): 41 | return False 42 | 43 | return self.v == other.v and self.e == other.e and \ 44 | all([(e[0], e[1]) in other.e_bidirected or (e[1], e[0]) in other.e_bidirected for e in self.e_bidirected]) and \ 45 | all([(e[0], e[1]) in self.e_bidirected or (e[1], e[0]) in self.e_bidirected for e in other.e_bidirected]) 46 | 47 | def biadjacent(self, v: str): 48 | return {e[0] if e[0] != v 
else e[1] for e in self.e_bidirected if v in e} 49 | 50 | def ancestors(self, y: Set[str]): 51 | ans = y.copy() 52 | for v in y: 53 | for p in self.parents(v): 54 | ans |= self.ancestors({p}) 55 | return ans 56 | 57 | # puts nodes in topological ordering 58 | def __kahns(self): 59 | 60 | edges = self.e.copy() 61 | vertices = self.v.copy() 62 | v_Pi = [] 63 | 64 | s = vertices - ({e[0] for e in edges} | {e[1] for e in edges}) 65 | s |= set([e[0] for e in edges if e[0] not in {g[1] for g in edges}]) 66 | s = list(s) 67 | 68 | while s: 69 | n = s.pop() 70 | v_Pi.append(n) 71 | 72 | ms = {e[1] for e in edges if e[0] == n} 73 | for m in ms: 74 | edges.remove((n, m)) 75 | if {e for e in edges if e[1] == m} == set(): 76 | s.append(m) 77 | 78 | return v_Pi 79 | 80 | def make_components(self): 81 | 82 | ans = [] 83 | all_v = self.v.copy() 84 | visited = set() 85 | 86 | while all_v: 87 | start = all_v.pop() 88 | component = [] 89 | q = [start] 90 | 91 | while q: 92 | v = q.pop(0) 93 | if v not in visited: 94 | visited.add(v) 95 | component.append(v) 96 | q.extend([vs for vs in self.biadjacent(v) if vs not in visited]) 97 | 98 | if component: 99 | ans.append(set(component)) 100 | 101 | return ans 102 | 103 | def without_incoming(self, x: Iterable[str]): 104 | # return Graph(self.Edges - {edge for edge in self.Edges if edge[1] in x and edge[2] == "->"}, self.V) 105 | return LatentGraph(self.v, self.e - {e for e in self.e if e[1] in x}, self.e_bidirected, self.v_Pi) 106 | 107 | def collider(self, v1, v2, v3): 108 | return v1 in self.V and v2 in self.V and v3 in self.V and v1 in self.parents(v2) and v3 in self.children(v2) 109 | 110 | def all_paths(self, x: Iterable[str], y: Iterable[str]): 111 | 112 | def path_list(s, t): # returns all paths from X to Y regardless of direction of link (no bd links) 113 | 114 | # generate a fake variable to represent unobservable variables 115 | UNOBSERVABLE = "U" 116 | while UNOBSERVABLE in self.V: 117 | UNOBSERVABLE += "U" 118 | 119 | from_s_s 
def latent_transform(g: Graph, u: Set[str]):
    """
    Transform an ordinary causal graph containing unobservable (latent) variables into a
    LatentGraph in which every unobservable has been removed, its influence represented
    instead by bidirected arcs between its (observable) children.
    @param g: The original Graph, containing both observable and unobservable vertices.
    @param u: The set of (string) vertex names in g that are unobservable.
    @return: A LatentGraph over the observable vertices only, with bidirected arcs standing
        in for the removed unobservables, and a topological ordering restricted to V \\ u.
    """

    V = g.v.copy()
    E = set(g.e.copy())
    E_Bidirected = set()

    Un = u.copy()

    # Collapse unobservable variables, such as U1 -> U2 -> V ==> U1 -> V
    reduction = True
    while reduction:
        reduction = False

        remove = set()
        for un in Un:

            parents = [edge[0] for edge in E if edge[1] == un]      # Edges : parent -> u
            children = [edge[1] for edge in E if edge[0] == un]     # Edges : u -> child

            # All parents are unobservable, all children are observable, at least one parent
            if all(x in u for x in parents) and len(parents) > 0 and all(x not in u for x in children):
                reduction = True

                # Remove edges from parents to u
                for parent in parents:
                    E.remove((parent, un))

                # Remove edges from u to children
                for child in children:
                    E.remove((un, child))

                # Replace with edge from each parent to each child
                for cr in product(parents, children):
                    E.add((cr[0], cr[1]))

                # U can be removed entirely from graph
                remove.add(un)

        V -= remove
        Un -= remove

    # Convert all remaining unobservable to a list to iterate through
    Un = list(Un)

    # Replace each remaining unobservable with bi-directed arcs between its children
    while len(Un) > 0:

        # Take one "current" unobservable to remove, and remove it from the graph entirely
        cur = Un.pop()
        V.remove(cur)

        assert len([edge for edge in E if edge[1] == cur]) == 0, \
            "Unobservable still had parent left."

        # All outgoing edges of this unobservable
        child_edges = {edge for edge in E if edge[0] == cur}
        E -= child_edges

        # Replace all edges from this unobservable to its children with bidirected arcs,
        # connecting the children in a ring.
        # NOTE(review): when an unobservable has exactly one child, this adds a (c, c)
        # self-arc -- confirm downstream code tolerates/ignores bidirected self-loops.
        child_edges = list(child_edges)
        for i in range(len(child_edges)):
            a, b = child_edges[i], child_edges[(i + 1) % len(child_edges)]
            E_Bidirected.add((a[1], b[1]))

    # BUGFIX: removed leftover debug statement `print(V, u)` that polluted stdout.
    return LatentGraph(V, E, E_Bidirected, [x for x in g.topology_sort() if x in V - u])
def backdoors(src: Collection[Vertex], dst: Collection[Vertex],
              graph: Graph, dcf: Optional[Collection[Vertex]] = None) -> Collection[Path]:
    """
    Get all possible backdoor paths between some source set of vertices in the internal graph to any vertices in
    some destination set of vertices. A given (possibly empty) set of deconfounding vertices may serve to block, or
    even open, some backdoor paths.
    @param src: The source set of (string) vertices to search for paths from
    @param dst: The destination set of (string) vertices to search from src towards.
    @param graph: The Graph in which to search for backdoor paths between src and dst.
    @param dcf: An optional set of (string) vertices that may serve as a sufficient deconfounding set to block or open
        backdoor paths.
    @return: A list of lists, where each sublist contains a backdoor path, the first and last element being a
        vertex from src and dst, respectively, with all vertices between representing the path. All elements are
        string vertices.
    @raise IntersectingSets: if src, dst, and dcf are not pairwise disjoint.
    """

    # Arguments may mix strings and Vertex objects; normalize everything to string names.
    src_str = str_map(src)
    dst_str = str_map(dst)
    dcf_str = str_map(dcf) if dcf else set()

    # The query is only well-formed on pairwise-disjoint sets.
    if not disjoint(src_str, dst_str, dcf_str):
        raise IntersectingSets

    paths = []

    # Use the product of src, dst to try each possible pairing
    for s, t in product(src_str, dst_str):
        paths += _backdoor_paths_pair(s, t, graph, dcf_str)

    return paths
def all_paths_cumulative(s: str, t: str, path: list, path_list: list, graph: Graph) -> Collection[Path]:
    """
    Return a list of lists of all directed (parent-to-child) paths from a source to a target.
    This is a modified version of the graph-traversal algorithm provided by Dr. Eric Neufeld.
    @param s: A source (string) vertex defined in the graph.
    @param t: A target (string) destination vertex defined in the graph.
    @param path: A list representing the current path at any given point in the traversal.
    @param path_list: A list which will contain lists of paths from s to t.
    @param graph: The Graph whose child relation is followed during the traversal.
    @return: A list of lists of (string) vertices, where each sublist denotes a path from s to t.
    """
    # Reaching the target completes one path; record it and stop this branch.
    if s == t:
        return path_list + [path + [t]]

    # Guard against revisiting a vertex already on the current path (unreachable in a DAG).
    if s in path:
        return path_list

    # Extend the running path and recurse through every child of the current vertex.
    extended = path + [s]
    for successor in graph.children(s):
        path_list = all_paths_cumulative(successor, t, extended, path_list, graph)
    return path_list
def independent(src: Collection[Vertex], dst: Collection[Vertex], dcf: Optional[Collection[Vertex]], graph: Graph) -> bool:
    """
    Helper function that makes some do_calculus logic more readable; determine if two sets are independent, given
    some third set.
    @param src: A source set (of strings) X, to be independent from Y
    @param dst: A destination set (of strings) Y, to be independent from X
    @param dcf: A deconfounding set (of strings) Z, to block paths between X and Y
    @param graph: The Graph in which to test the independence of src and dst.
    @return: True if there are no backdoor paths and no straight-line paths, False otherwise
    """

    # Arguments may mix strings and Vertex objects; normalize everything to string names.
    src_str = str_map(src)
    dst_str = str_map(dst)
    dcf_str = str_map(dcf) if dcf else set()

    # Not independent if there are any unblocked backdoor paths
    if len(backdoors(src_str, dst_str, graph, dcf_str)) > 0:
        return False

    # Ensure no straight-line variables from any X -> Y or Y -> X
    for s, t in product(src_str, dst_str):
        if len(all_paths_cumulative(s, t, [], [], graph)) != 0:
            return False    # x -> y
        if len(all_paths_cumulative(t, s, [], [], graph)) != 0:
            return False    # y -> x

    # No paths, must be independent
    return True
def _backdoor_paths_pair(s: str, t: str, graph: Graph, dcf: Collection[str]) -> List[Path]:
    """
    Find all backdoor paths between any particular pair of vertices in the loaded graph
    @param s: A source (string) vertex in the graph
    @param t: A destination (string) vertex in the graph
    @param graph: The Graph in which to search for backdoor paths between s and t.
    @param dcf: A set of (string) variables, by which movement through any variable is controlled. This can serve
        as a sufficient "blocking" set, or may open additional backdoor paths
    @return Return a list of lists, where each sublist is a path of string vertices connecting s and t.
        Endpoints s and t are the first and last elements of any sublist.
    """
    # NOTE: the annotations of s and t were corrected to `str` -- callers pass single
    # vertices taken from product(src, dst), never collections.

    def get_backdoor_paths(cur: str, path: list, path_list: list, previous="up") -> list:
        """
        Return a list of lists of all paths from a source to a target, with conditional movement of either
        child to parent or parent to child. This may include an edge case that is not a backdoor path, which
        is filtered in the parent function, otherwise all paths will be backdoor paths.
        This is a heavily modified version of the graph-traversal algorithm provided by Dr. Eric Neufeld.
        @param cur: The current (string) vertex we are at in a traversal.
        @param path: The current path from s, our source.
        @param path_list: A list of lists, each sublist being a path discovered so far.
        @param previous: Whether moving from the previous variable to current we moved "up" (child to parent) or
            "down" (from parent to child); this movement restriction is involved in backdoor path detection
        @return: A list of lists, where each sublist is a path from s to t.
        """

        # Reached target
        if cur == t:
            return path_list + [path + [t]]

        # No infinite loops
        if cur not in path:

            if previous == "down":

                # We can ascend on a controlled collider, OR an ancestor of a controlled collider
                if cur in dcf or any(map(lambda v: v in dcf, graph.descendants(cur))):
                    for parent in graph.parents(cur):
                        path_list = get_backdoor_paths(parent, path + [cur], path_list, "up")

                # We can *continue* to descend on a non-controlled variable
                if cur not in dcf:
                    for child in graph.children(cur):
                        path_list = get_backdoor_paths(child, path + [cur], path_list, "down")

            if previous == "up" and cur not in dcf:

                # We can ascend on a non-controlled variable
                for parent in graph.parents(cur):
                    path_list = get_backdoor_paths(parent, path + [cur], path_list, "up")

                # We can descend on a non-controlled reverse-collider
                for child in graph.children(cur):
                    path_list = get_backdoor_paths(child, path + [cur], path_list, "down")

        return path_list

    # Get all possible backdoor paths
    backdoor_paths = get_backdoor_paths(s, [], [])

    # Filter out the paths that don't "enter" x; see the definition of a backdoor path:
    # keep only paths of 3+ vertices whose first step goes *into* s (s is a child of the
    # second vertex) and whose second vertex is not the target itself.
    return list(filter(lambda l: len(l) > 2 and l[0] in graph.children(l[1]) and l[1] != t, backdoor_paths))
def str_map(to_filter: Collection[Vertex]):
    """Normalize a mixed collection of string and Vertex objects to a set of string names."""
    names = set()
    for vertex in to_filter:
        names.add(vertex if isinstance(vertex, str) else vertex.name)
    return names
def parse_graph_string(graph_string: str) -> LatentGraph:
    """
    Parse a compact textual graph description into a LatentGraph.
    Statements are '.'-terminated, e.g. "M->S.S,M->C."; each statement chains
    comma-separated vertex lists with "->", "<-", or "<->" arrows.
    @param graph_string: The graph description to parse.
    @return: A LatentGraph with the described vertices, directed edges, and bidirected edges.
    """

    from re import split

    arrows = ["<->", "<-", "->"]

    # Drop the empty element produced by the trailing '.' terminator.
    # BUGFIX: removed leftover debug statement `print(splits)`.
    splits = graph_string.strip().split(".")[:-1]

    e = set()
    v = set()
    e_b = set()

    for item in splits:

        # This would be an item like "X." or "X,Y". in which the vertices exist, but don't have any edges.
        if not any(arrow in item for arrow in arrows):
            v.update(item.split(","))

        # Split on the arrows while keeping them (capturing group), so odd indices are arrows.
        parse = split(f'({"|".join(arrows)})', item)

        for i in range(1, len(parse), 2):

            # Left and Right are comma-separated lists of values, arrow being the "->" -style arrow joining them.
            left, arrow, right = parse[i-1].split(","), parse[i], parse[i+1].split(",")

            # Add all vertices into V
            v.update(left)
            v.update(right)

            for s, t in product(left, right):

                if arrow == "<-":
                    e.add((t, s))

                elif arrow == "->":
                    e.add((s, t))

                elif arrow == "<->":
                    e_b.add((s, t))

                else:
                    print("Invalid Arrow Type:", arrow)

    return LatentGraph(v, e, e_b)
101 | 102 | g3_q1 = ({'D'}, {'B'}) 103 | g3_q2 = ({'D'}, {'C'}) 104 | g3_q3 = ({'D'}, {'B', 'C'}) 105 | 106 | g3_a1 = "D | B = " 107 | g3_a2 = "D | C = " 108 | g3_a3 = "D | C, B = [D|B,C]" 109 | 110 | g3_queries = [g3_q1, g3_q2, g3_q3] 111 | g3_answers = [g3_a1, g3_a2, g3_a3] 112 | 113 | ######################################### 114 | # queries - graph 4 115 | ######################################### 116 | 117 | g_4 = LatentGraph({'S', 'T', 'C'}, {('S', 'T'), ('T', 'C')}, {('S', 'C')}) 118 | g_4_string = "S->T->C.S<->C." 119 | 120 | g4_q1 = ({'C'}, {'S'}) 121 | 122 | g4_a1 = "C | S = " 123 | 124 | g4_queries = [g4_q1] 125 | g4_answers = [g4_a1] 126 | 127 | ######################################### 128 | # queries - graph 5 129 | ######################################### 130 | 131 | g_5 = LatentGraph({"X", "Y", "Z1", "Z2", "Z3"}, 132 | {("Z1", "Z2"), ("X", "Z2"), ("Z2", "Z3"), ("X", "Y"), ("Z2", "Y"), ("Z3", "Y")}, {("X", "Z1"), ("Z1", "Z3")} 133 | ) 134 | g_5_string = "X<->Z1<->Z3.X,Z1->Z2->Z3.X,Z2,Z3->Y." 135 | 136 | g5_q1 = ({"Y"}, {"X"}) # paper: Sum_{X} [P(Z3 | Z2, Z1, X), P(Z1 | X), P(X)] 137 | g5_q2 = ({"Y"}, {"X", "Z1", "Z2", "Z3"}) 138 | 139 | g5_a1 = "Y | X = " 140 | g5_a2 = "Y | Z2, Z1, X, Z3 = [Y|X,Z2,Z3]" 141 | 142 | g5_queries = [g5_q1, g5_q2] 143 | g5_answers = [g5_a1, g5_a2] 144 | 145 | ######################################### 146 | # queries - graph 6 147 | ######################################### 148 | 149 | g_6 = LatentGraph({"X", "Y1", "Y2", "W1", "W2"}, 150 | {("W1", "X"), ("X", "Y1"), ("W2", "Y2")}, {("W1", "W2"), ("W1", "Y1"), ("W2", "X")} 151 | ) 152 | g_6_string = "Y1<->W1<->W2<->X.W1->X->Y1.W2->Y2." 
153 | 154 | g6_q1 = ({"Y1", "Y2"}, {"X"}) 155 | g6_a1 = "Y2, Y1 | X = [W2] " 156 | 157 | g6_queries = [g6_q1] 158 | g6_answers = [g6_a1] 159 | 160 | ######################################### 161 | # queries - graph 7 162 | ######################################### 163 | 164 | g_7 = LatentGraph({"Z1", "Z2", "W", "X", "Y"}, 165 | {("Z2", "X"), ("X", "W"), ("W", "Y"), ("Z1", "Y")}, {("Z1", "Z2"), ("Z2", "W"), ("Z1", "W")} 166 | ) 167 | g_7_string = "Z1<->Z2<->W<->Z1.Z2->X->W->Y<-Z1." 168 | 169 | g7_q1 = ({"Y"}, {"X"}) 170 | g7_a1 = "Y | X = " 171 | 172 | g7_queries = [g7_q1] 173 | g7_answers = [g7_a1] 174 | 175 | ######################################### 176 | # queries - graph 8 177 | ######################################### 178 | 179 | g_8 = LatentGraph({'S1', 'T1', 'C1', 'S2', 'T2', 'C2', 'C'}, {('S1', 'T1'), ('T1', 'C1'), ('S2', 'T2'), ('T2', 'C2'), ('C2', 'C'), ('C1', 'C')}, {('S1', 'C1'), ('S2', 'C2')}) 180 | 181 | g_8_string = "S1->T1->C1<->S1.S2->T2->C2<->S2.C2->C<-C1." 182 | 183 | g8_q1 = ({'C1'}, {'S1'}) 184 | g8_q2 = ({'C2'}, {'S2'}) 185 | g8_q3 = ({'C1', 'C2'}, {'S1', 'S2'}) 186 | g8_q4 = ({'C'}, {'S1', 'S2'}) 187 | 188 | g8_a1 = ">" 189 | g8_a2 = ">" 190 | g8_a3 = ">" 191 | g8_a4 = ">" 192 | 193 | g8_queries = [g8_q1, g8_q2, g8_q3, g8_q4] 194 | g8_answers = [g8_a1, g8_a2, g8_a3, g8_a4] 195 | 196 | ######################################### 197 | 198 | all_tests = [ 199 | { 200 | "queries": g1_queries, 201 | "answers": g1_answers, 202 | "g": g_1, 203 | "as_string": g_1_string, 204 | }, { 205 | "queries": g2_queries, 206 | "answers": g2_answers, 207 | "g": g_2, 208 | "as_string": g_2_string, 209 | }, { 210 | "queries": g3_queries, 211 | "answers": g3_answers, 212 | "g": g_3, 213 | "as_string": g_3_string, 214 | }, { 215 | "queries": g4_queries, 216 | "answers": g4_answers, 217 | "g": g_4, 218 | "as_string": g_4_string, 219 | }, { 220 | "queries": g5_queries, 221 | "answers": g5_answers, 222 | "g": g_5, 223 | "as_string": g_5_string, 224 | }, { 225 | 
"queries": g6_queries, 226 | "answers": g6_answers, 227 | "g": g_6, 228 | "as_string": g_6_string, 229 | }, { 230 | "queries": g7_queries, 231 | "answers": g7_answers, 232 | "g": g_7, 233 | "as_string": g_7_string, 234 | }, { 235 | "queries": g8_queries, 236 | "answers": g8_answers, 237 | "g": g_8, 238 | "as_string": g_8_string, 239 | } 240 | ] 241 | 242 | 243 | def test_GraphParse1(): 244 | assert g_1 == parse_graph_string(g_1_string) 245 | 246 | def test_GraphParse2(): 247 | assert g_2 == parse_graph_string(g_2_string) 248 | 249 | def test_GraphParse3(): 250 | assert g_3 == parse_graph_string(g_3_string) 251 | 252 | def test_GraphParse4(): 253 | assert g_4 == parse_graph_string(g_4_string) 254 | 255 | def test_GraphParse5(): 256 | assert g_5 == parse_graph_string(g_5_string) 257 | 258 | def test_GraphParse6(): 259 | assert g_6 == parse_graph_string(g_6_string) 260 | 261 | def test_GraphParse7(): 262 | assert g_7 == parse_graph_string(g_7_string) 263 | 264 | def test_GraphParse8(): 265 | assert g_8 == parse_graph_string(g_8_string) 266 | 267 | 268 | def tests(): 269 | 270 | for index, problem_set in enumerate(all_tests, start=1): 271 | 272 | print("*" * 20, f"Beginning Graph {index}", "*" * 20) 273 | 274 | g = problem_set["g"] 275 | p = PExpression([], [TemplateExpression(x, list(g.parents(x))) for x in g.V]) 276 | 277 | # Verify Graph-Parsing 278 | g_str = problem_set["as_string"] 279 | 280 | print(f"Graph String: {index}") 281 | print(g_str) 282 | parsed = parse_graph_string(g_str) 283 | 284 | print("Original:", g) 285 | print(" Parsed:", parsed) 286 | assert g == parsed 287 | 288 | # Verify ID 289 | for i, (query, answer) in enumerate(zip(problem_set["queries"], problem_set["answers"]), start=1): 290 | 291 | y, x = query 292 | 293 | query_str = f"{', '.join(y)} | {', '.join(x)}" 294 | print(f"Beginning problem ({i}): {query_str}") 295 | result = Identification(y, x, p, g, True) 296 | simplify = simplify_expression(result, g) 297 | 298 | print("*********** 
def inference(expression: Expression, model: Model):
    """
    Evaluate the probability of an Expression (a head of Outcomes, possibly conditioned on a
    body of Outcomes) in the given Model, by recursively applying standard probability rules:
    the reverse product rule, direct table lookup, the identity rule, Bayes' rule,
    Jeffrey's (distributive) rule, and dropping of non-parent conditions.
    @param expression: The Expression (head | body) to evaluate.
    @param model: The Model supplying the graph, variable definitions, and probability tables.
    @return: A probability in [0.0, 1.0].
    @raise ProbabilityIndeterminableException: if no rule can reduce the query.
    @raise AssertionError: if the expression references unknown variables or outcomes, or
        contains Interventions (which this basic engine does not handle).
    """

    def _compute(head: Collection[Outcome], body: Collection[Outcome], depth=0) -> float:
        """
        Compute the probability of some head given some body
        @param head: A list of some number of Outcome objects
        @param body: A list of some number of Outcome objects
        @param depth: Recursion depth, incremented on each recursive call (kept for tracing).
        @return: A probability between [0.0, 1.0]
        @raise ProbabilityIndeterminableException if the result cannot be computed for any reason
        """
        # NOTE: body's annotation was corrected from Collection[Intervention] to
        # Collection[Outcome] -- the outer function asserts Interventions never reach here.

        ###############################################
        #   Begin with bookkeeping / error-checking   #
        ###############################################

        current_expression = Expression(head, body)
        logger.info(f"query: {current_expression}")

        # If the calculation for this contains two separate outcomes for a variable (Y = y | Y = ~y), 0
        if contradictory_outcome_set(head + body):
            logger.error("two separate outcomes for one variable, P = 0.0")
            return 0.0

        ###############################################
        #            Reverse product rule             #
        #  P(y, x | ~z) = P(y | x, ~z) * P(x | ~z)    #
        ###############################################

        if len(head) > 1:
            logger.info(f"applying reverse product rule to {current_expression}")

            result_1 = _compute(head[:-1], [head[-1]] + body, depth+1)
            result_2 = _compute([head[-1]], body, depth+1)
            result = result_1 * result_2

            logger.success(f"{current_expression} = {result}")
            return result

        ###############################################
        #            Attempt direct lookup            #
        ###############################################

        # A table applies directly only when the body is exactly the head variable's parents.
        if set(model.variable(head[0].name).parents) == set(v.name for v in body):
            logger.info(f"querying table for: {current_expression}")
            table = model.table(head[0].name)                       # Get table
            probability = table.probability_lookup(head[0], body)   # Get specific row
            logger.success(f"{current_expression} = {probability}")

            return probability
        else:
            logger.info("no direct table found")

        ##################################################################
        #   Easy identity rule; P(X | X) = 1, so if LHS ⊆ RHS, P = 1.0   #
        ##################################################################

        if set(head).issubset(set(body)):
            logger.success(f"identity rule: X|X = 1.0, therefore {current_expression} = 1.0")
            return 1.0

        #################################################
        #                  Bayes' Rule                  #
        #     Detect children of the LHS in the RHS     #
        #      p(a|Cd) = p(d|aC) * p(a|C) / p(d|C)      #
        #################################################

        reachable_from_head = set().union(*[model.graph().descendants(outcome) for outcome in head])
        descendants_in_rhs = set([var.name for var in body]) & reachable_from_head

        if descendants_in_rhs:
            logger.info(f"Children of the LHS in the RHS: {','.join(descendants_in_rhs)}")
            logger.info("Applying Bayes' rule.")

            # Not elegant, but simply take one of the children from the body out and recurse.
            # After the second line, `child` is a *list* of the matching body Outcomes.
            child = list(descendants_in_rhs)[0]
            child = list(filter(lambda x: x.name == child, body))
            new_body = list(set(body) - set(child))

            logger.info(f"{Expression(child, head + new_body)} * {Expression(head, new_body)} / {Expression(child, new_body)}")

            result_1 = _compute(child, head + new_body, depth+1)
            result_2 = _compute(head, new_body, depth+1)
            result_3 = _compute(child, new_body, depth+1)
            if result_3 == 0:   # Avoid dividing by 0! coverage: skip
                # BUGFIX: `child` is already a list here; the original wrapped it again
                # as Expression([child], ...), producing a malformed log message.
                logger.success(f"{Expression(child, new_body)} = 0, therefore the result is 0.")
                return 0

            # flip flop flippy flop
            result = result_1 * result_2 / result_3
            logger.success(f"{current_expression} = {result}")
            return result

        #######################################################################################################
        #                                 Jeffrey's Rule / Distributive Rule                                  #
        #   P(y | x) = P(y | z, x) * P(z | x) + P(y | ~z, x) * P(~z | x) === sigma_Z P(y | z, x) * P(z | x)   #
        #######################################################################################################

        # Parents of the head that appear nowhere in the query must be summed out.
        missing_parents = set()
        for outcome in head:
            missing_parents.update(set(model.variable(outcome.name).parents) - set([parent.name for parent in head + body]))

        if missing_parents:
            logger.info("Attempting application of Jeffrey's Rule")

            # Only the first missing parent needs explicit treatment here; the recursive
            # calls will introduce the remaining ones in turn.
            for missing_parent in missing_parents:

                # Add one parent back in and recurse
                parent_outcomes = model.variable(missing_parent).outcomes

                # Consider the missing parent and sum every probability involving it
                total = 0.0
                for parent_outcome in parent_outcomes:

                    as_outcome = Outcome(missing_parent, parent_outcome)

                    logger.info(f"{Expression(head, [as_outcome] + body)}, * {Expression([as_outcome], body)}")

                    result_1 = _compute(head, [as_outcome] + body, depth+1)
                    result_2 = _compute([as_outcome], body, depth+1)
                    outcome_result = result_1 * result_2

                    total += outcome_result

                logger.success(f"{current_expression} = {total}")
                return total

        ###############################################
        #            Single element on LHS            #
        #            Drop non-parents                 #
        ###############################################

        if len(head) == 1 and not missing_parents and not descendants_in_rhs:

            head_variable = head[0].name
            can_drop = [v for v in body if v.name not in model.variable(head_variable).parents]

            if can_drop:
                logger.info(f"can drop: {[str(item) for item in can_drop]}")
                result = _compute(head, list(set(body) - set(can_drop)), depth+1)
                logger.success(f"{current_expression} = {result}")
                return result

        ###############################################
        #               Cannot compute                #
        ###############################################

        raise ProbabilityIndeterminableException

    head = set(expression.head())
    body = set(expression.body())

    # Validate the query against the model before computing anything.
    for out in head | body:
        assert out.name in model.graph().v, f"Error: Unknown variable {out}"
        assert out.outcome in model.variable(out.name).outcomes, f"Error: Unknown outcome {out.outcome} for {out.name}"
        assert not isinstance(out, Intervention), \
            f"Error: basic inference engine does not handle Interventions ({out.name} is an Intervention)"

    return _compute(list(head), list(body))
def contradictory_outcome_set(outcomes: Collection[Outcome]) -> bool:
    """
    Check whether a list of outcomes contain any contradictory values, such as Y = y and Y = ~y
    @param outcomes: A list of Outcome objects
    @return: True if there is a contradiction/implausibility, False otherwise
    """
    # Single O(n) pass instead of the original quadratic pairwise product: remember the
    # first outcome seen for each variable name and flag any later disagreement.
    seen = {}
    for outcome in outcomes:
        if seen.setdefault(outcome.name, outcome.outcome) != outcome.outcome:
            return True
    return False
def validate(model: Model) -> bool:
    """
    Ensures a model is 'valid' and 'consistent'.
    1. Ensures the model is a DAG (contains no cycles)
    2. Ensures all variables denoted as exogenous are roots.
    3. Ensures all distributions are consistent (the sum of probability of each outcome is 1.0)

    Returns True on success (indicating a valid model), or raises an appropriate Exception indicating a failure.
    """
    # no cycles
    # NOTE(review): the acyclicity check is not implemented -- the '...' below is a stub,
    # so check (1) above is currently not enforced. TODO implement a cycle check.
    ...

    # exogenous variables are all roots
    # (exogenous = vertices in the graph that have no variable definition in the model)
    exogenous = model._g.v - set(model._v.keys())
    roots = model._g.roots()
    for variable in exogenous:
        if variable not in roots:
            raise ExogenousNonRoot(variable)

    # consistent distributions
    for name, variable in model._v.items():
        t = 0
        for outcome in variable.outcomes:
            t += inference(Expression(Outcome(name, outcome)), model)

        # the outcome probabilities of each variable must sum to 1.0 (within float tolerance)
        assert within_precision(t, 1)

    # all checks passed -> valid model
    return True
18 | g (Graph): A LatentGraph which has undergone augmentation to remove any exogenous variables, replacing 19 | them with bidirected arcs connecting their children. 20 | prove (bool, optional): Controls whether or not an additional process of proof generation should be 21 | undertaken when identifying the resulting expression. Defaults to True. 22 | 23 | Returns: 24 | PExpression: A resulting PExpression containing any number of nested PExpressions or (terminal) 25 | TemplateExpressions. This is not particularly useful on its own, but instead, can be evaluated 26 | through the main API. 27 | """ 28 | 29 | def _identification(_y: Set[str], _x: Set[str], _p: PExpression, _g: Graph, _prove: bool = True, i=0, passdown_proof: Optional[List[Tuple[int, List[str]]]] = None) -> PExpression: 30 | 31 | def s(a_set): 32 | if len(a_set) == 0: 33 | return "Ø" 34 | return "{" + ', '.join(a_set) + "}" 35 | 36 | # The continuation of a proof that is ongoing if this is a recursive ID call, or a 'fresh' new proof sequence otherwise 37 | proof_chain = passdown_proof if passdown_proof else [] 38 | 39 | # noinspection PyPep8Naming 40 | def An(vertices): 41 | return _g.ancestors(vertices) 42 | 43 | if _prove: 44 | proof_chain.append((i, [f"ID Begin: Y = {s(_y)}, X = {s(_x)}"])) 45 | 46 | # 1 47 | if _x == set(): 48 | if _prove: 49 | proof_chain.append((i, [ 50 | "1: if X == Ø, return Σ_{V \\ Y} P(V)", 51 | f" --> Σ_{s(_g.V - _y)} P({s(_g.V)})", 52 | "", 53 | f"[***** Standard Probability Rules *****]" 54 | ])) 55 | 56 | return p_operator(_g.V - _y, _p, proof_chain) 57 | 58 | # 2 59 | if _g.V != An(_y): 60 | w = _g.V - An(_y) 61 | if _prove: 62 | proof_chain.append((i, [ 63 | "2: if V != An(Y)", 64 | f"--> {s(_g.V)} != {s(An(_y))}", 65 | " return ID(y, x ∩ An(y), P(An(Y)), An(Y)_G)", 66 | f" --> ID({s(_y)}, {s(_x)} ∩ {s(An(_y))}, P({s(An(_y))}), An({s(An(_y))})_G)", 67 | "", 68 | f" [***** Do-Calculus: Rule 3 *****]", 69 | " let W = V \\ An(Y)_G", 70 | f" W = {s(_g.V)} \\ {s(An(_y))}", 
71 | f" W = {s(w)}", 72 | f" G \\ W = An(Y)_G", 73 | f" {s(_g.V)} \\ {s(w)} = {s(An(_y))}", 74 | " P_{x,z} (y | w) = P_{x} (y | w) if (Y ⊥⊥ Z | X, W) _G_X,Z(W)", 75 | f" let y = y ({s(_y)}), x = x ∩ An(Y) ({s(_x & An(_y))}), z = w ({s(w)})" ", w = Ø", 76 | " P_{" f"{s((_x & An(_y)) | w)}" "} " f"({s(_y)}) = P_{s(_x & An(_y))} ({s(_y)}) if ({s(_y)} ⊥⊥ {s(w)} | {s(_x & An(_y))}) _G_{s(_x)}", 77 | ])) 78 | 79 | return _identification(_y, _x & An(_y), p_operator(_g.V - _g[An(_y)].V, _p), _g[An(_y)], _prove, i+1, proof_chain) 80 | 81 | 82 | # 3 83 | w = (_g.V - _x) - _g.without_incoming(_x).ancestors(_y) 84 | 85 | if _prove: 86 | proof_chain.append((i, [ 87 | "let W = (V \\ X) \\ An(Y)_G_X", 88 | f"--> W = ({s(_g.V)} \\ {s(_x)}) \\ An({s(_y)})_G_{s(_x)}", 89 | f"--> W = {s(_g.V - _x)} \\ {s(_g.without_incoming(_x).ancestors(_y))}", 90 | f"--> W = {s(w)}" 91 | ])) 92 | 93 | if w != set(): 94 | if _prove: 95 | proof_chain.append((i, [ 96 | "3: W != Ø", 97 | " return ID(y, x ∪ w, P, G)", 98 | f" --> ID({s(_y)}, {s(_x)} ∪ {s(w)}, P, G)", 99 | "", 100 | " [***** Do-Calculus: Rule 3 *****]", 101 | " P_{x, z} (y | w) = P_{x} if (Y ⊥⊥ Z | X, W)_G_X_Z(W)", 102 | " let y = y, x = x, z = w, w = Ø", 103 | " P_{x} (y | w) = P_{x,z} (y | w) if (Y ⊥⊥ Z | X, W) _G_X,Z(W)", 104 | f" P_{s(_x)} ({s(_y)}) = P_" "{" f"{s(_x)[1:-1]}, {s(w)[1:-1]}" "}" f" ({s(_y)}) if ({s(_y)} ⊥⊥ {s(w)} | {s(_x)})_G_{s(_x)}" 105 | ])) 106 | 107 | return _identification(_y, _x | w, _p, _g, _prove, i+1, proof_chain) 108 | 109 | C_V_minus_X = _g[_g.V - _x].C 110 | 111 | # Line 4 112 | if len(C_V_minus_X) > 1: 113 | if _prove: 114 | proof_chain.append((i, [ 115 | "4: C(G \\ X) = {S_1, ..., S_k}", 116 | f"--> C(G \\ X) = C({s(_g.V)} \\ {s(_x)}) = {', '.join(list(map(s, C_V_minus_X)))}", 117 | " return Σ_{V \\ y ∪ x} Π_i ID(Si, v \\ Si, P, G)", 118 | " --> Σ_{" f"{s(_g.V)} \\ {s(_y)} ∪ {s(_x)}" "} Π [", 119 | *[f" --> ID({s(Si)}, {s(_g.V - Si)}, P, G)" for Si in C_V_minus_X], 120 | " ]", 121 | "", 122 | " [***** 
Proof *****]", 123 | " P_{x} (y) = Σ_{v \\ (y ∪ x)} Π_i P_{v \\ S_i} (S_i)", 124 | " 1. [***** Do-Calculus: Rule 3 *****]", 125 | " Π_i P_{v \\ S_i} (S_i) = Π_i P_{A_i} (S_i), where A_i = An(S_i)_G \\ S_i", 126 | " Π [", 127 | *[f" P_{s(_g.V - si)} ({s(si)[1:-1]})" for si in C_V_minus_X], 128 | " ] = Π [", 129 | *[f" P_{s(_g.ancestors(si)-si)} ({s(si)[1:-1]})" for si in C_V_minus_X], 130 | " ]", 131 | 132 | " 2. [***** Chain Rule *****]", 133 | " Π_i P_{A_i} (S_i) = Π_i Π_{V_j ∈ S_i} P_{A_i} (V_j | V_π^(j-1) \\ A_i)", 134 | 135 | " Π [", 136 | *[f" P_{s(_g.ancestors(si)-si)} ({s(si)[1:-1]})" for si in C_V_minus_X], 137 | " ] = Π [", 138 | *[" ".join([" Π ["] + [ 139 | f"P_{s(_g.ancestors(si)-si)} ({vj} | {s(set(_g.v_Pi[:_g.v_Pi.index(vj)]) - _g.ancestors({vj}))})" for vj in si 140 | ] + ["]"]) for si in C_V_minus_X], 141 | " ]", 142 | 143 | " 3. [***** Rule 2 or Rule 3 *****]", 144 | " Π_i Π_{V_j ∈ S_i} P_{A_i} (V_j | V_π^(j-1) \\ A_i) = Π_i Π_{V_j ∈ S_i} P(V_j | V_π^(j-1))", 145 | " a. if A ∈ A_i ∩ V_π^(j-1), A can be removed as an intervention by Rule 2", 146 | " All backdoor paths from A_i to V_j with a node not in V_π^(j-1) are d-separated.", 147 | " Paths must also be bidirected arcs only.", 148 | " let x = x, y = y, z = {A}, w = Ø", 149 | " P_{x,z} (y | w) = P_{x} (y | z, w) if (Y ⊥⊥ Z | X, W)_X_Z_", 150 | " b. if A ∈ A_i \\ V_π^(j-1), A can be removed as an intervention by Rule 3", 151 | " let x = x, y = V_j, z = {A}, w = Ø", 152 | " P_{x,z} (y | w) = P_{x} (y | w) if (Y ⊥⊥ Z | X, W)_G_X_Z(W)", 153 | " (V_j ⊥⊥ A | V_π^(j-1)) G_{A_i}", 154 | 155 | " Π [", 156 | *[" ".join([" Π ["] + [ 157 | f"P_{s(_g.ancestors(si)-si)} ({vj} | {s(set(_g.v_Pi[:_g.v_Pi.index(vj)]) - _g.ancestors({vj}))})" for vj in si 158 | ] + ["]"]) for si in C_V_minus_X], 159 | " ] = Π [", 160 | *[" ".join([" Π ["] + [ 161 | f"P ({vj} | {s(set(_g.v_Pi[:_g.v_Pi.index(vj)]))})" for vj in si 162 | ] + ["]"]) for si in C_V_minus_X], 163 | " ]", 164 | 165 | " 4. 
[***** Grouping *****]", 166 | " Π_i Π_{V_j ∈ S_i} P(V_j | V_π^(j-1)) = Π_i P(V_i | V_π^(i-1))", 167 | 168 | " Π [", 169 | *[" ".join([" Π ["] + [ 170 | f"P ({vj} | {s(set(_g.v_Pi[:_g.v_Pi.index(vj)]))})" for vj in si 171 | ] + ["]"]) for si in C_V_minus_X], 172 | " ] = Π [", 173 | 174 | " ]", 175 | 176 | " 5. [***** Chain Rule *****]", 177 | " Π_i P(V_i | V_π^(i-1)) = P(v)" 178 | ])) 179 | 180 | return PExpression(_g.V - (_y | _x), [_identification(s_i, _g.V - s_i, _p, _g, _prove, i+1) for s_i in C_V_minus_X], proof_chain) 181 | 182 | else: 183 | 184 | # At this point we have a single component 185 | S = C_V_minus_X[0] 186 | 187 | if _prove: 188 | proof_chain.append((i, [ 189 | "if C(G \\ X) = {S}", 190 | f"--> C({s(_g.V)} \\ {s(_x)}) = {s(S)}" 191 | ])) 192 | 193 | # Line 5 194 | if set(S) == _g.V: 195 | if _prove: 196 | proof_chain.append((i, [ 197 | "5: if C(G) = {G}: FAIL(G, S)", 198 | f"--> G, S form hedges F, F' for Px(Y) -> {_g}, {S} for P_{_x}({_y})" 199 | ])) 200 | 201 | raise FAIL(_g, S, proof_chain) 202 | 203 | # Line 6 - a single c-component 204 | if S in _g.C: 205 | 206 | dists = [] 207 | dist_str = [] 208 | for vi in S: 209 | given = _g.v_Pi[:_g.v_Pi.index(vi)] 210 | if _prove: 211 | dist_str.append(f"P({vi})" if len(given) == 0 else f"P({vi} | {', '.join(given)})") 212 | dists.append(TemplateExpression(vi, given)) 213 | 214 | if _prove: 215 | proof_chain.append((i, [ 216 | f"6: S ∈ C(G)", 217 | f"--> {s(S)} ∈ {', '.join(list(map(s, _g.C)))}", 218 | " return Σ_{S-Y} π_{Vi ∈ S} P(Vi | V_π^(i-1))", 219 | f" --> Σ_{s(S - _y)} π [{', '.join(dist_str)}]", 220 | "", 221 | " [***** Proof *****]", 222 | f" G has been partitioned into S = {s(S)} and X = {s(_x)} in G = {s(_g.V)}.", 223 | " There are no bidirected arcs between S and X." 
224 | ])) 225 | 226 | return PExpression(S - _y, dists, proof_chain) 227 | 228 | # 7 229 | else: 230 | s_prime = next(s for s in _g.C if set(s) > set(S)) 231 | p = [] 232 | 233 | msg = " --> P = " 234 | 235 | for v in s_prime: 236 | rhs0 = _g.v_Pi[:_g.v_Pi.index(v)] 237 | rhs1 = rhs0.copy() 238 | 239 | rhs0 = list(set(rhs0) & s_prime) 240 | rhs1 = list(set(rhs1) - s_prime) 241 | rhs = rhs0 + rhs1 242 | p.append(TemplateExpression(v, rhs)) 243 | if _prove: 244 | msg += f"[{v}{(f' | ' + ', '.join(rhs)) if len(rhs) > 0 else ''}]" 245 | 246 | g_s_prime = _g[s_prime] 247 | 248 | if _prove: 249 | proof_chain.append((i, [ 250 | f"7: if ∃(S') S ⊂ S' ∈ C(G)", 251 | f"--> let S = {s(S)}, S' = {s(s_prime)}", 252 | f"--> {s(S)} ⊂ {s(s_prime)} ∈ {', '.join(list(map(s, _g.C)))}", 253 | " return ID(y, x ∩ S', π_{V_i ∈ S'} P(V_i | V_π^(i-1) ∩ S', V_π^(i-1) \\ S'), S')", 254 | msg, 255 | f" --> ID({s(_y)}, {s(_x)} ∩ {s(s_prime)}, P, G = ({g_s_prime.V}, {g_s_prime.e}, {g_s_prime.e_bidirected}))", 256 | "", 257 | " [***** Proof *****]", 258 | f" G is partitioned into X = {s(_x)} and S = {s(S)}, where X ⊂ An(S).", 259 | " M_{X \\ S'} induces G \\ (X \\ S') = S'.", 260 | " P_{x} = P_{x ∩ S', X \\ S'} = P_{x ∩ S'}.", 261 | ])) 262 | 263 | return _identification(_y, _x & s_prime, PExpression([], p), g_s_prime, _prove, i+1, proof_chain) 264 | 265 | return _identification(y, x, p, g, prove) 266 | 267 | def simplify_expression(original: PExpression, g: Graph, debug=False) -> PExpression: 268 | 269 | def _simplify(current,i = 0): 270 | 271 | cpt_list_copy = list(filter(lambda i: isinstance(i, TemplateExpression), current.terms)) 272 | for s in current.terms: 273 | 274 | if isinstance(s, TemplateExpression): 275 | continue 276 | 277 | c = _simplify(s, i + 1) 278 | 279 | if s.internal_proof: 280 | offset = original.internal_proof[-1][0] + 2 281 | else: 282 | offset = 1 283 | 284 | s.internal_proof.append((offset, c)) 285 | 286 | steps = [] 287 | 288 | # """ 289 | # Remove unnecessary variables 
from body 290 | for expression in cpt_list_copy: 291 | 292 | while True: 293 | removed_one = False 294 | x = {expression.head} 295 | for variable in expression.given: 296 | y = {variable} 297 | z = set(expression.given) - y 298 | if g.ci(x, y, z): 299 | msg1 = f"{', '.join(x)} is independent of {', '.join(y)} given {', '.join(z)}, and can be removed." 300 | msg2 = f"p operator removed {variable} from body of {expression}" 301 | if debug: 302 | print(msg1) 303 | print(msg2) 304 | steps.append(msg1) 305 | expression.given.remove(variable) 306 | removed_one = True 307 | 308 | if not removed_one: 309 | break 310 | # """ 311 | 312 | # Remove unnecessary expressions 313 | # """ 314 | while True: 315 | bodies = set().union(*[el.given for el in current.terms if isinstance(el, TemplateExpression)]) 316 | search = filter(lambda el: isinstance(el, TemplateExpression) and el.head in current.sigma, current.terms) 317 | remove = list(filter(lambda el: el.head not in bodies, search)) 318 | 319 | if len(remove) == 0: 320 | break 321 | 322 | for query in remove: 323 | current.sigma.remove(query.head) 324 | current.terms.remove(query) 325 | msg = f"{query.head} can be removed." 
326 | if debug: 327 | print(msg) 328 | steps.append(msg) 329 | # """ 330 | 331 | while True: 332 | sumout = [cpt for cpt in current.terms if isinstance(cpt, TemplateExpression) and cpt.head in current.sigma and not any([cpt.head in el.given for el in current.terms if isinstance(el, TemplateExpression)])] 333 | if not sumout: 334 | break 335 | for cpt in sumout: 336 | current.terms.remove(cpt) 337 | current.sigma.remove(cpt.head) 338 | 339 | if len(steps) > 0: 340 | tables = ", ".join(f"P({table.head} | {', '.join(table.given)})" if len(table.given) > 0 else f"P({table.head})" for table in cpt_list_copy) 341 | steps.append(f"After simplification: {tables}") 342 | 343 | def distribution_position(item: Union[PExpression, TemplateExpression]): 344 | if isinstance(item, PExpression): 345 | if len(item.sigma) == 0: 346 | return len(g.v_Pi) 347 | return len(g.v_Pi) + min(0, *list(map(lambda v: g.v_Pi.index(v), item.sigma))) 348 | else: 349 | return g.v_Pi.index(item.head) 350 | 351 | # Sort remaining expressions by the topological ordering 352 | current.terms.sort(key=distribution_position) 353 | 354 | if len(steps) > 0: 355 | steps.insert(0, "[***** Simplification *****]") 356 | 357 | return steps 358 | 359 | if original.internal_proof: 360 | depth = original.internal_proof[-1][0] + 1 361 | else: 362 | depth = 1 363 | 364 | p = original.copy() 365 | changes = _simplify(p) 366 | p.internal_proof.append((depth, changes)) 367 | return p 368 | 369 | 370 | def p_operator(v: Set[str], p: PExpression, proof: List[Tuple[int, List[str]]] = None): 371 | return PExpression(list(v.copy() | set(p.sigma)), p.terms.copy(), proof) 372 | --------------------------------------------------------------------------------