, ...], ]
16 | """
17 |
18 | # Padding units on the left/right sides of each cell
19 | padding = 1
20 |
    def __init__(self, variable: Variable, parents: List[str], table_rows: List):
        """
        Constructor for a ConditionalProbabilityTable.
        @param variable: The Variable on the LHS of the table (single-variable only)
        @param parents: A list of names of the parent variables forming the RHS/body of the table
        @param table_rows: Raw table data; each row is formatted as
            [outcome of variable, parent_1 outcome, parent_2 outcome, ..., probability]
        """
        self.variable = variable    # The LHS of the table, single-variable only
        self.parents = parents      # The RHS/body of the table

        self.table_rows = []

        # Number of trailing parents that have no corresponding column in the row data.
        # NOTE(review): this is computed from len(table_rows) (the number of ROWS), though the
        # intent appears to be the WIDTH of a row; also, when latent == 0, parents[:-latent]
        # below evaluates to parents[:0] == [] rather than the full parent list — confirm both.
        latent = len(parents) - (len(table_rows) - 2)

        # Clean up the rows; Each is formatted as: [outcome of variable, parent_1, parent_2, ..., probability]
        for row in table_rows:
            outcome = Outcome(variable.name, row[0])
            p = row[1:-1]  # the parent-outcome columns, between the variable outcome and the probability

            self.table_rows.append([outcome, [Outcome(v, x) for v, x in zip(parents[:-latent], p)], row[-1]])
35 |
36 | def __str__(self) -> str:
37 | """
38 | String builtin for a ConditionalProbabilityTable
39 | @return: A string representation of the table.
40 | """
41 |
42 | # Create a snazzy numpy table
43 | # Rows: 1 for a header + 1 for each row; Columns: 1 for variable, 1 for each given var, 1 for the probability
44 | rows = 1 + len(self.table_rows)
45 | columns = 1 + len(self.parents) + 1
46 |
47 | # dtype declaration is better than "str", as str only allows one character in each cell
48 | table = empty((rows, columns), dtype=' float:
87 | """
88 | Directly lookup the probability for the row corresponding to the queried outcome and given data
89 | @param outcome: The specific outcome to lookup
90 | @param given: A list of Outcome objects
91 | @return: A probability corresponding to the respective row. Raises an Exception otherwise.
92 | """
93 | for row_outcome, row_given, row_p in self.table_rows:
94 | # If the outcome for this row matches, and each outcome for the given data matches...
95 | if outcome == row_outcome and set(row_given) == set(given):
96 | return row_p # We have our answer
97 |
98 | # Iterated over all the rows and didn't find the correct one
99 | print(f"Couldn't find row: {outcome} | {', '.join(map(str, given))}")
100 | raise MissingTableRow
101 |
--------------------------------------------------------------------------------
/do/deconfounding/Do.py:
--------------------------------------------------------------------------------
1 | from itertools import product
2 | from loguru import logger
3 | from typing import Collection
4 |
5 | from ..core.Expression import Expression
6 | from ..core.Inference import inference
7 | from ..core.Model import Model
8 | from ..core.Variables import Outcome, Intervention
9 |
10 | from .Backdoor import backdoors, deconfound
11 | from .Exceptions import NoDeconfoundingSet
12 |
13 |
def treat(expression: Expression, interventions: Collection[Intervention], model: Model) -> float:
    """
    Compute the probability of a query under a collection of treatments / interventions.
    @param expression: The query to evaluate, P(head | body)
    @param interventions: A collection of Intervention objects, do(X=x)
    @param model: The Model supplying the causal graph and distributions
    @return: The probability of the query under the given interventions
    @raise NoDeconfoundingSet: If backdoor paths exist, but every deconfounding set overlaps the query body
    """

    head = set(expression.head())
    body = set(expression.body())

    # If there are no Interventions, we can compute a standard query
    if len(interventions) == 0:
        return inference(expression, model)

    # There are interventions; may need to find some valid Z to compute
    paths = backdoors(interventions, head, model.graph(), body)

    # No backdoor paths; augment graph space (interventions become roots) and compute directly
    if len(paths) == 0:
        logger.info("no backdoor paths; translating into standard inference query")
        expression_transform = Expression(expression.head(), set(expression.body()) | set(Outcome(x.name, x.outcome) for x in interventions))
        logger.info(f"translated expression: {expression_transform}")
        logger.info(f"disabling incoming edges on graph: {[x.name for x in interventions]}")
        model.graph().disable_incoming(*interventions)
        p = inference(expression_transform, model)
        logger.info("resetting edge transformations")
        model.graph().reset_disabled()
        return p

    # Backdoor paths found; find all possible deconfounding sets to compute with
    logger.info("computing deconfounding sets")
    deconfounding_sets = deconfound(interventions, head, model.graph())
    logger.info(f"resulting deconfounding sets: {deconfounding_sets}")

    # Filter out the deconfounding sets overlapping with our query body
    vertex_dcf = list(filter(lambda s: len(set(s) & {x.name for x in body}) == 0, deconfounding_sets))
    if len(vertex_dcf) == 0:
        raise NoDeconfoundingSet

    # Compute with every possible deconfounding set as a safety measure; ensuring they all match
    probability = None  # Sentinel value
    for z_set in vertex_dcf:

        result = _marginalize_query(expression, interventions, z_set, model)
        if probability is None:  # Storing first result
            probability = result

        # If results do NOT match; error
        assert abs(result - probability) < 0.00000001, f"Error: Distinct results: {probability} vs {result}"

    logger.info("{0} = {1:.5f}".format(Expression(head, set(body) | set(interventions)), probability))
    return probability
66 |
67 |
def _marginalize_query(expression: Expression, interventions: Collection[Intervention], deconfound: Collection[str], model: Model) -> float:
    """
    Handle the modified query where we require a deconfounding set due to Interventions / treatments.
    @param expression: The query to evaluate, P(head | body)
    @param interventions: A collection of Intervention objects applied to the query
    @param deconfound: A collection of (string) names of variables to serve as a deconfounding set,
        blocking all backdoor paths between the head and the interventions
    @param model: The Model supplying the causal graph and distributions
    @return: The probability of the query, marginalized over every combination of outcomes of the
        deconfounding set: sum over z of P(head | do(X), Z=z) * P(Z=z)
    """

    head = set(expression.head())
    body = set(expression.body())

    # Augment graph (isolating interventions as roots) and create engine
    model.graph().disable_incoming(*interventions)
    as_outcomes = {Outcome(x.name, x.outcome) for x in interventions}

    probability = 0.0

    # We take every possible combination of outcomes of Z and compute each probability separately
    for cross in product(*[model.variable(var).outcomes for var in deconfound]):

        # Construct the respective Outcome list of each Z outcome cross product
        z_outcomes = {Outcome(x, cross[i]) for i, x in enumerate(deconfound)}

        # First, we do P(Y | do(X), Z)
        ex1 = Expression(head, body | as_outcomes | z_outcomes)
        logger.info(f"computing sub-query: {ex1}")
        p_y_x_z = inference(ex1, model)

        # Second, P(Z)
        ex2 = Expression(z_outcomes, body | as_outcomes)
        logger.info(f"computing sub-query: {ex2}")
        p_z = inference(ex2, model)

        probability += p_y_x_z * p_z

    # Restore the graph before returning
    model.graph().reset_disabled()
    return probability
107 |
--------------------------------------------------------------------------------
/do/core/Variables.py:
--------------------------------------------------------------------------------
1 | from re import findall, sub
2 |
3 |
class Outcome:
    """
    A basic "Outcome" of a variable, representing a specific outcome such as "X = x".
    This does essentially act as a Pair-like.
    """

    def __init__(self, name: str, outcome: str):
        """
        Constructor for an Outcome
        @param name: The name of the variable. Ex: "X"
        @param outcome: The specific outcome of the variable. Ex: "x" or "~x"
        """
        self.name = name.strip()
        self.outcome = outcome.strip()

    def __str__(self) -> str:
        return f"{self.name} = {self.outcome}"

    def __hash__(self) -> int:
        # Hash on the concatenated pair, so equal Outcomes hash identically
        return hash(self.name + self.outcome)

    def __copy__(self):
        return Outcome(self.name, self.outcome)

    def copy(self):
        return self.__copy__()

    def __eq__(self, other) -> bool:
        # An Outcome may be compared against a bare string, matching on the variable name alone
        if isinstance(other, str):
            return self.name == other

        # Otherwise both halves of the pair must match, and the classes must match exactly
        names_match = self.name == other.name
        return names_match and self.outcome == other.outcome and type(self) == type(other)
35 |
36 |
class Variable:
    """
    Represents a basic "Variable", as part of a Conditional Probability Table or the like.
    Has a name, list of potential outcomes, and some list of parent variables.
    """

    def __init__(self, name: str, outcomes: list, parents: list, descendants=None, topological_order=0):
        """
        A basic Variable for use in a CPT or Causal Graph
        @param name: The name of the Variable, "X"
        @param outcomes: A list of all potential outcomes of the variable: ["x", "~x"]
        @param parents: A list of strings representing the names of all the parents of this Variable
        @param descendants: An optional set of Variables which are reachable from this Variable
        @param topological_order: Used in the ordering of Variables as defined by a topological sort
        """
        self.name = name.strip()
        self.outcomes = [each.strip() for each in outcomes]
        self.parents = [each.strip() for each in parents]
        self.topological_order = topological_order
        self.descendants = set() if descendants is None else descendants

    def __str__(self) -> str:
        outcomes = ",".join(self.outcomes)
        parents = ",".join(self.parents)
        return f"{self.name}: <{outcomes}>, <-- {parents}"

    def __hash__(self) -> int:
        return hash(self.name + str(self.outcomes) + str(self.parents))

    def __eq__(self, other) -> bool:
        # A Variable may be compared against a bare string, matching on the name alone
        if isinstance(other, str):
            return self.name == other

        # Outcome/parent ORDER is irrelevant to equality; only membership matters
        names_match = self.name == other.name
        return names_match and \
            set(self.outcomes) == set(other.outcomes) and \
            set(self.parents) == set(other.parents)

    def __copy__(self):
        return Variable(self.name, self.outcomes.copy(), self.parents.copy(), descendants=self.descendants.copy())

    def copy(self):
        return self.__copy__()
80 |
81 |
class Intervention(Outcome):
    """
    Represents an intervention; do(X).
    """

    def __init__(self, name: str, fixed_outcome: str):
        """
        Constructor for an Intervention
        @param name: The name of the variable being intervened upon. Ex: "X"
        @param fixed_outcome: The outcome the variable is fixed to. Ex: "x"
        """
        super().__init__(name, fixed_outcome)

    def __str__(self) -> str:
        return f"do({self.name}={self.outcome})"

    def __hash__(self):
        # Same hash scheme as Outcome: equal name/outcome pairs hash identically
        return hash(self.name + self.outcome)

    def __copy__(self):
        return Intervention(self.name, self.outcome)

    def copy(self):
        return self.__copy__()
101 |
102 |
def parse_outcomes_and_interventions(line: str) -> set:
    """
    Take one string line and parse it into a set of Outcomes and Interventions
    @param line: A string representing the query, such as "Y = y, do(X=x)"
    @return: A set of Outcomes and/or Interventions
    """
    # "do(X=x)", "do(X=x, Y=y)", "do(X=x), do(Y=y)" are all valid ways to write interventions.
    # Capture everything up to the closing parenthesis: the previous pattern, do\([^do]*\),
    # excluded the characters 'd' and 'o' from the interior and so silently truncated any
    # variable/outcome containing them (e.g. "do(X=dog)").
    interventions_preprocessed = findall(r'do\(([^)]*)\)', line)
    interventions = []
    for string in interventions_preprocessed:
        interventions.extend([item.strip(", ") for item in string.split(", ")])

    # Remove all the interventions, leaving only specific Outcomes
    outcomes_preprocessed = sub(r'do\([^)]*\)', '', line).strip(", ").split(",")
    outcomes_preprocessed = [item.strip(", ") for item in outcomes_preprocessed]
    outcomes = [string for string in outcomes_preprocessed if string]

    # Convert the outcome and intervention strings into the specific Outcome and Intervention classes
    outcomes = [Outcome(item.split("=")[0].strip(), item.split("=")[1].strip()) for item in outcomes]
    interventions = [Intervention(item.split("=")[0].strip(), item.split("=")[1].strip()) for item in interventions]

    together = []
    together.extend(outcomes)
    together.extend(interventions)

    return set(together)
130 |
--------------------------------------------------------------------------------
/tests/core/test_Graph.py:
--------------------------------------------------------------------------------
1 | from do.core.Variables import Outcome, Intervention, Variable
2 | from do.core.Graph import to_label
3 |
from ..source import models

# Module-level fixture: the internal graph of the "pearl-3.4" example model, shared by all tests below
graph = models["pearl-3.4.yml"]._g
6 |
7 |
def test_roots():
    # A root has, by definition, no parents
    for root in graph.roots():
        assert len(graph.parents(root)) == 0
10 |
11 |
def test_descendants():
    # A sink has, by definition, no children
    for sink in graph.sinks():
        assert len(graph.children(sink)) == 0
14 |
15 |
def test_parents():
    graph.reset_disabled()
    roots = graph.roots()

    for vertex in graph.v:
        parents = graph.parents(vertex)

        # Every claimed parent must correspond to a real edge into this vertex
        assert all((parent, vertex) in graph.e for parent in parents)

        # The roots are exactly the vertices without parents
        assert (len(parents) == 0) == (vertex in roots)
28 |
29 |
def test_children():
    graph.reset_disabled()

    for vertex in graph.v:
        for child in graph.children(vertex):
            # Each child corresponds to a real edge, and sees this vertex among its parents
            assert (vertex, child) in graph.e
            assert vertex in graph.parents(child)
39 |
40 |
def test_ancestors():
    graph.reset_disabled()

    # Every ancestor of a vertex must, in turn, count that vertex among its descendants
    for vertex in graph.v:
        for ancestor in graph.ancestors(vertex):
            assert vertex in graph.descendants(ancestor)
47 |
48 |
def test_reach():
    graph.reset_disabled()

    # Every descendant of a vertex must, in turn, count that vertex among its ancestors
    for vertex in graph.v:
        for descendant in graph.descendants(vertex):
            assert vertex in graph.ancestors(descendant)
55 |
56 |
def test_disable_outgoing():

    graph.reset_disabled()

    for vertex in graph.v:
        # Snapshot reachability before severing this vertex's outgoing edges
        pre_children = graph.children(vertex)
        pre_descendants = graph.descendants(vertex)

        graph.disable_outgoing(vertex)

        # The vertex can no longer reach anything...
        assert len(graph.children(vertex)) == 0
        assert len(graph.descendants(vertex)) == 0

        # ...and nothing previously reachable sees it as a parent or ancestor
        assert all(vertex not in graph.parents(child) for child in pre_children)
        assert all(vertex not in graph.ancestors(descendant) for descendant in pre_descendants)

    graph.reset_disabled()
73 |
74 |
def test_disable_incoming():

    graph.reset_disabled()

    for vertex in graph.v:
        # Snapshot reachability before severing this vertex's incoming edges
        pre_parents = graph.parents(vertex)
        pre_ancestors = graph.ancestors(vertex)

        graph.disable_incoming(vertex)

        # Nothing can reach the vertex any longer...
        assert len(graph.parents(vertex)) == 0
        assert len(graph.ancestors(vertex)) == 0

        # ...and nothing that previously reached it sees it as a child or descendant
        assert all(vertex not in graph.children(parent) for parent in pre_parents)
        assert all(vertex not in graph.descendants(ancestor) for ancestor in pre_ancestors)

    graph.reset_disabled()
91 |
92 |
def test_topology_sort():
    """Every vertex in a topological ordering comes after its ancestors and before its descendants."""

    topology = graph.topology_sort()

    for i, v in enumerate(topology):

        # Nothing earlier in the ordering may be reachable FROM v
        for before in topology[:i]:
            assert before not in graph.descendants(v)

        # Nothing later in the ordering may reach v
        for after in topology[i:]:
            assert after not in graph.ancestors(v)
105 |
106 |
def test_graph_copy():

    duplicate = graph.copy()

    # Same sizes, but distinct underlying containers
    assert len(graph.v) == len(duplicate.v)
    assert len(graph.e) == len(duplicate.e)
    assert graph.v is not duplicate.v
    assert graph.e is not duplicate.e

    # Identical membership, checked in both directions
    assert all(v in duplicate.v for v in graph.v)
    assert all(v in graph.v for v in duplicate.v)
    assert all(e in duplicate.e for e in graph.e)
    assert all(e in graph.e for e in duplicate.e)
128 |
129 |
def test_without_incoming_edges():

    g = graph.copy()

    roots = g.roots()
    root_children = set().union(*[g.children(x) for x in roots])

    # Severing incoming edges of roots is a no-op; severing those of their children is not
    unchanged = g.without_incoming_edges(roots)
    severed = g.without_incoming_edges(root_children)

    assert g.v == unchanged.v and g.e == unchanged.e  # ensure no change

    assert g.v == severed.v
    assert g.e != severed.e
    assert len(g.e) > len(severed.e)
    assert len(severed.roots()) > len(g.roots())

    # The severed children join the original roots as the new root set
    assert severed.roots() == set(g.roots()) | root_children
147 |
148 |
def test_without_outgoing_edges():

    g = graph.copy()

    sinks = g.sinks()
    sink_parents = set().union(*[g.parents(x) for x in sinks])

    # Severing outgoing edges of sinks is a no-op; severing those of their parents is not
    unchanged = g.without_outgoing_edges(sinks)
    severed = g.without_outgoing_edges(sink_parents)

    assert g.v == unchanged.v and g.e == unchanged.e  # ensure no change

    assert g.v == severed.v
    assert g.e != severed.e
    assert len(g.e) > len(severed.e)
    assert len(severed.sinks()) > len(g.sinks())

    # The severed parents join the original sinks as the new sink set
    assert severed.sinks() == set(g.sinks()) | sink_parents
166 |
167 |
def test_to_label():
    # to_label should reduce an Outcome, Intervention, or Variable to its bare name
    items = (Outcome("Xj", "xj"), Intervention("Xj", "xj"), Variable("Xj", [], []))
    for item in items:
        assert to_label(item) == item.name
176 |
--------------------------------------------------------------------------------
/.github/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | We as members, contributors, and leaders pledge to make participation in our
6 | community a harassment-free experience for everyone, regardless of age, body
7 | size, visible or invisible disability, ethnicity, sex characteristics, gender
8 | identity and expression, level of experience, education, socio-economic status,
9 | nationality, personal appearance, race, religion, or sexual identity
10 | and orientation.
11 |
12 | We pledge to act and interact in ways that contribute to an open, welcoming,
13 | diverse, inclusive, and healthy community.
14 |
15 | ## Our Standards
16 |
17 | Examples of behavior that contributes to a positive environment for our
18 | community include:
19 |
20 | * Demonstrating empathy and kindness toward other people
21 | * Being respectful of differing opinions, viewpoints, and experiences
22 | * Giving and gracefully accepting constructive feedback
23 | * Accepting responsibility and apologizing to those affected by our mistakes,
24 | and learning from the experience
25 | * Focusing on what is best not just for us as individuals, but for the
26 | overall community
27 |
28 | Examples of unacceptable behavior include:
29 |
30 | * The use of sexualized language or imagery, and sexual attention or
31 | advances of any kind
32 | * Trolling, insulting or derogatory comments, and personal or political attacks
33 | * Public or private harassment
34 | * Publishing others' private information, such as a physical or email
35 | address, without their explicit permission
36 | * Other conduct which could reasonably be considered inappropriate in a
37 | professional setting
38 |
39 | ## Enforcement Responsibilities
40 |
41 | Community leaders are responsible for clarifying and enforcing our standards of
42 | acceptable behavior and will take appropriate and fair corrective action in
43 | response to any behavior that they deem inappropriate, threatening, offensive,
44 | or harmful.
45 |
46 | Community leaders have the right and responsibility to remove, edit, or reject
47 | comments, commits, code, wiki edits, issues, and other contributions that are
48 | not aligned to this Code of Conduct, and will communicate reasons for moderation
49 | decisions when appropriate.
50 |
51 | ## Scope
52 |
53 | This Code of Conduct applies within all community spaces, and also applies when
54 | an individual is officially representing the community in public spaces.
55 | Examples of representing our community include using an official e-mail address,
56 | posting via an official social media account, or acting as an appointed
57 | representative at an online or offline event.
58 |
59 | ## Enforcement
60 |
61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
62 | reported to the community leaders responsible for enforcement at
63 | braden.dubois@usask.ca.
64 | All complaints will be reviewed and investigated promptly and fairly.
65 |
66 | All community leaders are obligated to respect the privacy and security of the
67 | reporter of any incident.
68 |
69 | ## Enforcement Guidelines
70 |
71 | Community leaders will follow these Community Impact Guidelines in determining
72 | the consequences for any action they deem in violation of this Code of Conduct:
73 |
74 | ### 1. Correction
75 |
76 | **Community Impact**: Use of inappropriate language or other behavior deemed
77 | unprofessional or unwelcome in the community.
78 |
79 | **Consequence**: A private, written warning from community leaders, providing
80 | clarity around the nature of the violation and an explanation of why the
81 | behavior was inappropriate. A public apology may be requested.
82 |
83 | ### 2. Warning
84 |
85 | **Community Impact**: A violation through a single incident or series
86 | of actions.
87 |
88 | **Consequence**: A warning with consequences for continued behavior. No
89 | interaction with the people involved, including unsolicited interaction with
90 | those enforcing the Code of Conduct, for a specified period of time. This
91 | includes avoiding interactions in community spaces as well as external channels
92 | like social media. Violating these terms may lead to a temporary or
93 | permanent ban.
94 |
95 | ### 3. Temporary Ban
96 |
97 | **Community Impact**: A serious violation of community standards, including
98 | sustained inappropriate behavior.
99 |
100 | **Consequence**: A temporary ban from any sort of interaction or public
101 | communication with the community for a specified period of time. No public or
102 | private interaction with the people involved, including unsolicited interaction
103 | with those enforcing the Code of Conduct, is allowed during this period.
104 | Violating these terms may lead to a permanent ban.
105 |
106 | ### 4. Permanent Ban
107 |
108 | **Community Impact**: Demonstrating a pattern of violation of community
109 | standards, including sustained inappropriate behavior, harassment of an
110 | individual, or aggression toward or disparagement of classes of individuals.
111 |
112 | **Consequence**: A permanent ban from any sort of public interaction within
113 | the community.
114 |
115 | ## Attribution
116 |
117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage],
118 | version 2.0, available at
119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
120 |
121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct
122 | enforcement ladder](https://github.com/mozilla/diversity).
123 |
124 | [homepage]: https://www.contributor-covenant.org
125 |
126 | For answers to common questions about this code of conduct, see the FAQ at
127 | https://www.contributor-covenant.org/faq. Translations are available at
128 | https://www.contributor-covenant.org/translations.
129 |
--------------------------------------------------------------------------------
/do/core/Graph.py:
--------------------------------------------------------------------------------
1 | from typing import Collection, Optional, Sequence, Set, Tuple, Union
2 |
3 | from .Types import VClass, Vertex
4 |
5 |
class Graph:

    """A basic directed graph, with support for temporarily disabling edges."""

    def __init__(self, v: Set[str], e: Set[Tuple[str, str]], topology: Optional[Sequence[Union[str, VClass]]] = None):
        """
        Initializer for a basic Graph.
        @param v: A set of vertices
        @param e: A set of edges, each edge being (source, target)
        @param topology: An optional sequence of vertices defining the topological ordering of the graph
        """

        self.v = v
        self.e = {(s.strip(), t.strip()) for s, t in e}

        # Map each vertex to its direct sources (incoming) and targets (outgoing)
        self.incoming = {vertex.strip(): set() for vertex in v}
        self.outgoing = {vertex.strip(): set() for vertex in v}

        for s, t in e:
            self.outgoing[s].add(t)
            self.incoming[t].add(s)

        # Vertices whose outgoing / incoming edges are currently disabled
        self.outgoing_disabled = set()
        self.incoming_disabled = set()

        if not topology:
            topology = self.topology_sort()
        else:
            # Drop any vertices in the provided topology that are not in this graph (supports subgraphs)
            topology = list(filter(lambda x: x in v, topology))

        self.topology_map = {vertex: index for index, vertex in enumerate(topology, start=1)}

    def __str__(self) -> str:
        """
        String builtin for the Graph class
        @return: A string representation of the given Graph instance
        """
        msg = "Vertices: " + ", ".join(sorted(i for i in self.v)) + "\n"
        msg += "Edges:\n" + "\n".join(" -> ".join(i for i in edge) for edge in self.e)
        return msg

    def roots(self) -> Collection[str]:
        """
        Get the roots of the graph G.
        @return: A set of vertices (strings) in G that have no parents.
        """
        return {x for x in self.v if len(self.parents(x)) == 0}

    def sinks(self) -> Collection[str]:
        """
        Get the sinks of the graph G.
        @return: A collection of string vertices in G that have no children.
        """
        return {x for x in self.v if len(self.children(x)) == 0}

    def parents(self, v: Vertex) -> Collection[Vertex]:
        """
        Get the parents of v, which may actually be currently controlled
        @param v: A variable in our graph
        @return: All parents reachable (which would be none if being controlled)
        """
        label = to_label(v)
        if label in self.incoming_disabled:
            return set()

        # NOTE(review): this also excludes a parent p when a reverse edge v -> p exists
        # (a two-cycle); confirm that exclusion is intended
        return {p for p in self.incoming[label] if p not in self.outgoing_disabled and p not in self.outgoing[label]}

    def children(self, v: Vertex) -> Collection[Vertex]:
        """
        Get the children of v, which may actually be currently controlled
        @param v: A variable in our graph
        @return: All children reachable (which would be none if being controlled)
        """
        label = to_label(v)
        if label in self.outgoing_disabled:
            return set()

        # NOTE(review): this also excludes a child c when a reverse edge c -> v exists
        # (a two-cycle); confirm that exclusion is intended
        return {c for c in self.outgoing[label] if c not in self.incoming_disabled and c not in self.incoming[label]}

    def ancestors(self, v: Vertex) -> Collection[Vertex]:
        """
        Get the ancestors of v, accounting for disabled vertices
        @param v: The vertex to find all ancestors of
        @return: A set of reachable ancestors of v
        """

        ancestors = set()
        queue = list(self.parents(v))

        # Breadth-first walk up the graph; skip already-expanded vertices so shared
        # ancestry is not re-walked (the previous version could re-enqueue them repeatedly)
        while queue:
            current = queue.pop(0)
            if current in ancestors:
                continue
            ancestors.add(current)
            queue.extend(self.parents(current))

        return ancestors

    def descendants(self, v: Vertex) -> Collection[Vertex]:
        """
        Get the reach of v, accounting for disabled vertices
        @param v: The vertex to find all descendants of
        @return: A set of reachable descendants of v
        """

        descendants = set()
        queue = list(self.children(v))

        # Breadth-first walk down the graph; skip already-expanded vertices so shared
        # descendants are not re-walked (the previous version could re-enqueue them repeatedly)
        while queue:
            current = queue.pop(0)
            if current in descendants:
                continue
            descendants.add(current)
            queue.extend(self.children(current))

        return descendants

    def disable_outgoing(self, *disable: Vertex):
        """
        Disable the given vertices' outgoing edges
        @param disable: Any number of vertices to disable
        """
        for v in disable:
            self.outgoing_disabled.add(to_label(v))

    def disable_incoming(self, *disable: Vertex):
        """
        Disable the given vertices' incoming edges
        @param disable: Any number of vertices to disable
        """
        for v in disable:
            self.incoming_disabled.add(to_label(v))

    def reset_disabled(self):
        """
        Clear and reset all the disabled edges, restoring the graph
        """
        self.outgoing_disabled.clear()
        self.incoming_disabled.clear()

    def get_topology(self, v: Vertex) -> int:
        """
        Determine the "depth" a given Variable is at in a topological sort of the graph
        @param v: The variable to determine the depth of
        @return: Some non-negative integer representing the depth of this variable
        """
        return self.topology_map[to_label(v)]

    def copy(self):
        """
        Public copy method; copies v, e, and the disabled sets
        @return: A copied Graph
        """
        return self.__copy__()

    def __copy__(self):
        """
        Copy builtin allowing the Graph to be copied
        @return: A copied Graph
        """
        copied = Graph(self.v.copy(), set(self.e.copy()))
        copied.incoming_disabled = self.incoming_disabled.copy()
        copied.outgoing_disabled = self.outgoing_disabled.copy()
        return copied

    def __getitem__(self, v: set):
        """
        Compute a subset V of some Graph G.
        :param v: A set of variables in G.
        :return: A Graph representing the subgraph G[V].
        """
        return Graph({s for s in self.v if s in v}, {s for s in self.e if s[0] in v and s[1] in v})

    def descendant_first_sort(self, variables: Collection[Vertex]) -> Sequence[Vertex]:
        """
        A helper function to "sort" a list of Variables/Outcomes/Interventions such that no element has a
        "parent"/"ancestor" to its left
        @param variables: A list of any number of Variable/Outcome/Intervention instances
        @return: A sorted list, such that any instance has no ancestor earlier in the list
        """
        return sorted(variables, key=lambda v: self.get_topology(v))

    def topology_sort(self) -> Sequence[str]:
        """
        Compute a topological ordering of the graph's vertices.
        @return: A list of vertices such that every vertex appears after all of the vertices with
            an edge into it; ties are broken alphabetically for determinism
        """

        topology = []
        v = self.v.copy()
        e = self.e.copy()

        # Repeatedly peel off the current roots; fails (assert) if the graph contains a cycle
        while len(v) > 0:

            roots = set(filter(lambda t: not any((s, t) in e for s in v), v))
            assert len(roots) > 0

            topology.extend(sorted(list(roots)))
            v -= roots
            e -= set(filter(lambda edge: edge[0] in roots, e))

        return topology

    def without_incoming_edges(self, x: Collection[Vertex]):
        """
        Construct a new Graph with all edges INTO the vertices of x removed.
        @param x: A collection of vertices whose incoming edges should be severed
        @return: A new Graph instance; this graph is unmodified
        """

        v = self.v.copy()
        e = {(s, t) for (s, t) in self.e if t not in x}

        return Graph(v, e)

    def without_outgoing_edges(self, x: Collection[Vertex]):
        """
        Construct a new Graph with all edges OUT OF the vertices of x removed.
        @param x: A collection of vertices whose outgoing edges should be severed
        @return: A new Graph instance; this graph is unmodified
        """

        v = self.v.copy()
        e = {(s, t) for (s, t) in self.e if s not in x}

        return Graph(v, e)
217 |
218 |
def to_label(item: VClass) -> str:
    """
    Convert a variable to its string name, if not already provided as such
    @param item: The item to convert, either a string (done) or some Variable
    @return: A string name of the given item, if not already provided as a string
    """
    if isinstance(item, str):
        return item.strip("'")
    return item.name.strip("'")
226 |
--------------------------------------------------------------------------------
/do/identification/LatentGraph.py:
--------------------------------------------------------------------------------
1 | from itertools import product
2 | from typing import List, Iterable, Set, Tuple
3 |
4 | from ..core.Graph import Graph
5 |
6 |
class LatentGraph(Graph):
    """
    A Graph extended with bidirected edges, used by the identification algorithm to
    represent latent (unobservable) confounding: a bidirected edge between X and Y
    stands for an unobserved common cause of both.
    """

    def __init__(self, vertices: Set[str], edges: Set[Tuple[str, str]], e_bidirected: Set[Tuple[str, str]], fixed_topology: List[str] = None):
        """
        @param vertices: A set of (string) vertex names.
        @param edges: A set of directed edges, each a (source, target) tuple.
        @param e_bidirected: A set of bidirected edges; tuple orientation is not meaningful (see __eq__).
        @param fixed_topology: An optional topological ordering to reuse rather than recompute,
            e.g. when constructing a subgraph of an existing LatentGraph.
        """
        super().__init__(vertices, edges, fixed_topology)
        self.e_bidirected = e_bidirected.copy()
        # NOTE(review): stores the caller's set without copying, unlike e_bidirected - confirm intended
        self.V = vertices
        # Confounded components: vertex sets connected through bidirected edges
        self.C = self.make_components()

        # Allows passing a topology down to a subgraph
        if fixed_topology:

            # filter any vertices from the given topology that don't exist as vertices in the graph
            filtered_topology = [x for x in fixed_topology if x in vertices]

            # ensure topology fully represents the graph
            # NOTE(review): the first assert is vacuous - filtered_topology was built only from vertices
            assert all(x in vertices for x in filtered_topology), "vertex in the given topology is not in the graph!"
            assert all(x in filtered_topology for x in vertices), "vertex in the graph is not in the given topology!"

            self.v_Pi = filtered_topology

        # Otherwise, generate it
        else:
            self.v_Pi = self.__kahns()

    def __str__(self):
        """String representation listing V, E, and the bidirected edge set."""
        return f"Graph: V = {', '.join(self.v)}, E = {', '.join(list(map(str, self.e)))}, E (Bidirected) = {', '.join(list(map(str, self.e_bidirected)))}"

    def __getitem__(self, v: Set[str]):
        """Vertex-induced subgraph: keep only edges with both endpoints in v, passing the topology down."""
        e = {(s, t) for (s, t) in self.e if s in v and t in v}
        e_bidirected = {(s, t) for (s, t) in self.e_bidirected if s in v and t in v}
        return LatentGraph(self.v & v, e, e_bidirected, self.v_Pi)

    def __eq__(self, other):
        """Structural equality; bidirected edges are compared orientation-insensitively."""
        # NOTE(review): defining __eq__ without __hash__ makes instances unhashable - confirm acceptable
        if not isinstance(other, LatentGraph):
            return False

        return self.v == other.v and self.e == other.e and \
            all([(e[0], e[1]) in other.e_bidirected or (e[1], e[0]) in other.e_bidirected for e in self.e_bidirected]) and \
            all([(e[0], e[1]) in self.e_bidirected or (e[1], e[0]) in self.e_bidirected for e in other.e_bidirected])

    def biadjacent(self, v: str):
        """Return the set of vertices joined to v by a bidirected edge (in either position)."""
        return {e[0] if e[0] != v else e[1] for e in self.e_bidirected if v in e}

    def ancestors(self, y: Set[str]):
        """Return y together with every ancestor (transitive parent) of any vertex in y."""
        ans = y.copy()
        for v in y:
            for p in self.parents(v):
                # Recursive ascent; revisits shared ancestors (fine for small graphs)
                ans |= self.ancestors({p})
        return ans

    # puts nodes in topological ordering
    def __kahns(self):
        """Kahn's algorithm over copies of V and E; returns a topological order w.r.t. directed edges."""

        edges = self.e.copy()
        vertices = self.v.copy()
        v_Pi = []

        # Seed the ready-list with isolated vertices (touching no edge) ...
        s = vertices - ({e[0] for e in edges} | {e[1] for e in edges})
        # ... plus every source (vertex with no incoming edge)
        s |= set([e[0] for e in edges if e[0] not in {g[1] for g in edges}])
        s = list(s)

        while s:
            n = s.pop()
            v_Pi.append(n)

            # Remove n's outgoing edges; a child left with no incoming edges becomes ready
            ms = {e[1] for e in edges if e[0] == n}
            for m in ms:
                edges.remove((n, m))
                if {e for e in edges if e[1] == m} == set():
                    s.append(m)

        return v_Pi

    def make_components(self):
        """Group vertices into confounded components via BFS over bidirected edges only."""

        ans = []
        all_v = self.v.copy()
        visited = set()

        while all_v:
            start = all_v.pop()
            component = []
            q = [start]

            while q:
                v = q.pop(0)
                if v not in visited:
                    visited.add(v)
                    component.append(v)
                    q.extend([vs for vs in self.biadjacent(v) if vs not in visited])

            # A start vertex already claimed by an earlier component yields an empty list; skip it
            if component:
                ans.append(set(component))

        return ans

    def without_incoming(self, x: Iterable[str]):
        """Return a copy of this graph with every directed edge INTO any vertex of x removed."""
        return LatentGraph(self.v, self.e - {e for e in self.e if e[1] in x}, self.e_bidirected, self.v_Pi)

    def collider(self, v1, v2, v3):
        """
        Test the triple (v1, v2, v3) at middle vertex v2.
        NOTE(review): this checks v1 -> v2 -> v3 (v3 among v2's CHILDREN), i.e. a chain,
        not the conventional collider v1 -> v2 <- v3 - confirm the intended semantics.
        """
        return v1 in self.V and v2 in self.V and v3 in self.V and v1 in self.parents(v2) and v3 in self.children(v2)

    def all_paths(self, x: Iterable[str], y: Iterable[str]):
        """
        For each (s, t) in x X y, enumerate every path from s to t, ignoring edge direction.
        Bidirected hops are recorded by inserting a synthetic placeholder vertex mid-path.
        @return: A list (one entry per (s, t) pair) of lists of paths.
        """

        def path_list(s, t):  # returns all paths from X to Y regardless of direction of link (no bd links)

            # generate a fake variable to represent unobservable variables
            # (grow "U" until it collides with no real vertex name)
            UNOBSERVABLE = "U"
            while UNOBSERVABLE in self.V:
                UNOBSERVABLE += "U"

            from_s_s = [[s]]  # BFS frontier of partial paths anchored at s
            ans = []
            while from_s_s:
                from_s = from_s_s.pop(0)

                # Directed links (either orientation)
                for each in set(self.parents(from_s[-1])) | set(self.children(from_s[-1])):
                    if each == t:
                        path = from_s.copy()
                        path.append(t)
                        ans.append(path)
                    elif each not in from_s:
                        r = from_s.copy()
                        r.append(each)
                        from_s_s.append(r)

                # Bidirected links: the placeholder marks the hidden intermediate
                for each in self.biadjacent(from_s[-1]):
                    if each == t:
                        path = from_s.copy()
                        path.append(UNOBSERVABLE)
                        path.append(t)
                        ans.append(path)
                    elif each not in from_s:
                        r = from_s.copy()
                        r.append(UNOBSERVABLE)
                        r.append(each)
                        from_s_s.append(r)
            return ans

        return [path_list(q, w) for q, w in product(x, y)]

    def ci(self, x: Set[str], y: Set[str], z: Set[str]):
        """
        Test conditional independence of x and y given z over the paths from all_paths.
        Returns True only when every path is blocked by some member of z; any path whose
        interior contains a triple satisfying self.collider immediately yields False.
        NOTE(review): this differs from textbook d-separation (where conditioning on a
        collider OPENS a path) - confirm against the intended definition.
        """
        paths = self.all_paths(x, y)
        for path_pair in paths:
            for path in path_pair:
                broke = False
                for idx, element in enumerate(path):
                    if 0 < idx < len(path) - 1:
                        if self.collider(path[idx - 1], element, path[idx + 1]):
                            return False
                        if element in z:
                            broke = True
                            break
                if not broke:
                    return False
        return True
166 |
167 |
def latent_transform(g: Graph, u: Set[str]):
    """
    Convert a Graph containing unobservable variables into a LatentGraph over the
    observable vertices only: chains of unobservables are collapsed, and each remaining
    unobservable is replaced by bidirected arcs linking its (observable) children.
    @param g: The source Graph.
    @param u: The set of unobservable variable names within g.
    @return: A LatentGraph over g's observable vertices, with a topology inherited
        from g's topological sort restricted to the observable vertices.
    """

    V = g.v.copy()
    E = set(g.e.copy())
    E_Bidirected = set()

    Un = u.copy()

    # Collapse unobservable variables, such as U1 -> U2 -> V ==> U1 -> V
    reduction = True
    while reduction:
        reduction = False

        remove = set()
        for un in Un:

            parents = [edge[0] for edge in E if edge[1] == un]   # Edges : parent -> u
            children = [edge[1] for edge in E if edge[0] == un]  # Edges : u -> child

            # All parents are unobservable, all children are observable, at least one parent
            if all(x in u for x in parents) and len(parents) > 0 and all(x not in u for x in children):
                reduction = True

                # Remove edges from parents to u
                for parent in parents:
                    E.remove((parent, un))

                # Remove edges from u to children
                for child in children:
                    E.remove((un, child))

                # Replace with an edge from each parent to each child
                for cr in product(parents, children):
                    E.add((cr[0], cr[1]))

                # U can be removed entirely from graph
                remove.add(un)

        V -= remove
        Un -= remove

    # Convert all remaining unobservable to a list to iterate through
    Un = list(Un)

    # Replace each remaining unobservable with bi-directed arcs between its children
    while len(Un) > 0:

        # Take one "current" unobservable to remove, and remove it from the graph entirely
        cur = Un.pop()
        V.remove(cur)

        assert len([edge for edge in E if edge[1] == cur]) == 0, \
            "Unobservable still had parent left."

        # All outgoing edges of this unobservable
        child_edges = {edge for edge in E if edge[0] == cur}
        E -= child_edges

        # Link consecutive children in a ring of bidirected arcs
        # NOTE(review): a single child yields a (c, c) self-loop bidirected arc - confirm intended
        child_edges = list(child_edges)
        for i in range(len(child_edges)):
            a, b = child_edges[i], child_edges[(i + 1) % len(child_edges)]
            E_Bidirected.add((a[1], b[1]))

    # Fixed: removed leftover debug print(V, u)
    return LatentGraph(V, E, E_Bidirected, [x for x in g.topology_sort() if x in V - u])
234 |
--------------------------------------------------------------------------------
/do/deconfounding/Backdoor.py:
--------------------------------------------------------------------------------
1 | from itertools import product
2 | from typing import Collection, List, Optional
3 |
4 | from ..core.Graph import Graph
5 | from ..core.Types import Path, Vertex
6 | from ..core.Exceptions import IntersectingSets
7 |
8 | from ..core.helpers import disjoint, minimal_sets, power_set
9 |
10 |
def backdoors(src: Collection[Vertex], dst: Collection[Vertex], graph: Graph, dcf: Optional[Collection[Vertex]] = None) -> Collection[Path]:
    """
    Find every backdoor path from a set of source vertices to a set of destination
    vertices in the given graph. A (possibly empty) deconfounding set may block -
    or open - individual backdoor paths.
    @param src: The source set of vertices to search for paths from.
    @param dst: The destination set of vertices to search toward.
    @param graph: The Graph to search within.
    @param dcf: An optional set of vertices that may serve as a sufficient
        deconfounding set to block or open backdoor paths.
    @return: A list of paths; each path is a list of string vertices whose first and
        last elements belong to src and dst respectively.
    @raise IntersectingSets: if src, dst, and dcf are not pairwise disjoint.
    """

    source_names = str_map(src)
    destination_names = str_map(dst)
    blocking_names = str_map(dcf) if dcf else set()

    if not disjoint(source_names, destination_names, blocking_names):
        raise IntersectingSets

    # Try every (source, destination) pairing and pool the discovered paths
    return [
        path
        for s, t in product(source_names, destination_names)
        for path in _backdoor_paths_pair(s, t, graph, blocking_names)
    ]
39 |
40 |
def deconfound(src: Collection[Vertex], dst: Collection[Vertex], graph: Graph) -> Collection[Collection[Vertex]]:
    """
    Compute all minimal sufficient deconfounding sets between src and dst in graph.
    @param src: The source (treatment) vertices.
    @param dst: The destination (outcome) vertices.
    @param graph: The Graph to search within.
    @return: A list of the minimal vertex sets that leave no open backdoor path
        between any pairing of src and dst.
    """

    source_names = str_map(src)
    destination_names = str_map(dst)

    # Endpoints - and any descendant of a source vertex - can never serve as blockers
    unusable = source_names | destination_names
    for s in source_names:
        unusable |= set(graph.descendants(s))

    def blocks_everything(candidate) -> bool:
        # True when no (s, t) pairing is left with an open backdoor path
        return all(
            len(_backdoor_paths_pair(s, t, graph, set(candidate))) == 0
            for s, t in product(source_names, destination_names)
        )

    # Candidates are the power set of the remaining vertices; keep the sufficient ones
    sufficient = [candidate for candidate in power_set(graph.v - unusable) if blocks_everything(candidate)]

    return list(minimal_sets(*sufficient))
71 |
72 |
def all_paths_cumulative(s: str, t: str, path: list, path_list: list, graph: Graph) -> Collection[Path]:
    """
    Collect every directed (parent-to-child) path from a source vertex to a target
    vertex, accumulating results across recursive calls.
    This is a modified version of the graph-traversal algorithm provided by Dr. Eric Neufeld.
    @param s: A source (string) vertex defined in the graph.
    @param t: A target (string) destination vertex defined in the graph.
    @param path: The path walked so far in the current traversal.
    @param path_list: The accumulator of complete s-to-t paths found so far.
    @param graph: The Graph being traversed.
    @return: path_list, extended with every path from s to t found below this call.
    """
    # Arrived: record the completed path
    if s == t:
        return path_list + [path + [t]]

    # Revisiting a vertex already on the path would loop forever; prune this branch
    if s in path:
        return path_list

    walked = path + [s]
    for successor in graph.children(s):
        path_list = all_paths_cumulative(successor, t, walked, path_list, graph)
    return path_list
90 |
91 |
def independent(src: Collection[Vertex], dst: Collection[Vertex], dcf: Optional[Collection[Vertex]], graph: Graph) -> bool:
    """
    Determine whether two sets of vertices are independent in the graph, given some
    (possibly absent) deconfounding set.
    @param src: A source set X, to be independent from Y.
    @param dst: A destination set Y, to be independent from X.
    @param dcf: An optional deconfounding set Z, blocking paths between X and Y.
    @param graph: The Graph to test within.
    @return: True when no open backdoor path and no directed path connects X and Y.
    """

    source_names = str_map(src)
    destination_names = str_map(dst)
    blocking_names = str_map(dcf) if dcf else set()

    # Any remaining open backdoor path defeats independence
    if backdoors(source_names, destination_names, graph, blocking_names):
        return False

    # So does any straight-line (directed) path, in either direction
    for a, b in product(source_names, destination_names):
        if all_paths_cumulative(a, b, [], [], graph):
            return False  # a -> b
        if all_paths_cumulative(b, a, [], [], graph):
            return False  # b -> a

    # Nothing connects the two sets; they are independent
    return True
119 |
120 |
121 | def _backdoor_paths_pair(s: Collection[str], t: Collection[str], graph: Graph, dcf: Collection[str]) -> List[Path]:
122 | """
123 | Find all backdoor paths between any particular pair of vertices in the loaded graph
124 | @param s: A source (string) vertex in the graph
125 | @param t: A destination (string) vertex in the graph
126 | @param dcf: A set of (string) variables, by which movement through any variable is controlled. This can serve
127 | as a sufficient "blocking" set, or may open additional backdoor paths
128 | @return Return a list of lists, where each sublist is a path of string vertices connecting s and t.
129 | Endpoints s and t are the first and last elements of any sublist.
130 | """
131 |
132 | def get_backdoor_paths(cur: str, path: list, path_list: list, previous="up") -> list:
133 | """
134 | Return a list of lists of all paths from a source to a target, with conditional movement of either
135 | child to parent or parent to child. This may include an edge case that is not a backdoor path, which
136 | is filtered in the parent function, otherwise all paths will be backdoor paths.
137 | This is a heavily modified version of the graph-traversal algorithm provided by Dr. Eric Neufeld.
138 | @param cur: The current (string) vertex we are at in a traversal.
139 | @param path: The current path from s, our source.
140 | @param path_list: A list of lists, each sublist being a path discovered so far.
141 | @param previous: Whether moving from the previous variable to current we moved "up" (child to parent) or
142 | "down" (from parent to child); this movement restriction is involved in backdoor path detection
143 | @return: A list of lists, where each sublist is a path from s to t.
144 | """
145 |
146 | # Reached target
147 | if cur == t:
148 | return path_list + [path + [t]]
149 |
150 | # No infinite loops
151 | if cur not in path:
152 |
153 | if previous == "down":
154 |
155 | # We can ascend on a controlled collider, OR an ancestor of a controlled collider
156 | if cur in dcf or any(map(lambda v: v in dcf, graph.descendants(cur))):
157 | for parent in graph.parents(cur):
158 | path_list = get_backdoor_paths(parent, path + [cur], path_list, "up")
159 |
160 | # We can *continue* to descend on a non-controlled variable
161 | if cur not in dcf:
162 | for child in graph.children(cur):
163 | path_list = get_backdoor_paths(child, path + [cur], path_list, "down")
164 |
165 | if previous == "up" and cur not in dcf:
166 |
167 | # We can ascend on a non-controlled variable
168 | for parent in graph.parents(cur):
169 | path_list = get_backdoor_paths(parent, path + [cur], path_list, "up")
170 |
171 | # We can descend on a non-controlled reverse-collider
172 | for child in graph.children(cur):
173 | path_list = get_backdoor_paths(child, path + [cur], path_list, "down")
174 |
175 | return path_list
176 |
177 | # Get all possible backdoor paths
178 | backdoor_paths = get_backdoor_paths(s, [], [])
179 |
180 | # Filter out the paths that don't "enter" x; see the definition of a backdoor path
181 | return list(filter(lambda l: len(l) > 2 and l[0] in graph.children(l[1]) and l[1] != t, backdoor_paths))
182 |
183 |
def str_map(to_filter: Collection[Vertex]):
    """Normalize a collection of vertices (strings or objects with a .name) into a set of string names."""
    return {vertex if isinstance(vertex, str) else vertex.name for vertex in to_filter}
186 |
--------------------------------------------------------------------------------
/tests/identification/test_LatentGraph.py:
--------------------------------------------------------------------------------
1 | from itertools import product
2 |
3 | from do.identification.Identification import Identification, simplify_expression
4 | from do.identification.LatentGraph import LatentGraph
5 | from do.identification.PExpression import PExpression, TemplateExpression
6 |
7 |
def parse_graph_string(graph_string: str) -> LatentGraph:
    """
    Parse a compact graph-description string into a LatentGraph.

    Grammar: statements are separated by "."; each statement is a chain of
    comma-separated vertex lists joined by "->", "<-", or "<->" arrows, e.g.
    "X,Z1->Z2->Z3." or "B<->C.". A statement with no arrow declares bare vertices.
    @param graph_string: The graph description, e.g. "M->S.S,M->C."
    @return: A LatentGraph containing the parsed vertices, directed edges, and
        bidirected edges.
    """

    from re import split

    arrows = ["<->", "<-", "->"]

    # The trailing "." produces one empty trailing piece; drop it
    statements = graph_string.strip().split(".")[:-1]
    # Fixed: removed leftover debug print of the split statements

    e = set()
    v = set()
    e_b = set()

    for item in statements:

        # An item like "X" or "X,Y" declares vertices that have no edges
        if not any(arrow in item for arrow in arrows):
            v.update(item.split(","))
            continue  # nothing further to parse for a vertex-only statement

        # Split while capturing the arrows, e.g. "S->T->C" -> ["S", "->", "T", "->", "C"]
        parse = split(f'({"|".join(arrows)})', item)

        for i in range(1, len(parse), 2):

            # Left and Right are comma-separated lists of values, arrow being the "->" -style arrow joining them.
            left, arrow, right = parse[i-1].split(","), parse[i], parse[i+1].split(",")

            # Add all vertices into V
            v.update(left)
            v.update(right)

            for s, t in product(left, right):

                if arrow == "<-":
                    e.add((t, s))

                elif arrow == "->":
                    e.add((s, t))

                elif arrow == "<->":
                    e_b.add((s, t))

                else:
                    print("Invalid Arrow Type:", arrow)

    return LatentGraph(v, e, e_b)
53 |
54 |
# Test fixtures: eight problem sets, each a LatentGraph, its string form, a list of
# (outcome-set, treatment-set) queries, and the expected rendering of each answer.

#########################################
# graph 1
#########################################

g_1 = LatentGraph({'C', 'S', 'M'}, {('M', 'S'), ('S', 'C'), ('M', 'C')}, set())
g_1_string = "M->S.S,M->C."

g1_q1 = ({'C'}, {'S'})
g1_q2 = ({'C'}, {'M'})
g1_q3 = ({'C'}, {'S', 'M'})

g1_a1 = "C | S = "
g1_a2 = "C | M = "
g1_a3 = "C | S, M = [C|M,S]"

g1_queries = [g1_q1, g1_q2, g1_q3]
g1_answers = [g1_a1, g1_a2, g1_a3]



#########################################
# queries - graph 2
#########################################

g_2 = LatentGraph({'A', 'B', 'C', 'D'}, {('A', 'B'), ('A', 'C'), ('B', 'D'), ('C', 'D')}, set())
g_2_string = "A->B,C.B,C->D."

g2_q1 = ({'D'}, {'A'})
g2_q2 = ({'D'}, {'B'})
g2_q3 = ({'D'}, {'C'})
g2_q4 = ({'D'}, {'B', 'C'})

g2_a1 = "D | A = "
g2_a2 = "D | B = "
g2_a3 = "D | C = "
g2_a4 = "D | C, B = [D|A,B,C]"

g2_queries = [g2_q1, g2_q2, g2_q3, g2_q4]
g2_answers = [g2_a1, g2_a2, g2_a3, g2_a4]

#########################################
# queries - graph 3
#########################################

g_3 = LatentGraph({'B', 'C', 'D'}, {('B', 'D'), ('C', 'D')}, {('B', 'C')})
g_3_string = "B<->C.B,C->D."

g3_q1 = ({'D'}, {'B'})
g3_q2 = ({'D'}, {'C'})
g3_q3 = ({'D'}, {'B', 'C'})

g3_a1 = "D | B = "
g3_a2 = "D | C = "
g3_a3 = "D | C, B = [D|B,C]"

g3_queries = [g3_q1, g3_q2, g3_q3]
g3_answers = [g3_a1, g3_a2, g3_a3]

#########################################
# queries - graph 4
#########################################

g_4 = LatentGraph({'S', 'T', 'C'}, {('S', 'T'), ('T', 'C')}, {('S', 'C')})
g_4_string = "S->T->C.S<->C."

g4_q1 = ({'C'}, {'S'})

g4_a1 = "C | S = "

g4_queries = [g4_q1]
g4_answers = [g4_a1]

#########################################
# queries - graph 5
#########################################

g_5 = LatentGraph({"X", "Y", "Z1", "Z2", "Z3"},
                  {("Z1", "Z2"), ("X", "Z2"), ("Z2", "Z3"), ("X", "Y"), ("Z2", "Y"), ("Z3", "Y")}, {("X", "Z1"), ("Z1", "Z3")}
                  )
g_5_string = "X<->Z1<->Z3.X,Z1->Z2->Z3.X,Z2,Z3->Y."

g5_q1 = ({"Y"}, {"X"})  # paper: Sum_{X} [P(Z3 | Z2, Z1, X), P(Z1 | X), P(X)]
g5_q2 = ({"Y"}, {"X", "Z1", "Z2", "Z3"})

g5_a1 = "Y | X = "
g5_a2 = "Y | Z2, Z1, X, Z3 = [Y|X,Z2,Z3]"

g5_queries = [g5_q1, g5_q2]
g5_answers = [g5_a1, g5_a2]

#########################################
# queries - graph 6
#########################################

g_6 = LatentGraph({"X", "Y1", "Y2", "W1", "W2"},
                  {("W1", "X"), ("X", "Y1"), ("W2", "Y2")}, {("W1", "W2"), ("W1", "Y1"), ("W2", "X")}
                  )
g_6_string = "Y1<->W1<->W2<->X.W1->X->Y1.W2->Y2."

g6_q1 = ({"Y1", "Y2"}, {"X"})
g6_a1 = "Y2, Y1 | X = [W2] "

g6_queries = [g6_q1]
g6_answers = [g6_a1]

#########################################
# queries - graph 7
#########################################

g_7 = LatentGraph({"Z1", "Z2", "W", "X", "Y"},
                  {("Z2", "X"), ("X", "W"), ("W", "Y"), ("Z1", "Y")}, {("Z1", "Z2"), ("Z2", "W"), ("Z1", "W")}
                  )
g_7_string = "Z1<->Z2<->W<->Z1.Z2->X->W->Y<-Z1."

g7_q1 = ({"Y"}, {"X"})
g7_a1 = "Y | X = "

g7_queries = [g7_q1]
g7_answers = [g7_a1]

#########################################
# queries - graph 8
#########################################

g_8 = LatentGraph({'S1', 'T1', 'C1', 'S2', 'T2', 'C2', 'C'}, {('S1', 'T1'), ('T1', 'C1'), ('S2', 'T2'), ('T2', 'C2'), ('C2', 'C'), ('C1', 'C')}, {('S1', 'C1'), ('S2', 'C2')})

g_8_string = "S1->T1->C1<->S1.S2->T2->C2<->S2.C2->C<-C1."

g8_q1 = ({'C1'}, {'S1'})
g8_q2 = ({'C2'}, {'S2'})
g8_q3 = ({'C1', 'C2'}, {'S1', 'S2'})
g8_q4 = ({'C'}, {'S1', 'S2'})

# NOTE(review): these ">" answers look like placeholders - confirm the expected values
g8_a1 = ">"
g8_a2 = ">"
g8_a3 = ">"
g8_a4 = ">"

g8_queries = [g8_q1, g8_q2, g8_q3, g8_q4]
g8_answers = [g8_a1, g8_a2, g8_a3, g8_a4]

#########################################

# One entry per problem set, consumed by tests() below
all_tests = [
    {
        "queries": g1_queries,
        "answers": g1_answers,
        "g": g_1,
        "as_string": g_1_string,
    }, {
        "queries": g2_queries,
        "answers": g2_answers,
        "g": g_2,
        "as_string": g_2_string,
    }, {
        "queries": g3_queries,
        "answers": g3_answers,
        "g": g_3,
        "as_string": g_3_string,
    }, {
        "queries": g4_queries,
        "answers": g4_answers,
        "g": g_4,
        "as_string": g_4_string,
    }, {
        "queries": g5_queries,
        "answers": g5_answers,
        "g": g_5,
        "as_string": g_5_string,
    }, {
        "queries": g6_queries,
        "answers": g6_answers,
        "g": g_6,
        "as_string": g_6_string,
    }, {
        "queries": g7_queries,
        "answers": g7_answers,
        "g": g_7,
        "as_string": g_7_string,
    }, {
        "queries": g8_queries,
        "answers": g8_answers,
        "g": g_8,
        "as_string": g_8_string,
    }
]
241 |
242 |
def test_GraphParse1():
    # Round-trip: parsing g_1's string form must equal g_1 (LatentGraph.__eq__)
    assert g_1 == parse_graph_string(g_1_string)
245 |
def test_GraphParse2():
    # Round-trip: parsing g_2's string form must equal g_2
    assert g_2 == parse_graph_string(g_2_string)
248 |
def test_GraphParse3():
    # Round-trip: parsing g_3's string form must equal g_3 (includes bidirected edges)
    assert g_3 == parse_graph_string(g_3_string)
251 |
def test_GraphParse4():
    # Round-trip: parsing g_4's string form must equal g_4
    assert g_4 == parse_graph_string(g_4_string)
254 |
def test_GraphParse5():
    # Round-trip: parsing g_5's string form must equal g_5 (chained arrow statements)
    assert g_5 == parse_graph_string(g_5_string)
257 |
def test_GraphParse6():
    # Round-trip: parsing g_6's string form must equal g_6
    assert g_6 == parse_graph_string(g_6_string)
260 |
def test_GraphParse7():
    # Round-trip: parsing g_7's string form must equal g_7
    assert g_7 == parse_graph_string(g_7_string)
263 |
def test_GraphParse8():
    # Round-trip: parsing g_8's string form must equal g_8 (mixed <- and <-> arrows)
    assert g_8 == parse_graph_string(g_8_string)
266 |
267 |
def tests():
    """
    Manual end-to-end driver: for each stored problem set, verify that the graph's
    string form parses back to an equal graph, then run Identification over every
    query and print the resulting (and simplified) proofs.
    """

    for index, problem_set in enumerate(all_tests, start=1):

        banner = "*" * 20
        print(banner, f"Beginning Graph {index}", banner)

        graph = problem_set["g"]
        distribution = PExpression([], [TemplateExpression(v, list(graph.parents(v))) for v in graph.V])

        # Round-trip the string form through the parser and compare
        graph_string = problem_set["as_string"]

        print(f"Graph String: {index}")
        print(graph_string)
        reparsed = parse_graph_string(graph_string)

        print("Original:", graph)
        print(" Parsed:", reparsed)
        assert graph == reparsed

        # Identify each (outcomes, treatments) query and print its proof
        for problem_number, (query, answer) in enumerate(zip(problem_set["queries"], problem_set["answers"]), start=1):

            outcomes, treatments = query

            print(f"Beginning problem ({problem_number}): {', '.join(outcomes)} | {', '.join(treatments)}")
            identified = Identification(outcomes, treatments, distribution, graph, True)
            simplified = simplify_expression(identified, graph)

            print("*********** Proof")
            print(identified.proof())

            print("*********** Proof (Simplified)")
            print(simplified.proof())
303 |
--------------------------------------------------------------------------------
/do/core/Inference.py:
--------------------------------------------------------------------------------
1 | from itertools import product
2 | from loguru import logger
3 | from typing import Collection
4 |
5 | from .Exceptions import ExogenousNonRoot, ProbabilityIndeterminableException
6 | from .Expression import Expression
7 | from .Model import Model
8 | from .Variables import Outcome, Intervention
9 |
10 | from .helpers import within_precision
11 |
12 |
def inference(expression: Expression, model: Model):
    """
    Evaluate the probability of `expression` (head outcomes given body outcomes)
    under `model` by recursively applying classical inference rules.
    @param expression: The Expression to evaluate; head and body are Outcome collections.
    @param model: The Model supplying the graph, variable definitions, and probability tables.
    @return: A probability in [0.0, 1.0].
    @raise ProbabilityIndeterminableException: if no rule can resolve the query.
    """

    def _compute(head: Collection[Outcome], body: Collection[Intervention], depth=0) -> float:
        """
        Compute the probability of some head given some body
        @param head: A list of some number of Outcome objects
        @param body: A list of some number of Outcome objects
        @param depth: Recursion depth; currently informational only
        @return: A probability between [0.0, 1.0]
        @raise ProbabilityIndeterminableException if the result cannot be computed for any reason
        """

        ###############################################
        #   Begin with bookkeeping / error-checking   #
        ###############################################

        current_expression = Expression(head, body)
        logger.info(f"query: {current_expression}")

        # If the calculation for this contains two separate outcomes for a variable (Y = y | Y = ~y), 0
        if contradictory_outcome_set(head + body):
            logger.error("two separate outcomes for one variable, P = 0.0")
            return 0.0

        ###############################################
        #            Reverse product rule             #
        #  P(y, x | ~z) = P(y | x, ~z) * P(x | ~z)    #
        ###############################################

        if len(head) > 1:
            logger.info(f"applying reverse product rule to {current_expression}")

            result_1 = _compute(head[:-1], [head[-1]] + body, depth+1)
            result_2 = _compute([head[-1]], body, depth+1)
            result = result_1 * result_2

            logger.success(f"{current_expression} = {result}")
            return result

        ###############################################
        #           Attempt direct lookup             #
        ###############################################

        # A table applies only when the body is exactly the head variable's parent set
        if set(model.variable(head[0].name).parents) == set(v.name for v in body):
            logger.info(f"querying table for: {current_expression}")
            table = model.table(head[0].name)                       # Get table
            probability = table.probability_lookup(head[0], body)   # Get specific row
            logger.success(f"{current_expression} = {probability}")

            return probability
        else:
            logger.info("no direct table found")

        ##################################################################
        #   Easy identity rule; P(X | X) = 1, so if LHS ⊆ RHS, P = 1.0   #
        ##################################################################

        if set(head).issubset(set(body)):
            logger.success(f"identity rule: X|X = 1.0, therefore {current_expression} = 1.0")
            return 1.0

        #################################################
        #                 Bayes' Rule                   #
        #     Detect children of the LHS in the RHS     #
        #     p(a|Cd) = p(d|aC) * p(a|C) / p(d|C)       #
        #################################################

        # NOTE(review): descendants() is passed an Outcome object here, while string names
        # are used elsewhere - confirm Graph.descendants accepts Outcomes
        reachable_from_head = set().union(*[model.graph().descendants(outcome) for outcome in head])
        descendants_in_rhs = set([var.name for var in body]) & reachable_from_head

        if descendants_in_rhs:
            logger.info(f"Children of the LHS in the RHS: {','.join(descendants_in_rhs)}")
            logger.info("Applying Bayes' rule.")

            # Not elegant, but simply take one of the children from the body out and recurse
            child_name = list(descendants_in_rhs)[0]
            child = [outcome for outcome in body if outcome.name == child_name]
            new_body = list(set(body) - set(child))

            logger.info(f"{Expression(child, head + new_body)} * {Expression(head, new_body)} / {Expression(child, new_body)}")

            result_1 = _compute(child, head + new_body, depth+1)
            result_2 = _compute(head, new_body, depth+1)
            result_3 = _compute(child, new_body, depth+1)
            if result_3 == 0:       # Avoid dividing by 0! coverage: skip
                # Fixed: child is already a list of Outcomes; the previous `[child]`
                # passed a nested list into Expression
                logger.success(f"{Expression(child, new_body)} = 0, therefore the result is 0.")
                return 0

            # flip flop flippy flop
            result = result_1 * result_2 / result_3
            logger.success(f"{current_expression} = {result}")
            return result

        #######################################################################################################
        #                                Jeffrey's Rule / Distributive Rule                                   #
        #   P(y | x) = P(y | z, x) * P(z | x) + P(y | ~z, x) * P(~z | x) === sigma_Z P(y | z, x) * P(z | x)   #
        #######################################################################################################

        missing_parents = set()
        for outcome in head:
            missing_parents.update(set(model.variable(outcome.name).parents) - set([parent.name for parent in head + body]))

        if missing_parents:
            logger.info("Attempting application of Jeffrey's Rule")

            for missing_parent in missing_parents:

                # Add one parent back in and recurse
                parent_outcomes = model.variable(missing_parent).outcomes

                # Consider the missing parent and sum every probability involving it
                total = 0.0
                for parent_outcome in parent_outcomes:

                    as_outcome = Outcome(missing_parent, parent_outcome)

                    logger.info(f"{Expression(head, [as_outcome] + body)}, * {Expression([as_outcome], body)}")

                    result_1 = _compute(head, [as_outcome] + body, depth+1)
                    result_2 = _compute([as_outcome], body, depth+1)
                    outcome_result = result_1 * result_2

                    total += outcome_result

                # NOTE(review): returns after summing over the FIRST missing parent only;
                # the recursion resolves any remaining ones - confirm intended
                logger.success(f"{current_expression} = {total}")
                return total

        ###############################################
        #           Single element on LHS             #
        #             Drop non-parents                #
        ###############################################

        if len(head) == 1 and not missing_parents and not descendants_in_rhs:

            head_variable = head[0].name
            can_drop = [v for v in body if v.name not in model.variable(head_variable).parents]

            if can_drop:
                logger.info(f"can drop: {[str(item) for item in can_drop]}")
                result = _compute(head, list(set(body) - set(can_drop)), depth+1)
                logger.success(f"{current_expression} = {result}")
                return result

        ###############################################
        #               Cannot compute                #
        ###############################################

        raise ProbabilityIndeterminableException

    head = set(expression.head())
    body = set(expression.body())

    # Validate every referenced variable/outcome, and reject Interventions outright
    for out in head | body:
        assert out.name in model.graph().v, f"Error: Unknown variable {out}"
        assert out.outcome in model.variable(out.name).outcomes, f"Error: Unknown outcome {out.outcome} for {out.name}"
        assert not isinstance(out, Intervention), \
            f"Error: basic inference engine does not handle Interventions ({out.name} is an Intervention)"

    return _compute(list(head), list(body))
172 |
173 |
def contradictory_outcome_set(outcomes: Collection[Outcome]) -> bool:
    """
    Check whether a list of outcomes contains any contradictory values, such as Y = y and Y = ~y
    @param outcomes: A list of Outcome objects
    @return: True if there is a contradiction/implausibility, False otherwise
    """
    # Single pass, O(n): remember the outcome seen for each variable name and flag any
    # disagreement (the original compared every ordered pair, O(n^2)).
    seen = {}
    for candidate in outcomes:
        if candidate.name in seen and seen[candidate.name] != candidate.outcome:
            return True
        seen[candidate.name] = candidate.outcome
    return False
184 |
185 |
def validate(model: Model) -> bool:
    """
    Ensures a model is 'valid' and 'consistent'.
    1. Ensures the graph is a DAG (contains no cycles)
    2. Ensures all variables denoted as exogenous are roots.
    3. Ensures all distributions are consistent (the sum of probability of each outcome is 1.0)

    Returns True on success (indicating a valid model), or raises an appropriate Exception indicating a failure.
    """
    # no cycles
    # NOTE(review): the acyclicity check is not implemented - this `...` is a no-op placeholder
    ...

    # exogenous variables are all roots (exogenous = graph vertices with no variable definition)
    exogenous = model._g.v - set(model._v.keys())
    roots = model._g.roots()
    for variable in exogenous:
        if variable not in roots:
            raise ExogenousNonRoot(variable)

    # consistent distributions: each variable's outcome probabilities must sum to 1.0
    for name, variable in model._v.items():
        t = 0
        for outcome in variable.outcomes:
            t += inference(Expression(Outcome(name, outcome)), model)

        # NOTE(review): assert is stripped under `python -O`; consider raising explicitly
        assert within_precision(t, 1)

    # all checks passed -> valid model
    return True
215 |
--------------------------------------------------------------------------------
/do/identification/Identification.py:
--------------------------------------------------------------------------------
1 | from typing import List, Optional, Set, Tuple, Union
2 |
3 | from .Exceptions import Fail as FAIL
4 | from .LatentGraph import LatentGraph as Graph
5 | from .PExpression import PExpression, TemplateExpression
6 |
7 |
def Identification(y: Set[str], x: Set[str], p: PExpression, g: Graph, prove: bool = True):
    """
    The Identification algorithm presented in Shpitser & Pearl, 2007.

    Args:
        y (Set[str]): A set of (outcome) variable names, corresponding to vertices present in graph G.
        x (Set[str]): A set of (treatment) variable names, corresponding to vertices present in graph G.
        p (PExpression): A custom data structure representing a distribution as a summation of variables
            (which can be empty) and collection of 'tables' (TemplateExpressions) represented as a variable
            name "given" some set of prior variables.
        g (Graph): A LatentGraph which has undergone augmentation to remove any exogenous variables, replacing
            them with bidirected arcs connecting their children.
        prove (bool, optional): Controls whether or not an additional process of proof generation should be
            undertaken when identifying the resulting expression. Defaults to True.

    Returns:
        PExpression: A resulting PExpression containing any number of nested PExpressions or (terminal)
            TemplateExpressions. This is not particularly useful on its own, but instead, can be evaluated
            through the main API.
    """

    # Recursive worker: `i` is the recursion depth (used to tag proof entries for indenting)
    # and `passdown_proof` threads an in-progress proof chain through recursive calls.
    def _identification(_y: Set[str], _x: Set[str], _p: PExpression, _g: Graph, _prove: bool = True, i=0, passdown_proof: Optional[List[Tuple[int, List[str]]]] = None) -> PExpression:

        # Format a set of variable names for proof output; the empty set renders as "Ø".
        def s(a_set):
            if len(a_set) == 0:
                return "Ø"
            return "{" + ', '.join(a_set) + "}"

        # The continuation of a proof that is ongoing if this is a recursive ID call, or a 'fresh' new proof sequence otherwise
        proof_chain = passdown_proof if passdown_proof else []

        # Shorthand for the ancestors of a set of vertices in the current graph _g.
        # noinspection PyPep8Naming
        def An(vertices):
            return _g.ancestors(vertices)

        if _prove:
            proof_chain.append((i, [f"ID Begin: Y = {s(_y)}, X = {s(_x)}"]))

        # 1: no interventions left — marginalize the joint down to Y
        if _x == set():
            if _prove:
                proof_chain.append((i, [
                    "1: if X == Ø, return Σ_{V \\ Y} P(V)",
                    f" --> Σ_{s(_g.V - _y)} P({s(_g.V)})",
                    "",
                    f"[***** Standard Probability Rules *****]"
                ]))

            return p_operator(_g.V - _y, _p, proof_chain)

        # 2: G contains non-ancestors of Y — restrict to the ancestral graph of Y
        if _g.V != An(_y):
            w = _g.V - An(_y)
            if _prove:
                proof_chain.append((i, [
                    "2: if V != An(Y)",
                    f"--> {s(_g.V)} != {s(An(_y))}",
                    "   return ID(y, x ∩ An(y), P(An(Y)), An(Y)_G)",
                    f"   --> ID({s(_y)}, {s(_x)} ∩ {s(An(_y))}, P({s(An(_y))}), An({s(An(_y))})_G)",
                    "",
                    f"  [***** Do-Calculus: Rule 3 *****]",
                    "  let W = V \\ An(Y)_G",
                    f"  W = {s(_g.V)} \\ {s(An(_y))}",
                    f"  W = {s(w)}",
                    f"  G \\ W = An(Y)_G",
                    f"  {s(_g.V)} \\ {s(w)} = {s(An(_y))}",
                    "  P_{x,z} (y | w) = P_{x} (y | w) if (Y ⊥⊥ Z | X, W) _G_X,Z(W)",
                    f"  let y = y ({s(_y)}), x = x ∩ An(Y) ({s(_x & An(_y))}), z = w ({s(w)})" ", w = Ø",
                    "  P_{" f"{s((_x & An(_y)) | w)}" "} " f"({s(_y)}) = P_{s(_x & An(_y))} ({s(_y)}) if ({s(_y)} ⊥⊥ {s(w)} | {s(_x & An(_y))}) _G_{s(_x)}",
                ]))

            return _identification(_y, _x & An(_y), p_operator(_g.V - _g[An(_y)].V, _p), _g[An(_y)], _prove, i+1, proof_chain)


        # 3: W — vertices that can safely be added to the intervention set X
        w = (_g.V - _x) - _g.without_incoming(_x).ancestors(_y)

        if _prove:
            proof_chain.append((i, [
                "let W = (V \\ X) \\ An(Y)_G_X",
                f"--> W = ({s(_g.V)} \\ {s(_x)}) \\ An({s(_y)})_G_{s(_x)}",
                f"--> W = {s(_g.V - _x)} \\ {s(_g.without_incoming(_x).ancestors(_y))}",
                f"--> W = {s(w)}"
            ]))

        if w != set():
            if _prove:
                proof_chain.append((i, [
                    "3: W != Ø",
                    "   return ID(y, x ∪ w, P, G)",
                    f"   --> ID({s(_y)}, {s(_x)} ∪ {s(w)}, P, G)",
                    "",
                    "   [***** Do-Calculus: Rule 3 *****]",
                    "   P_{x, z} (y | w) = P_{x} if (Y ⊥⊥ Z | X, W)_G_X_Z(W)",
                    "   let y = y, x = x, z = w, w = Ø",
                    "   P_{x} (y | w) = P_{x,z} (y | w) if (Y ⊥⊥ Z | X, W) _G_X,Z(W)",
                    f"   P_{s(_x)} ({s(_y)}) = P_" "{" f"{s(_x)[1:-1]}, {s(w)[1:-1]}" "}" f" ({s(_y)}) if ({s(_y)} ⊥⊥ {s(w)} | {s(_x)})_G_{s(_x)}"
                ]))

            return _identification(_y, _x | w, _p, _g, _prove, i+1, proof_chain)

        # The c-components of the subgraph induced by V \ X
        C_V_minus_X = _g[_g.V - _x].C

        # Line 4: multiple c-components — factor into a product of subproblems, one per component
        if len(C_V_minus_X) > 1:
            if _prove:
                proof_chain.append((i, [
                    "4: C(G \\ X) = {S_1, ..., S_k}",
                    f"--> C(G \\ X) = C({s(_g.V)} \\ {s(_x)}) = {', '.join(list(map(s, C_V_minus_X)))}",
                    "   return Σ_{V \\ y ∪ x} Π_i ID(Si, v \\ Si, P, G)",
                    "   --> Σ_{" f"{s(_g.V)} \\ {s(_y)} ∪ {s(_x)}" "} Π [",
                    *[f"   --> ID({s(Si)}, {s(_g.V - Si)}, P, G)" for Si in C_V_minus_X],
                    "   ]",
                    "",
                    "   [***** Proof *****]",
                    "   P_{x} (y) = Σ_{v \\ (y ∪ x)} Π_i P_{v \\ S_i} (S_i)",
                    "   1. [***** Do-Calculus: Rule 3 *****]",
                    "   Π_i P_{v \\ S_i} (S_i) = Π_i P_{A_i} (S_i), where A_i = An(S_i)_G \\ S_i",
                    "   Π [",
                    *[f"   P_{s(_g.V - si)} ({s(si)[1:-1]})" for si in C_V_minus_X],
                    "   ] = Π [",
                    *[f"   P_{s(_g.ancestors(si)-si)} ({s(si)[1:-1]})" for si in C_V_minus_X],
                    "   ]",

                    "   2. [***** Chain Rule *****]",
                    "   Π_i P_{A_i} (S_i) = Π_i Π_{V_j ∈ S_i} P_{A_i} (V_j | V_π^(j-1) \\ A_i)",

                    "   Π [",
                    *[f"   P_{s(_g.ancestors(si)-si)} ({s(si)[1:-1]})" for si in C_V_minus_X],
                    "   ] = Π [",
                    *["   ".join(["   Π ["] + [
                        f"P_{s(_g.ancestors(si)-si)} ({vj} | {s(set(_g.v_Pi[:_g.v_Pi.index(vj)]) - _g.ancestors({vj}))})" for vj in si
                    ] + ["]"]) for si in C_V_minus_X],
                    "   ]",

                    "   3. [***** Rule 2 or Rule 3 *****]",
                    "   Π_i Π_{V_j ∈ S_i} P_{A_i} (V_j | V_π^(j-1) \\ A_i) = Π_i Π_{V_j ∈ S_i} P(V_j | V_π^(j-1))",
                    "   a. if A ∈ A_i ∩ V_π^(j-1), A can be removed as an intervention by Rule 2",
                    "      All backdoor paths from A_i to V_j with a node not in V_π^(j-1) are d-separated.",
                    "      Paths must also be bidirected arcs only.",
                    "      let x = x, y = y, z = {A}, w = Ø",
                    "      P_{x,z} (y | w) = P_{x} (y | z, w) if (Y ⊥⊥ Z | X, W)_X_Z_",
                    "   b. if A ∈ A_i \\ V_π^(j-1), A can be removed as an intervention by Rule 3",
                    "      let x = x, y = V_j, z = {A}, w = Ø",
                    "      P_{x,z} (y | w) = P_{x} (y | w) if (Y ⊥⊥ Z | X, W)_G_X_Z(W)",
                    "      (V_j ⊥⊥ A | V_π^(j-1)) G_{A_i}",

                    "   Π [",
                    *["   ".join(["   Π ["] + [
                        f"P_{s(_g.ancestors(si)-si)} ({vj} | {s(set(_g.v_Pi[:_g.v_Pi.index(vj)]) - _g.ancestors({vj}))})" for vj in si
                    ] + ["]"]) for si in C_V_minus_X],
                    "   ] = Π [",
                    *["   ".join(["   Π ["] + [
                        f"P ({vj} | {s(set(_g.v_Pi[:_g.v_Pi.index(vj)]))})" for vj in si
                    ] + ["]"]) for si in C_V_minus_X],
                    "   ]",

                    "   4. [***** Grouping *****]",
                    "   Π_i Π_{V_j ∈ S_i} P(V_j | V_π^(j-1)) = Π_i P(V_i | V_π^(i-1))",

                    "   Π [",
                    *["   ".join(["   Π ["] + [
                        f"P ({vj} | {s(set(_g.v_Pi[:_g.v_Pi.index(vj)]))})" for vj in si
                    ] + ["]"]) for si in C_V_minus_X],
                    "   ] = Π [",

                    "   ]",

                    "   5. [***** Chain Rule *****]",
                    "   Π_i P(V_i | V_π^(i-1)) = P(v)"
                ]))

            return PExpression(_g.V - (_y | _x), [_identification(s_i, _g.V - s_i, _p, _g, _prove, i+1) for s_i in C_V_minus_X], proof_chain)

        else:

            # At this point we have a single component
            S = C_V_minus_X[0]

            if _prove:
                proof_chain.append((i, [
                    "if C(G \\ X) = {S}",
                    f"--> C({s(_g.V)} \\ {s(_x)}) = {s(S)}"
                ]))

            # Line 5: the component spans all of G — a hedge; the effect is not identifiable
            if set(S) == _g.V:
                if _prove:
                    proof_chain.append((i, [
                        "5: if C(G) = {G}: FAIL(G, S)",
                        f"--> G, S form hedges F, F' for Px(Y) -> {_g}, {S} for P_{_x}({_y})"
                    ]))

                raise FAIL(_g, S, proof_chain)

            # Line 6 - a single c-component
            if S in _g.C:

                dists = []
                dist_str = []
                for vi in S:
                    # Condition each vi on all variables preceding it in the topological order v_Pi
                    given = _g.v_Pi[:_g.v_Pi.index(vi)]
                    if _prove:
                        dist_str.append(f"P({vi})" if len(given) == 0 else f"P({vi} | {', '.join(given)})")
                    dists.append(TemplateExpression(vi, given))

                if _prove:
                    proof_chain.append((i, [
                        f"6: S ∈ C(G)",
                        f"--> {s(S)} ∈ {', '.join(list(map(s, _g.C)))}",
                        "   return Σ_{S-Y} π_{Vi ∈ S} P(Vi | V_π^(i-1))",
                        f"   --> Σ_{s(S - _y)} π [{', '.join(dist_str)}]",
                        "",
                        "   [***** Proof *****]",
                        f"   G has been partitioned into S = {s(S)} and X = {s(_x)} in G = {s(_g.V)}.",
                        "   There are no bidirected arcs between S and X."
                    ]))

                return PExpression(S - _y, dists, proof_chain)

            # 7: S is strictly contained in some larger c-component S' of G
            else:
                # NOTE(review): the generator variable `s` shadows the set-formatting helper s()
                # only inside this generator expression; later s(...) calls are unaffected.
                s_prime = next(s for s in _g.C if set(s) > set(S))
                p = []

                msg = " --> P = "

                for v in s_prime:
                    rhs0 = _g.v_Pi[:_g.v_Pi.index(v)]
                    rhs1 = rhs0.copy()

                    # Split the predecessors of v into those inside and outside S'
                    rhs0 = list(set(rhs0) & s_prime)
                    rhs1 = list(set(rhs1) - s_prime)
                    rhs = rhs0 + rhs1
                    p.append(TemplateExpression(v, rhs))
                    if _prove:
                        msg += f"[{v}{(f' | ' + ', '.join(rhs)) if len(rhs) > 0 else ''}]"

                g_s_prime = _g[s_prime]

                if _prove:
                    proof_chain.append((i, [
                        f"7: if ∃(S') S ⊂ S' ∈ C(G)",
                        f"--> let S = {s(S)}, S' = {s(s_prime)}",
                        f"--> {s(S)} ⊂ {s(s_prime)} ∈ {', '.join(list(map(s, _g.C)))}",
                        "   return ID(y, x ∩ S', π_{V_i ∈ S'} P(V_i | V_π^(i-1) ∩ S', V_π^(i-1) \\ S'), S')",
                        msg,
                        f"   --> ID({s(_y)}, {s(_x)} ∩ {s(s_prime)}, P, G = ({g_s_prime.V}, {g_s_prime.e}, {g_s_prime.e_bidirected}))",
                        "",
                        "   [***** Proof *****]",
                        f"   G is partitioned into X = {s(_x)} and S = {s(S)}, where X ⊂ An(S).",
                        "   M_{X \\ S'} induces G \\ (X \\ S') = S'.",
                        "   P_{x} = P_{x ∩ S', X \\ S'} = P_{x ∩ S'}.",
                    ]))

                return _identification(_y, _x & s_prime, PExpression([], p), g_s_prime, _prove, i+1, proof_chain)

    # Kick off the recursion at depth 0 with a fresh proof chain
    return _identification(y, x, p, g, prove)
266 |
def simplify_expression(original: PExpression, g: Graph, debug=False) -> PExpression:
    """
    Simplify a PExpression without mutating the input.

    Works on a copy of `original`, recursively simplifying nested PExpressions, then:
    removing conditioning variables that `g.ci` reports as conditionally independent of
    a table's head, dropping summed-over tables whose head appears in no other table's
    body, and finally sorting the remaining terms by the graph's topological ordering
    (g.v_Pi). A record of the simplification steps is appended to the copy's proof.

    @param original: The PExpression to simplify.
    @param g: The Graph used for conditional-independence tests and topological ordering.
    @param debug: When True, print each simplification step as it is applied.
    @return: A simplified copy of `original`.
    """

    def _simplify(current,i = 0):

        # The terminal tables (TemplateExpressions) at this level.
        # NOTE(review): the lambda parameter `i` shadows the depth argument `i` of _simplify.
        cpt_list_copy = list(filter(lambda i: isinstance(i, TemplateExpression), current.terms))
        # Recurse into nested PExpressions first; TemplateExpressions are terminal.
        for s in current.terms:

            if isinstance(s, TemplateExpression):
                continue

            c = _simplify(s, i + 1)

            # NOTE(review): the offset is derived from `original`'s proof, not `s`'s,
            # even though the condition checks `s.internal_proof` — confirm intended.
            if s.internal_proof:
                offset = original.internal_proof[-1][0] + 2
            else:
                offset = 1

            s.internal_proof.append((offset, c))

        # Human-readable log of each simplification applied at this level
        steps = []

        # """
        # Remove unnecessary variables from body
        for expression in cpt_list_copy:

            # Fixpoint loop: keep re-scanning until a full pass removes nothing.
            while True:
                removed_one = False
                x = {expression.head}
                for variable in expression.given:
                    y = {variable}
                    z = set(expression.given) - y
                # NOTE(review): this independence test sits AFTER the inner loop, so only
                # the final `variable` of `given` is tested each pass; earlier variables
                # are never removed once the last is kept — confirm whether the test was
                # meant to live inside the loop.
                if g.ci(x, y, z):
                    msg1 = f"{', '.join(x)} is independent of {', '.join(y)} given {', '.join(z)}, and can be removed."
                    msg2 = f"p operator removed {variable} from body of {expression}"
                    if debug:
                        print(msg1)
                        print(msg2)
                    steps.append(msg1)
                    expression.given.remove(variable)
                    removed_one = True

                if not removed_one:
                    break
        # """

        # Remove unnecessary expressions
        # """
        # A summed-over table whose head appears in no other table's body contributes
        # nothing and can be dropped (with its summation variable).
        while True:
            bodies = set().union(*[el.given for el in current.terms if isinstance(el, TemplateExpression)])
            search = filter(lambda el: isinstance(el, TemplateExpression) and el.head in current.sigma, current.terms)
            remove = list(filter(lambda el: el.head not in bodies, search))

            if len(remove) == 0:
                break

            for query in remove:
                current.sigma.remove(query.head)
                current.terms.remove(query)
                msg = f"{query.head} can be removed."
                if debug:
                    print(msg)
                steps.append(msg)
        # """

        # Same criterion as above, re-checked to a fixpoint (removals can enable more removals).
        while True:
            sumout = [cpt for cpt in current.terms if isinstance(cpt, TemplateExpression) and cpt.head in current.sigma and not any([cpt.head in el.given for el in current.terms if isinstance(el, TemplateExpression)])]
            if not sumout:
                break
            for cpt in sumout:
                current.terms.remove(cpt)
                current.sigma.remove(cpt.head)

        if len(steps) > 0:
            tables = ", ".join(f"P({table.head} | {', '.join(table.given)})" if len(table.given) > 0 else f"P({table.head})" for table in cpt_list_copy)
            steps.append(f"After simplification: {tables}")

        # Sort key: a table sorts by its head's topological index; a nested PExpression
        # sorts after all tables.
        def distribution_position(item: Union[PExpression, TemplateExpression]):
            if isinstance(item, PExpression):
                if len(item.sigma) == 0:
                    return len(g.v_Pi)
                # NOTE(review): min(0, *indices) can never exceed 0, so non-empty sigma
                # always yields len(g.v_Pi) here — possibly intended min(indices); confirm.
                return len(g.v_Pi) + min(0, *list(map(lambda v: g.v_Pi.index(v), item.sigma)))
            else:
                return g.v_Pi.index(item.head)

        # Sort remaining expressions by the topological ordering
        current.terms.sort(key=distribution_position)

        if len(steps) > 0:
            steps.insert(0, "[***** Simplification *****]")

        return steps

    # Place this simplification's proof entry just below the existing proof, if any.
    if original.internal_proof:
        depth = original.internal_proof[-1][0] + 1
    else:
        depth = 1

    # Simplify a copy so the caller's expression is left untouched.
    p = original.copy()
    changes = _simplify(p)
    p.internal_proof.append((depth, changes))
    return p
368 |
369 |
def p_operator(v: Set[str], p: PExpression, proof: Optional[List[Tuple[int, List[str]]]] = None):
    """
    Wrap p in a new PExpression that additionally sums over the vertices in v.

    @param v: A set of variable names to sum over, merged with p's existing summation set.
    @param p: The PExpression whose terms are carried (shallow-copied) into the result.
    @param proof: An optional proof chain to attach to the resulting PExpression.
    @return: A new PExpression summing over v ∪ p.sigma with a copy of p's term list.
    """
    # Shallow-copy the terms so later mutation of the result (e.g. during simplification)
    # cannot alter p. The set union already returns a fresh set, so no explicit .copy()
    # of v is needed; the `proof` default is now correctly annotated Optional.
    return PExpression(list(v | set(p.sigma)), list(p.terms), proof)
372 |
--------------------------------------------------------------------------------