├── .github
└── workflows
│ └── tests.yml
├── .gitignore
├── .readthedocs.yaml
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── docs
├── Makefile
├── conf.py
├── image
│ ├── bootstrap_dag.svg
│ ├── bootstrap_hist.png
│ ├── bootstrap_with_imputation.png
│ ├── bootstrap_with_imputation.svg
│ ├── bottom_up_parce.svg
│ ├── bottom_up_parce2.svg
│ ├── bottom_up_parce_hist.png
│ ├── camuvexmpl.png
│ ├── dag.png
│ ├── datageneration_CAMUV.png
│ ├── draw_graph1.svg
│ ├── draw_graph10.svg
│ ├── draw_graph11.svg
│ ├── draw_graph12.svg
│ ├── draw_graph13.svg
│ ├── draw_graph14.svg
│ ├── draw_graph2.svg
│ ├── draw_graph3.svg
│ ├── draw_graph4.svg
│ ├── draw_graph5.svg
│ ├── draw_graph6.svg
│ ├── draw_graph7.svg
│ ├── draw_graph8.svg
│ ├── draw_graph9.svg
│ ├── extract_ancestors.svg
│ ├── f_correlation1.png
│ ├── f_correlation2.png
│ ├── f_correlation3.png
│ ├── f_correlation4.png
│ ├── high_dim.svg
│ ├── high_dim2.svg
│ ├── lingam1.svg
│ ├── lingam2.svg
│ ├── longitudinal_dag1.svg
│ ├── longitudinal_dag2.svg
│ ├── longitudinal_dag3.svg
│ ├── longitudinal_hist.png
│ ├── longitudinal_scatter1.png
│ ├── longitudinal_scatter2.png
│ ├── multi_camuv1.svg
│ ├── multi_camuv2.svg
│ ├── multi_camuv3.svg
│ ├── multi_rcd_dag1.svg
│ ├── multi_rcd_dag2.svg
│ ├── multi_rcd_dag3.svg
│ ├── multi_rcd_dag4.svg
│ ├── multi_rcd_dag5.svg
│ ├── multiple_dataset_dag1.svg
│ ├── multiple_dataset_dag2.svg
│ ├── multiple_dataset_dag3.svg
│ ├── multiple_dataset_dag4.svg
│ ├── multiple_dataset_dag5.svg
│ ├── multiple_dataset_hist.png
│ ├── nan_reason.png
│ ├── pk_directlingam1.svg
│ ├── pk_directlingam2.svg
│ ├── pk_directlingam3.svg
│ ├── pk_directlingam4.svg
│ ├── pk_parcelingam1.svg
│ ├── pk_parcelingam2.svg
│ ├── pk_parcelingam3.svg
│ ├── pk_parcelingam4.svg
│ ├── rcd_dag1.svg
│ ├── rcd_dag2.svg
│ ├── rcd_hist.png
│ ├── resit_dag1.svg
│ ├── resit_dag2.svg
│ ├── var_dag.svg
│ ├── var_hist.png
│ ├── varma_dag.svg
│ ├── varma_hist.png
│ ├── visualize_nonlinear_causal_effect1.svg
│ └── visualize_nonlinear_causal_effect2.svg
├── index.rst
├── installation.rst
├── make.bat
├── reference
│ ├── bootstrap.rst
│ ├── bottom_up_parce_lingam.rst
│ ├── camuv.rst
│ ├── causal_effect.rst
│ ├── direct_lingam.rst
│ ├── high_dim_direct_lingam.rst
│ ├── ica_lingam.rst
│ ├── index.rst
│ ├── lim.rst
│ ├── lina.rst
│ ├── longitudinal_bootstrap.rst
│ ├── longitudinal_lingam.rst
│ ├── multi_group_camuv.rst
│ ├── multi_group_direct_lingam.rst
│ ├── multi_group_rcd.rst
│ ├── rcd.rst
│ ├── resit.rst
│ ├── tools.rst
│ ├── utils.rst
│ ├── var_bootstrap.rst
│ ├── var_lingam.rst
│ ├── varma_bootstrap.rst
│ └── varma_lingam.rst
├── requirements-doc.txt
└── tutorial
│ ├── bootstrap.rst
│ ├── bootstrap_with_imputation.rst
│ ├── bottom_up_parce.rst
│ ├── camuv.rst
│ ├── causal_effect.rst
│ ├── draw_graph.rst
│ ├── evaluate_model_fit.rst
│ ├── extract_ancestors.rst
│ ├── f_correlation.rst
│ ├── high_dim_direct_lingam.rst
│ ├── index.rst
│ ├── lim.rst
│ ├── lina.rst
│ ├── lingam.rst
│ ├── longitudinal.rst
│ ├── multi_group_camuv.rst
│ ├── multi_group_rcd.rst
│ ├── multiple_dataset.rst
│ ├── pk_bottom_up_parce.rst
│ ├── pk_direct.rst
│ ├── rcd.rst
│ ├── resit.rst
│ ├── total_effect.rst
│ ├── var.rst
│ ├── varma.rst
│ └── visualize_nonlinear_causal_effect.rst
├── examples
├── Bootstrap.ipynb
├── BootstrapWithImputation.ipynb
├── BottomUpParceLiNGAM.ipynb
├── BottomUpParceLiNGAM_f-correlation.ipynb
├── CAMUV.ipynb
├── CAMUV_f-correlation.ipynb
├── CausalBasedSimulator_estimating_functional_relationships.ipynb
├── CausalBasedSimulator_generating_simulated_data.ipynb
├── CausalBasedSimulator_introduction.ipynb
├── CausalDataGenerator_discrete.ipynb
├── CausalDataGenerator_introduction.ipynb
├── CausalEffect(LassoCV).ipynb
├── CausalEffect(LightGBM).ipynb
├── CausalEffect(LogisticRegression).ipynb
├── CausalEffect.ipynb
├── DirectLiNGAM(Kernel).ipynb
├── DirectLiNGAM.ipynb
├── DirectLiNGAM_fast.py
├── DrawGraph.ipynb
├── EvaluateModelFit.ipynb
├── Example_to_handle_groups_of_variables_with_known_causal_orders_btw_the_groups.ipynb
├── Example_to_handle_groups_of_varibles_with_known_causal_orders_btw_the_groups_for_multiple_datasets.ipynb
├── Example_to_perform_causal_discovery_on_data_with_discrete_variables.ipynb
├── HighDimDirectLiNGAM.ipynb
├── LongitudinalLiNGAM.ipynb
├── MultGroupCAMUV.ipynb
├── MultiGroupDirectLiNGAM.ipynb
├── MultiGroupRCD.ipynb
├── MultiGroupRCD_f-correlation.ipynb
├── OutOfSampleCausalTuning.ipynb
├── RCD.ipynb
├── RCD_f-correlation.ipynb
├── RESIT.ipynb
├── TotalEffect.ipynb
├── Use_prior_knowledge_in_BottomUpParceLiNGAM.ipynb
├── Use_prior_knowledge_in_DirectLiNGAM.ipynb
├── UsingSampleData.ipynb
├── VARLiNGAM.ipynb
├── VARMALiNGAM.ipynb
├── VisualizeNonlinearCausalEffect.ipynb
├── data
│ ├── 5vars_longitudinal_t0.csv
│ ├── 5vars_longitudinal_t1.csv
│ ├── 5vars_longitudinal_t2.csv
│ ├── 5vars_var.csv
│ ├── 5vars_varma.csv
│ ├── 6vars.csv
│ ├── 6vars_mid_latent.csv
│ ├── 6vars_top_latent.csv
│ ├── GenerateDatasets.ipynb
│ ├── create_5vars_longitudinal.py
│ ├── create_5vars_var.py
│ ├── create_5vars_varma.py
│ ├── create_6vars.py
│ ├── create_6vars_mid_latent.py
│ ├── create_6vars_top_latent.py
│ ├── sample_data_var_lingam.csv
│ └── sample_data_varma_lingam.csv
├── datageneration_CAMUV.png
├── images
│ ├── example_to_analyze_data_with_discrete_variables.png
│ ├── example_to_analyze_data_with_discrete_variables2.png
│ ├── example_to_analyze_data_with_discrete_variables3.png
│ ├── example_to_analyze_data_with_discrete_variables4.png
│ ├── example_to_analyze_data_with_discrete_variables5.png
│ └── example_to_analyze_data_with_discrete_variables6.png
└── utils.ipynb
├── lingam
├── __init__.py
├── base.py
├── bootstrap.py
├── bottom_up_parce_lingam.py
├── camuv.py
├── causal_based_simulator.py
├── causal_effect.py
├── direct_lingam.py
├── experimental
│ ├── __init__.py
│ ├── cdg.py
│ └── oct.py
├── high_dim_direct_lingam.py
├── hsic.py
├── ica_lingam.py
├── lim.py
├── lina.py
├── longitudinal_lingam.py
├── multi_group_camuv.py
├── multi_group_direct_lingam.py
├── multi_group_rcd.py
├── rcd.py
├── resit.py
├── tools
│ └── __init__.py
├── utils
│ ├── __init__.py
│ ├── _f_correlation.py
│ ├── _rcd.py
│ └── _visualize_nonlinear_causal_effect.py
├── var_lingam.py
└── varma_lingam.py
├── requirements.txt
├── setup.py
└── tests
├── __init__.py
├── test_bootstrap.py
├── test_bottom_up_parce_lingam.py
├── test_camuv.py
├── test_causal_based_simulator.py
├── test_causal_based_simulator
├── test_run-simulated_data.csv
├── test_run2-simulated_data.csv
├── test_run3-simulated_data.csv
├── test_run4-simulated_data.csv
├── test_run5-simulated_data.csv
├── test_run6-simulated_data.csv
├── test_train2_params.json
├── test_train2_resid.json
├── test_train3_params.json
├── test_train3_resid.json
├── test_train4_params.json
├── test_train4_resid.json
├── test_train_params.json
└── test_train_resid.json
├── test_causal_data_generator.py
├── test_causal_effect.py
├── test_direct_lingam.py
├── test_high_dim_direct_lingam.py
├── test_ica_lingam.py
├── test_lim.py
├── test_lim_data.csv
├── test_lina_MIT.py
├── test_lina_data.csv
├── test_longitudinal_lingam.py
├── test_mdlina_data.csv
├── test_multi_group_camuv.py
├── test_multi_group_direct_lingam.py
├── test_multi_group_rcd.py
├── test_rcd.py
├── test_resit.py
├── test_tools.py
├── test_utils.py
├── test_var_lingam.py
├── test_varma_lingam.py
└── test_visualize_nonlinear_causal_effect.py
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
1 | name: Tests
2 |
3 | on: [push]
4 |
5 | jobs:
6 | build:
7 |
8 | runs-on: ubuntu-latest
9 | strategy:
10 | matrix:
11 | python-version: ["3.8", "3.9", "3.10", "3.11"]
12 |
13 | steps:
14 | - uses: actions/checkout@v2
15 | - name: Set up Python ${{ matrix.python-version }}
16 | uses: actions/setup-python@v2
17 | with:
18 | python-version: ${{ matrix.python-version }}
19 | - name: Install dependencies
20 | run: |
21 | python -m pip install --upgrade pip
22 | pip install flake8 pytest pytest-cov
23 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
24 | - name: Lint with flake8
25 | run: |
26 | # stop the build if there are Python syntax errors or undefined names
27 | flake8 lingam --count --select=E9,F63,F7,F82 --show-source --statistics
28 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
29 | flake8 lingam --count --exit-zero --ignore=E203,E741,C901 --max-line-length=127 --statistics
30 | - name: Test with pytest
31 | run: |
32 | pytest -v --cov=lingam --cov-report=term-missing
33 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | .pytest_cache/
49 |
50 | # Translations
51 | *.mo
52 | *.pot
53 |
54 | # Django stuff:
55 | *.log
56 | local_settings.py
57 | db.sqlite3
58 |
59 | # Flask stuff:
60 | instance/
61 | .webassets-cache
62 |
63 | # Scrapy stuff:
64 | .scrapy
65 |
66 | # Sphinx documentation
67 | docs/_build/
68 |
69 | # PyBuilder
70 | target/
71 |
72 | # Jupyter Notebook
73 | .ipynb_checkpoints
74 |
75 | # pyenv
76 | .python-version
77 |
78 | # celery beat schedule file
79 | celerybeat-schedule
80 |
81 | # SageMath parsed files
82 | *.sage.py
83 |
84 | # Environments
85 | .env
86 | .venv
87 | env/
88 | venv/
89 | ENV/
90 | env.bak/
91 | venv.bak/
92 |
93 | # Spyder project settings
94 | .spyderproject
95 | .spyproject
96 |
97 | # Rope project settings
98 | .ropeproject
99 |
100 | # mkdocs documentation
101 | /site
102 |
103 | # mypy
104 | .mypy_cache/
105 |
106 | # vscode
107 | .vscode/
108 |
109 | # vi
110 | *.swp
111 |
112 | # docker
113 | Dockerfile
114 | .devcontainer/
115 |
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | # .readthedocs.yaml
2 | # Read the Docs configuration file
3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
4 |
5 | # Required
6 | version: 2
7 |
8 | # Set the version of Python and other tools you might need
9 | build:
10 | os: ubuntu-22.04
11 | tools:
12 | python: "3.11"
13 |
14 | # Build documentation in the docs/ directory with Sphinx
15 | sphinx:
16 | configuration: docs/conf.py
17 |
18 | # Optionally build your docs in additional formats such as PDF and ePub
19 | formats: all
20 |
21 | # Install packages from a requirements file
22 | python:
23 | install:
24 | - requirements: docs/requirements-doc.txt
25 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contribution Guide
2 |
 3 | We welcome and encourage community contributions to the lingam package.
4 |
5 | There are many ways to help lingam:
6 |
7 | * Implement a feature
8 | * Send a patch
9 | * Report a bug
10 | * Fix/Improve documentation
11 | * Write examples and tutorials
12 |
13 | ## Code Style
14 |
15 | We try to closely follow the official Python guidelines detailed in [PEP8](https://www.python.org/dev/peps/pep-0008/). Please read it and follow it.
16 |
17 | In addition, we add the following guidelines:
18 |
19 | * Use underscores to separate words in non-class names: n_samples rather than nsamples.
20 | * Use relative imports for references inside lingam package.
21 | * Use the [numpy docstring standard](https://numpydoc.readthedocs.io/en/latest/format.html#numpydoc-docstring-guide) in all your docstrings.
22 |
23 | ## Checking the Format
24 |
25 | Coding style is checked with flake8.
26 |
27 | ``` sh
28 | flake8 lingam --count --ignore=E203,E741,C901 --max-line-length=127 --statistics
29 | ```
30 |
31 | ## Documentation
32 |
33 | When adding a new feature to lingam, you also need to document it in the reference. The documentation source is stored under the docs directory and written in reStructuredText format. The API reference is automatically generated from the docstrings.
34 |
35 | To build the documentation, you use Sphinx. Run the following commands to install Sphinx and its extensions.
36 |
37 | ``` sh
38 | pip install sphinx
39 | pip install sphinxcontrib-napoleon
40 | pip install sphinx_rtd_theme
41 | ```
42 |
43 | Then you can build the documentation in HTML format locally:
44 |
45 | ``` sh
46 | cd docs
47 | make html
48 | ```
49 |
50 | HTML files are generated under the _build/html directory. Open index.html in a browser and see if it is rendered as expected.
51 |
52 | ## Unit Tests
53 |
54 | When adding a new feature or fixing a bug, you also need to write sufficient test code. We use pytest as the testing framework and unit tests are stored under the tests directory. We check that the code coverage is 100% when we run pytest.
55 |
56 | You can run all your tests as follows:
57 |
58 | ``` sh
59 | pytest -v --cov=lingam --cov-report=term-missing tests
60 | ```
61 |
62 | ## Creating a Pull Request
63 |
64 | When you are ready to create a pull request, please try to keep the following in mind:
65 |
66 | ### Title
67 |
68 | The title of your pull request should
69 |
70 | * briefly describe and reflect the changes
71 | * wrap any code with backticks
72 | * not end with a period
73 |
74 | ### Description
75 |
76 | The description of your pull request should
77 |
78 | * describe the motivation
79 | * describe the changes
80 | * if still work-in-progress, describe remaining tasks
81 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 T.Ikeuchi, G.Haraoka, M.Ide, W.Kurebayashi, S.Shimizu
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | SOURCEDIR = .
8 | BUILDDIR = _build
9 |
10 | # Put it first so that "make" without argument is like "make help".
11 | help:
12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
13 |
14 | .PHONY: help Makefile
15 |
16 | # Catch-all target: route all unknown targets to Sphinx using the new
17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
18 | %: Makefile
19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--------------------------------------------------------------------------------
/docs/image/bootstrap_dag.svg:
--------------------------------------------------------------------------------
1 |
2 |
4 |
6 |
7 |
86 |
--------------------------------------------------------------------------------
/docs/image/bootstrap_hist.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdt15/lingam/f2e53cf35e0e9a814e6e3e274d76bc0cf9dbbca0/docs/image/bootstrap_hist.png
--------------------------------------------------------------------------------
/docs/image/bootstrap_with_imputation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdt15/lingam/f2e53cf35e0e9a814e6e3e274d76bc0cf9dbbca0/docs/image/bootstrap_with_imputation.png
--------------------------------------------------------------------------------
/docs/image/bootstrap_with_imputation.svg:
--------------------------------------------------------------------------------
1 |
2 |
4 |
6 |
7 |
99 |
--------------------------------------------------------------------------------
/docs/image/bottom_up_parce2.svg:
--------------------------------------------------------------------------------
1 |
2 |
4 |
6 |
7 |
86 |
--------------------------------------------------------------------------------
/docs/image/bottom_up_parce_hist.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdt15/lingam/f2e53cf35e0e9a814e6e3e274d76bc0cf9dbbca0/docs/image/bottom_up_parce_hist.png
--------------------------------------------------------------------------------
/docs/image/camuvexmpl.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdt15/lingam/f2e53cf35e0e9a814e6e3e274d76bc0cf9dbbca0/docs/image/camuvexmpl.png
--------------------------------------------------------------------------------
/docs/image/dag.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdt15/lingam/f2e53cf35e0e9a814e6e3e274d76bc0cf9dbbca0/docs/image/dag.png
--------------------------------------------------------------------------------
/docs/image/datageneration_CAMUV.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdt15/lingam/f2e53cf35e0e9a814e6e3e274d76bc0cf9dbbca0/docs/image/datageneration_CAMUV.png
--------------------------------------------------------------------------------
/docs/image/draw_graph1.svg:
--------------------------------------------------------------------------------
1 |
2 |
4 |
6 |
7 |
86 |
--------------------------------------------------------------------------------
/docs/image/draw_graph8.svg:
--------------------------------------------------------------------------------
1 |
2 |
4 |
6 |
7 |
47 |
--------------------------------------------------------------------------------
/docs/image/draw_graph9.svg:
--------------------------------------------------------------------------------
1 |
2 |
4 |
6 |
7 |
99 |
--------------------------------------------------------------------------------
/docs/image/f_correlation1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdt15/lingam/f2e53cf35e0e9a814e6e3e274d76bc0cf9dbbca0/docs/image/f_correlation1.png
--------------------------------------------------------------------------------
/docs/image/f_correlation2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdt15/lingam/f2e53cf35e0e9a814e6e3e274d76bc0cf9dbbca0/docs/image/f_correlation2.png
--------------------------------------------------------------------------------
/docs/image/f_correlation3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdt15/lingam/f2e53cf35e0e9a814e6e3e274d76bc0cf9dbbca0/docs/image/f_correlation3.png
--------------------------------------------------------------------------------
/docs/image/f_correlation4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdt15/lingam/f2e53cf35e0e9a814e6e3e274d76bc0cf9dbbca0/docs/image/f_correlation4.png
--------------------------------------------------------------------------------
/docs/image/high_dim.svg:
--------------------------------------------------------------------------------
1 |
2 |
4 |
6 |
7 |
99 |
--------------------------------------------------------------------------------
/docs/image/high_dim2.svg:
--------------------------------------------------------------------------------
1 |
2 |
4 |
6 |
7 |
99 |
--------------------------------------------------------------------------------
/docs/image/lingam1.svg:
--------------------------------------------------------------------------------
1 |
2 |
4 |
6 |
7 |
86 |
--------------------------------------------------------------------------------
/docs/image/lingam2.svg:
--------------------------------------------------------------------------------
1 |
2 |
4 |
6 |
7 |
86 |
--------------------------------------------------------------------------------
/docs/image/longitudinal_dag1.svg:
--------------------------------------------------------------------------------
1 |
2 |
4 |
6 |
7 |
81 |
--------------------------------------------------------------------------------
/docs/image/longitudinal_dag2.svg:
--------------------------------------------------------------------------------
1 |
2 |
4 |
6 |
7 |
75 |
--------------------------------------------------------------------------------
/docs/image/longitudinal_dag3.svg:
--------------------------------------------------------------------------------
1 |
2 |
4 |
6 |
7 |
69 |
--------------------------------------------------------------------------------
/docs/image/longitudinal_hist.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdt15/lingam/f2e53cf35e0e9a814e6e3e274d76bc0cf9dbbca0/docs/image/longitudinal_hist.png
--------------------------------------------------------------------------------
/docs/image/longitudinal_scatter1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdt15/lingam/f2e53cf35e0e9a814e6e3e274d76bc0cf9dbbca0/docs/image/longitudinal_scatter1.png
--------------------------------------------------------------------------------
/docs/image/longitudinal_scatter2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdt15/lingam/f2e53cf35e0e9a814e6e3e274d76bc0cf9dbbca0/docs/image/longitudinal_scatter2.png
--------------------------------------------------------------------------------
/docs/image/multi_camuv1.svg:
--------------------------------------------------------------------------------
1 |
2 |
4 |
6 |
7 |
84 |
--------------------------------------------------------------------------------
/docs/image/multi_camuv2.svg:
--------------------------------------------------------------------------------
1 |
2 |
4 |
6 |
7 |
84 |
--------------------------------------------------------------------------------
/docs/image/multi_rcd_dag3.svg:
--------------------------------------------------------------------------------
1 |
2 |
4 |
6 |
7 |
98 |
--------------------------------------------------------------------------------
/docs/image/multi_rcd_dag4.svg:
--------------------------------------------------------------------------------
1 |
2 |
4 |
6 |
7 |
98 |
--------------------------------------------------------------------------------
/docs/image/multiple_dataset_dag1.svg:
--------------------------------------------------------------------------------
1 |
2 |
4 |
6 |
7 |
86 |
--------------------------------------------------------------------------------
/docs/image/multiple_dataset_dag3.svg:
--------------------------------------------------------------------------------
1 |
2 |
4 |
6 |
7 |
80 |
--------------------------------------------------------------------------------
/docs/image/multiple_dataset_hist.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdt15/lingam/f2e53cf35e0e9a814e6e3e274d76bc0cf9dbbca0/docs/image/multiple_dataset_hist.png
--------------------------------------------------------------------------------
/docs/image/nan_reason.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdt15/lingam/f2e53cf35e0e9a814e6e3e274d76bc0cf9dbbca0/docs/image/nan_reason.png
--------------------------------------------------------------------------------
/docs/image/pk_directlingam1.svg:
--------------------------------------------------------------------------------
1 |
2 |
4 |
6 |
7 |
86 |
--------------------------------------------------------------------------------
/docs/image/pk_parcelingam1.svg:
--------------------------------------------------------------------------------
1 |
2 |
4 |
6 |
7 |
86 |
--------------------------------------------------------------------------------
/docs/image/rcd_dag2.svg:
--------------------------------------------------------------------------------
1 |
2 |
4 |
6 |
7 |
86 |
--------------------------------------------------------------------------------
/docs/image/rcd_hist.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdt15/lingam/f2e53cf35e0e9a814e6e3e274d76bc0cf9dbbca0/docs/image/rcd_hist.png
--------------------------------------------------------------------------------
/docs/image/resit_dag1.svg:
--------------------------------------------------------------------------------
1 |
2 |
4 |
6 |
7 |
75 |
--------------------------------------------------------------------------------
/docs/image/resit_dag2.svg:
--------------------------------------------------------------------------------
1 |
2 |
4 |
6 |
7 |
69 |
--------------------------------------------------------------------------------
/docs/image/var_hist.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdt15/lingam/f2e53cf35e0e9a814e6e3e274d76bc0cf9dbbca0/docs/image/var_hist.png
--------------------------------------------------------------------------------
/docs/image/varma_hist.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdt15/lingam/f2e53cf35e0e9a814e6e3e274d76bc0cf9dbbca0/docs/image/varma_hist.png
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | Welcome to lingam's documentation!
2 | ==================================
3 |
4 | .. toctree::
5 | :maxdepth: 4
6 | :caption: Contents:
7 |
8 | installation
9 | tutorial/index
10 | reference/index
11 |
12 |
13 | Indices and tables
14 | ==================
15 |
16 | * :ref:`genindex`
17 |
--------------------------------------------------------------------------------
/docs/installation.rst:
--------------------------------------------------------------------------------
1 | Installation Guide
2 | ==================
3 |
4 | To install lingam package, use `pip` as follows:
5 |
6 | .. code-block:: bash
7 |
8 | $ pip install lingam
9 |
10 | You can also install the development version of lingam package from GitHub:
11 |
12 | .. code-block:: bash
13 |
14 | $ pip install git+https://github.com/cdt15/lingam.git
15 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 |
13 | if "%1" == "" goto help
14 |
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | echo.
18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | echo.installed, then set the SPHINXBUILD environment variable to point
20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | echo.may add the Sphinx directory to PATH.
22 | echo.
23 | echo.If you don't have Sphinx installed, grab it from
24 | echo.http://sphinx-doc.org/
25 | exit /b 1
26 | )
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/docs/reference/bootstrap.rst:
--------------------------------------------------------------------------------
1 | .. module:: lingam
2 |
3 | BootstrapResult
4 | ===============
5 |
6 | .. autoclass:: BootstrapResult
7 | :members:
8 | :inherited-members:
9 |
--------------------------------------------------------------------------------
/docs/reference/bottom_up_parce_lingam.rst:
--------------------------------------------------------------------------------
1 | .. module:: lingam
2 |
3 | BottomUpParceLiNGAM
4 | ===================
5 |
6 | .. autoclass:: BottomUpParceLiNGAM
7 | :members:
8 | :inherited-members:
9 |
--------------------------------------------------------------------------------
/docs/reference/camuv.rst:
--------------------------------------------------------------------------------
1 | .. module:: lingam
2 |
3 | CAM-UV
4 | ======
5 |
6 | .. autoclass:: CAMUV
7 | :members:
8 | :inherited-members:
9 |
--------------------------------------------------------------------------------
/docs/reference/causal_effect.rst:
--------------------------------------------------------------------------------
1 | .. module:: lingam
2 |
3 | CausalEffect
4 | ============
5 |
6 | .. autoclass:: CausalEffect
7 | :members:
8 | :inherited-members:
9 |
--------------------------------------------------------------------------------
/docs/reference/direct_lingam.rst:
--------------------------------------------------------------------------------
1 | .. module:: lingam
2 |
3 | DirectLiNGAM
4 | =============
5 |
6 | .. autoclass:: DirectLiNGAM
7 | :members:
8 | :inherited-members:
9 |
--------------------------------------------------------------------------------
/docs/reference/high_dim_direct_lingam.rst:
--------------------------------------------------------------------------------
1 | .. module:: lingam
2 |
3 | HighDimDirectLiNGAM
4 | ===================
5 |
6 | .. autoclass:: HighDimDirectLiNGAM
7 | :members:
8 | :inherited-members:
9 |
--------------------------------------------------------------------------------
/docs/reference/ica_lingam.rst:
--------------------------------------------------------------------------------
1 | .. module:: lingam
2 |
3 | ICA-LiNGAM
4 | =============
5 |
6 | .. autoclass:: ICALiNGAM
7 | :members:
8 | :inherited-members:
9 |
--------------------------------------------------------------------------------
/docs/reference/index.rst:
--------------------------------------------------------------------------------
1 | .. module:: lingam
2 |
3 | API Reference
4 | =============
5 |
6 | .. toctree::
7 | :maxdepth: 2
8 |
9 | ica_lingam
10 | direct_lingam
11 | multi_group_direct_lingam
12 | high_dim_direct_lingam
13 | var_lingam
14 | varma_lingam
15 | longitudinal_lingam
16 | bootstrap
17 | var_bootstrap
18 | varma_bootstrap
19 | longitudinal_bootstrap
20 | bottom_up_parce_lingam
21 | rcd
22 | camuv
23 | multi_group_rcd
24 | multi_group_camuv
25 | lina
26 | resit
27 | lim
28 | causal_effect
29 | utils
30 | tools
31 |
--------------------------------------------------------------------------------
/docs/reference/lim.rst:
--------------------------------------------------------------------------------
1 | .. module:: lingam
2 |
3 | LiM
4 | ======
5 |
6 | .. autoclass:: LiM
7 | :members:
8 | :inherited-members:
--------------------------------------------------------------------------------
/docs/reference/lina.rst:
--------------------------------------------------------------------------------
1 | .. module:: lingam
2 |
3 | LiNA
4 | ====
5 |
6 | .. autoclass:: LiNA
7 | :members:
8 | :inherited-members:
9 |
10 | .. autoclass:: MDLiNA
11 | :members:
12 | :inherited-members:
13 |
--------------------------------------------------------------------------------
/docs/reference/longitudinal_bootstrap.rst:
--------------------------------------------------------------------------------
1 | .. module:: lingam
2 |
3 | LongitudinalBootstrapResult
4 | ===========================
5 |
6 | .. autoclass:: LongitudinalBootstrapResult
7 | :members:
8 | :inherited-members:
9 |
--------------------------------------------------------------------------------
/docs/reference/longitudinal_lingam.rst:
--------------------------------------------------------------------------------
1 | .. module:: lingam
2 |
3 | LongitudinalLiNGAM
4 | ==================
5 |
6 | .. autoclass:: LongitudinalLiNGAM
7 | :members:
8 | :inherited-members:
9 |
--------------------------------------------------------------------------------
/docs/reference/multi_group_camuv.rst:
--------------------------------------------------------------------------------
1 | .. module:: lingam
2 |
3 | MultiGroupCAMUV
4 | ===============
5 |
6 | .. autoclass:: MultiGroupCAMUV
7 | :members:
8 | :inherited-members:
9 |
--------------------------------------------------------------------------------
/docs/reference/multi_group_direct_lingam.rst:
--------------------------------------------------------------------------------
1 | .. module:: lingam
2 |
3 | MultiGroupDirectLiNGAM
4 | ======================
5 |
6 | .. autoclass:: MultiGroupDirectLiNGAM
7 | :members:
8 | :inherited-members:
9 |
--------------------------------------------------------------------------------
/docs/reference/multi_group_rcd.rst:
--------------------------------------------------------------------------------
1 | .. module:: lingam
2 |
3 | MultiGroupRCD
4 | ======================
5 |
6 | .. autoclass:: MultiGroupRCD
7 | :members:
8 | :inherited-members:
9 |
--------------------------------------------------------------------------------
/docs/reference/rcd.rst:
--------------------------------------------------------------------------------
1 | .. module:: lingam
2 |
3 | RCD
4 | ===
5 |
6 | .. autoclass:: RCD
7 | :members:
8 | :inherited-members:
9 |
--------------------------------------------------------------------------------
/docs/reference/resit.rst:
--------------------------------------------------------------------------------
1 | .. module:: lingam
2 |
3 | RESIT
4 | =====
5 |
6 | .. autoclass:: RESIT
7 | :members:
8 | :inherited-members:
9 |
--------------------------------------------------------------------------------
/docs/reference/tools.rst:
--------------------------------------------------------------------------------
1 | .. module:: lingam.tools
2 |
3 | tools
4 | =====
5 |
6 | .. autofunction:: bootstrap_with_imputation
7 |
--------------------------------------------------------------------------------
/docs/reference/utils.rst:
--------------------------------------------------------------------------------
1 | .. module:: lingam.utils
2 |
3 | utils
4 | =====
5 |
6 | .. autofunction:: print_causal_directions
7 | .. autofunction:: print_dagc
8 | .. autofunction:: make_prior_knowledge
9 | .. autofunction:: remove_effect
10 | .. autofunction:: make_dot
11 | .. autofunction:: get_sink_variables
12 | .. autofunction:: get_exo_variables
13 | .. autofunction:: find_all_paths
14 | .. autofunction:: predict_adaptive_lasso
15 | .. autofunction:: likelihood_i
16 | .. autofunction:: log_p_super_gaussian
17 | .. autofunction:: variance_i
18 | .. autofunction:: extract_ancestors
19 | .. autofunction:: f_correlation
20 | .. autofunction:: visualize_nonlinear_causal_effect
21 | .. autofunction:: evaluate_model_fit
22 |
--------------------------------------------------------------------------------
/docs/reference/var_bootstrap.rst:
--------------------------------------------------------------------------------
1 | .. module:: lingam
2 |
3 | VARBootstrapResult
4 | ==================
5 |
6 | .. autoclass:: VARBootstrapResult
7 | :members:
8 | :inherited-members:
9 |
--------------------------------------------------------------------------------
/docs/reference/var_lingam.rst:
--------------------------------------------------------------------------------
1 | .. module:: lingam
2 |
3 | VAR-LiNGAM
4 | =============
5 |
6 | .. autoclass:: VARLiNGAM
7 | :members:
8 | :inherited-members:
9 |
--------------------------------------------------------------------------------
/docs/reference/varma_bootstrap.rst:
--------------------------------------------------------------------------------
1 | .. module:: lingam
2 |
3 | VARMABootstrapResult
4 | ====================
5 |
6 | .. autoclass:: VARMABootstrapResult
7 | :members:
8 | :inherited-members:
9 |
--------------------------------------------------------------------------------
/docs/reference/varma_lingam.rst:
--------------------------------------------------------------------------------
1 | .. module:: lingam
2 |
3 | VARMA-LiNGAM
4 | =============
5 |
6 | .. autoclass:: VARMALiNGAM
7 | :members:
8 | :inherited-members:
9 |
--------------------------------------------------------------------------------
/docs/requirements-doc.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | scipy
3 | scikit-learn
4 | graphviz
5 | statsmodels
6 | networkx
7 | pandas
8 | pygam
9 | matplotlib
10 | psy
11 | semopy
12 | sphinx
13 | sphinx_rtd_theme
14 |
--------------------------------------------------------------------------------
/docs/tutorial/causal_effect.rst:
--------------------------------------------------------------------------------
1 | Causal Effect on predicted variables
2 | ====================================
3 |
4 | The following demonstrates a method [1]_ that analyzes the prediction mechanisms of constructed predictive models based on causality.
5 | This method estimates causal effects, i.e., intervention effects of features or explanatory variables used in constructed predictive models on the predicted variables.
6 | Users can use estimated causal structures, e.g., by a LiNGAM-type method or known causal structures based on domain knowledge.
7 |
8 | References
9 |
10 | .. [1] P. Blöbaum and S. Shimizu. Estimation of interventional effects of features on prediction.
11 | In Proc. 2017 IEEE International Workshop on Machine Learning for Signal Processing (MLSP2017), pp. 1--6, Tokyo, Japan, 2017.
12 |
13 |
14 | First, we use lingam package:
15 |
16 | .. code-block:: python
17 |
18 | import lingam
19 |
20 | Then, we create a :class:`~lingam.DirectLiNGAM` object and call the :func:`~lingam.DirectLiNGAM.fit` method:
21 |
22 | .. code-block:: python
23 |
24 | model = lingam.DirectLiNGAM()
25 | model.fit(X)
26 |
27 | Next, we create the prediction model. In the following example, linear regression model is created, but it is also possible to create logistic regression model or non-linear regression model.
28 |
29 | .. code-block:: python
30 |
31 | from sklearn.linear_model import LinearRegression
32 |
33 | target = 0
34 | features = [i for i in range(X.shape[1]) if i != target]
35 | reg = LinearRegression()
36 | reg.fit(X.iloc[:, features], X.iloc[:, target])
37 |
38 |
39 | Identification of Feature with Greatest Causal Influence on Prediction
40 | ----------------------------------------------------------------------
41 |
42 | We create a :class:`~lingam.CausalEffect` object and call the :func:`~lingam.CausalEffect.estimate_effects_on_prediction` method.
43 |
44 | .. code-block:: python
45 |
46 | ce = lingam.CausalEffect(model)
47 | effects = ce.estimate_effects_on_prediction(X, target, reg)
48 |
49 | To identify the feature having the greatest intervention effect on the prediction, we can get the feature that maximizes the value of the obtained list.
50 |
51 | .. code-block:: python
52 |
53 | print(X.columns[np.argmax(effects)])
54 |
55 | .. code-block:: python
56 |
57 | cylinders
58 |
59 | Estimation of Optimal Intervention
60 | ----------------------------------
61 |
62 | To estimate the intervention such that the expectation of the prediction of the post-intervention observations is equal or close to a specified value, we use :func:`~lingam.CausalEffect.estimate_optimal_intervention` method of :class:`~lingam.CausalEffect`.
63 | In the following example, we estimate the intervention value at variable index 1 so that the predicted value is close to 15.
64 |
65 | .. code-block:: python
66 |
67 | c = ce.estimate_optimal_intervention(X, target, reg, 1, 15)
68 | print(f'Optimal intervention: {c:.3f}')
69 |
70 | .. code-block:: python
71 |
72 | Optimal intervention: 7.871
73 |
74 | Use a known causal model
75 | ------------------------
76 |
77 | When using a known causal model, we can specify the adjacency matrix when we create :class:`~lingam.CausalEffect` object.
78 |
79 | .. code-block:: python
80 |
81 | m = np.array([[0.0, 0.0, 0.0, 3.0, 0.0, 0.0],
82 | [3.0, 0.0, 2.0, 0.0, 0.0, 0.0],
83 | [0.0, 0.0, 0.0, 6.0, 0.0, 0.0],
84 | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
85 | [8.0, 0.0,-1.0, 0.0, 0.0, 0.0],
86 | [4.0, 0.0, 0.0, 0.0, 0.0, 0.0]])
87 |
88 | ce = lingam.CausalEffect(causal_model=m)
89 | effects = ce.estimate_effects_on_prediction(X, target, reg)
90 |
91 | For details, see also:
92 |
93 | * https://github.com/cdt15/lingam/blob/master/examples/CausalEffect.ipynb
94 | * https://github.com/cdt15/lingam/blob/master/examples/CausalEffect(LassoCV).ipynb
95 | * https://github.com/cdt15/lingam/blob/master/examples/CausalEffect(LogisticRegression).ipynb
96 | * https://github.com/cdt15/lingam/blob/master/examples/CausalEffect(LightGBM).ipynb
97 |
--------------------------------------------------------------------------------
/docs/tutorial/extract_ancestors.rst:
--------------------------------------------------------------------------------
1 |
2 | Finding ancestors of each variable
3 | ==================================
4 |
5 | By using ``utils.extract_ancestors``, which implements Algorithm 1 of RCD method [1]_, we can extract the ancestors of variables.
6 | Since RCD allows for the existence of unobserved common causes, we can search for ancestors even when there are unobserved common causes, as in the following example.
7 |
8 | References
9 |
10 | .. [1] T. N. Maeda and S. Shimizu. RCD: Repetitive causal discovery of linear non-Gaussian acyclic models with latent confounders.
11 | In Proc. 23rd International Conference on Artificial Intelligence and Statistics (AISTATS2020), Palermo, Sicily, Italy. PMLR 108:735-745, 2020.
12 |
13 |
14 | Import and settings
15 | -------------------
16 |
17 | .. code-block:: python
18 |
19 | import random
20 |
21 | import numpy as np
22 | import pandas as pd
23 |
24 | from sklearn.utils import check_array
25 | from lingam.utils import make_dot, extract_ancestors
26 |
27 |
28 | Test data
29 | ---------
30 |
31 | .. code-block:: python
32 |
33 | def get_coef():
34 | coef = random.random()
35 | return coef if coef >= 0.5 else coef - 1.0
36 | get_external_effect = lambda n: np.random.normal(0.0, 0.5, n) ** 3
37 |
38 | B = np.array([[ 0.0, 0.0, 0.0, 0.0, 0.0, get_coef(), 0.0],
39 | [ 0.0, 0.0, 0.0, 0.0, 0.0, get_coef(), 0.0],
40 | [get_coef(), get_coef(), 0.0, 0.0, 0.0, 0.0, 0.0],
41 | [ 0.0, 0.0, get_coef(), 0.0, 0.0, 0.0, get_coef()],
42 | [ 0.0, 0.0, get_coef(), 0.0, 0.0, 0.0, get_coef()],
43 | [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
44 | [ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]])
45 |
46 | samples = 500
47 | f0 = get_external_effect(samples)
48 | f1 = get_external_effect(samples)
49 | x0 = f0 * B[0, 5] + get_external_effect(samples)
50 | x1 = f0 * B[1, 5] + get_external_effect(samples)
51 | x2 = x0 * B[2, 0] + x1 * B[2, 1] + get_external_effect(samples)
52 | x3 = x2 * B[3, 2] + f1 * B[3, 6] + get_external_effect(samples)
53 | x4 = x2 * B[4, 2] + f1 * B[4, 6] + get_external_effect(samples)
54 |
55 | # f0 and f1 are latent confounders
56 | X = pd.DataFrame(np.array([x0, x1, x2, x3, x4]).T ,columns=['x0', 'x1', 'x2', 'x3', 'x4'])
57 |
58 | make_dot(B, labels=['x0', 'x1', 'x2', 'x3', 'x4', 'f0', 'f1'])
59 |
60 |
61 | .. image:: ../image/extract_ancestors.svg
62 |
63 |
64 | Extract the ancestors of each observed variable
65 | -----------------------------------------------
66 |
67 | .. code-block:: python
68 |
69 | M = extract_ancestors(X)
70 |
71 | for i in range(X.shape[1]):
72 | if len(M[i]) == 0:
73 | print(f'x{i} has no ancestors.')
74 | else:
75 | print(f'The ancestors of x{i} are ' + ', '.join([f'x{n}' for n in M[i]]))
76 |
77 |
78 | .. parsed-literal::
79 |
80 | x0 has no ancestors.
81 | x1 has no ancestors.
82 | The ancestors of x2 are x0, x1
83 | The ancestors of x3 are x0, x1, x2
84 | The ancestors of x4 are x0, x1, x2
85 |
86 |
87 |
--------------------------------------------------------------------------------
/docs/tutorial/f_correlation.rst:
--------------------------------------------------------------------------------
1 | F-correlation
2 | =============
3 |
4 | By using ``utils.f_correlation``, which implements the F-correlation [1]_, we can compute the nonlinear correlation between two variables.
5 |
6 | References
7 |
8 | .. [1] Bach, Francis R., and Michael I. Jordan. "Kernel independent component analysis." Journal of machine learning research 3.Jul (2002): 1-48.
9 |
10 | Import and settings
11 | -------------------
12 |
13 | .. code-block:: python
14 |
15 | import numpy as np
16 | import matplotlib.pyplot as plt
17 | from lingam.utils import f_correlation
18 |
19 |
20 | Test data
21 | ---------
22 |
23 | .. code-block:: python
24 |
25 | def linear_data(n, r):
26 | a = np.random.randn(n)
27 | e1 = np.random.randn(n)
28 | e2 = np.random.randn(n)
29 | if r < 0:
30 | r = -r
31 | x = -np.sqrt(r)*a - np.sqrt(1-r)*e1
32 | else:
33 | x = np.sqrt(r)*a + np.sqrt(1-r)*e1
34 | y = np.sqrt(r)*a + np.sqrt(1-r)*e2
35 | return x, y
36 |
37 | def x2_data(n):
38 | x = np.random.uniform(-5, 5, n)
39 | e = np.random.randn(n)
40 | y = 0.5 * (x ** 2) + e
41 | return x, y
42 |
43 | def sin_data(n):
44 | e = np.random.randn(n)
45 | x = np.random.uniform(-5, 5, n)
46 | y = 5 * np.sin(x) + e
47 | return x, y
48 |
49 |
50 | Linear correlated data (Uncorrelated)
51 | -------------------------------------
52 |
53 | .. code-block:: python
54 |
55 | x, y = linear_data(1000, 0.1)
56 | corr = np.corrcoef(x, y)[0, 1]
57 | print(f"Pearson's correlation coefficient= {corr:.3f}")
58 |
59 | corr = f_correlation(x, y)
60 | print(f'F-correlation= {corr:.3f}')
61 |
62 | plt.scatter(x, y, alpha=0.5)
63 | plt.show()
64 |
65 | .. parsed-literal::
66 |
67 | Pearson's correlation coefficient= 0.126
68 | F-correlation= 0.120
69 |
70 |
71 | .. image:: ../image/f_correlation1.png
72 |
73 |
74 | Linear correlated data (Strongly correlated)
75 | --------------------------------------------
76 |
77 | .. code-block:: python
78 |
79 | x, y = linear_data(1000, 0.9)
80 | corr = np.corrcoef(x, y)[0, 1]
81 | print(f"Pearson's correlation coefficient= {corr:.3f}")
82 |
83 | corr = f_correlation(x, y)
84 | print(f'F-correlation= {corr:.3f}')
85 |
86 | plt.scatter(x, y, alpha=0.5)
87 | plt.show()
88 |
89 |
90 | .. parsed-literal::
91 |
92 | Pearson's correlation coefficient= 0.907
93 | F-correlation= 0.814
94 |
95 |
96 | .. image:: ../image/f_correlation2.png
97 |
98 |
99 | Non-linear correlated data (Quadratic function)
100 | -----------------------------------------------
101 |
102 | .. code-block:: python
103 |
104 | x, y = x2_data(1000)
105 | corr = np.corrcoef(x, y)[0, 1]
106 | print(f"Pearson's correlation coefficient= {corr:.3f}")
107 |
108 | corr = f_correlation(x, y)
109 | print(f'F-correlation= {corr:.3f}')
110 |
111 | plt.scatter(x, y, alpha=0.5)
112 | plt.show()
113 |
114 | .. parsed-literal::
115 | 
116 |     Pearson's correlation coefficient= 0.037
117 |     F-correlation= 0.848
117 |
118 |
119 | .. image:: ../image/f_correlation3.png
120 |
121 |
122 | Non-linear correlated data (Sin function)
123 | -----------------------------------------
124 |
125 | .. code-block:: python
126 |
127 | x, y = sin_data(1000)
128 | corr = np.corrcoef(x, y)[0, 1]
129 | print(f"Pearson's correlation coefficient= {corr:.3f}")
130 |
131 | corr = f_correlation(x, y)
132 | print(f'F-correlation= {corr:.3f}')
133 |
134 | plt.scatter(x, y, alpha=0.5)
135 | plt.show()
136 |
137 |
138 | .. parsed-literal::
139 |
140 | Pearson's correlation coefficient= -0.275
141 | F-correlation= 0.853
142 |
143 |
144 | .. image:: ../image/f_correlation4.png
145 |
146 |
--------------------------------------------------------------------------------
/docs/tutorial/high_dim_direct_lingam.rst:
--------------------------------------------------------------------------------
1 | HighDimDirectLiNGAM
2 | ===================
3 |
4 | Import and settings
5 | -------------------
6 |
7 | In this example, we need to import ``numpy``, ``pandas``, and
8 | ``graphviz`` in addition to ``lingam``.
9 |
10 | .. code:: ipython3
11 |
12 | import numpy as np
13 | import pandas as pd
14 | import graphviz
15 | import lingam
16 | from lingam import HighDimDirectLiNGAM
17 | from lingam.utils import make_dot
18 |
19 | print([np.__version__, pd.__version__, graphviz.__version__, lingam.__version__])
20 |
21 | np.set_printoptions(precision=3, suppress=True)
22 | np.random.seed(100)
23 |
24 |
25 | .. parsed-literal::
26 |
27 | ['1.24.4', '2.0.3', '0.20.1', '1.8.3']
28 |
29 |
30 | Test data
31 | ---------
32 |
33 | We create test data consisting of 6 variables.
34 |
35 | .. code:: ipython3
36 |
37 | m = np.array([
38 | [ 0.000, 0.000, 0.000, 0.895, 0.000, 0.000],
39 | [ 0.565, 0.000, 0.377, 0.000, 0.000, 0.000],
40 | [ 0.000, 0.000, 0.000, 0.895, 0.000, 0.000],
41 | [ 0.000, 0.000, 0.000, 0.000, 0.000, 0.000],
42 | [ 0.991, 0.000, -0.124, 0.000, 0.000, 0.000],
43 | [ 0.895, 0.000, 0.000, 0.000, 0.000, 0.000]
44 | ])
45 |
46 | generate_error = lambda p: np.random.uniform(-p, p, size=1000)
47 |
48 | error_vars = [0.2, 0.2, 0.2, 1.0, 0.2, 0.2]
49 | params = [0.5 * np.sqrt(12 * v) for v in error_vars]
50 | e = np.array([generate_error(p) for p in params])
51 |
52 | X = np.linalg.pinv(np.eye(len(m)) - m) @ e
53 | X = pd.DataFrame(X.T)
54 |
55 | display(make_dot(m))
56 |
57 | X.head()
58 |
59 |
60 |
61 | .. image:: ../image/high_dim.svg
62 |
63 |
64 |
65 |
66 | .. raw:: html
67 |
68 |
69 |
82 |
83 |
84 |
85 | |
86 | 0 |
87 | 1 |
88 | 2 |
89 | 3 |
90 | 4 |
91 | 5 |
92 |
93 |
94 |
95 |
96 | 0 |
97 | -1.245034 |
98 | -2.070303 |
99 | -1.684946 |
100 | -1.466231 |
101 | -0.607202 |
102 | -1.208680 |
103 |
104 |
105 | 1 |
106 | -0.129694 |
107 | -0.453755 |
108 | -0.525306 |
109 | 0.238720 |
110 | -0.770446 |
111 | 0.214530 |
112 |
113 |
114 | 2 |
115 | -0.426608 |
116 | 0.434575 |
117 | -0.070464 |
118 | -0.346001 |
119 | -0.891935 |
120 | 0.060805 |
121 |
122 |
123 | 3 |
124 | -0.058363 |
125 | -0.412667 |
126 | 0.134419 |
127 | -0.661997 |
128 | -0.661361 |
129 | 0.382801 |
130 |
131 |
132 | 4 |
133 | 0.560928 |
134 | 0.631961 |
135 | 1.636429 |
136 | 1.484039 |
137 | 0.757059 |
138 | 0.526978 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 | Causal Discovery
147 | ----------------
148 |
149 | To run causal discovery, we create a ``HighDimDirectLiNGAM`` object and
150 | call the ``fit`` method.
151 |
152 | .. code:: ipython3
153 |
154 | model = HighDimDirectLiNGAM()
155 | model.fit(X)
156 |
157 |
158 |
159 |
160 | .. parsed-literal::
161 |
162 |
163 |
164 |
165 |
166 | Using the ``causal_order_`` properties, we can see the causal ordering
167 | as a result of the causal discovery.
168 |
169 | .. code:: ipython3
170 |
171 | model.causal_order_
172 |
173 |
174 |
175 |
176 | .. parsed-literal::
177 |
178 | [3, 2, 0, 4, 1, 5]
179 |
180 |
181 |
182 | Also, using the ``adjacency_matrix_`` properties, we can see the
183 | adjacency matrix as a result of the causal discovery.
184 |
185 | .. code:: ipython3
186 |
187 | model.adjacency_matrix_
188 |
189 |
190 |
191 |
192 | .. parsed-literal::
193 |
194 | array([[ 0. , 0. , 0. , 0.87 , 0. , 0. ],
195 | [ 0.535, 0. , 0.411, 0. , 0. , 0. ],
196 | [ 0. , 0. , 0. , 0.894, 0. , 0. ],
197 | [ 0. , 0. , 0. , 0. , 0. , 0. ],
198 | [ 0.937, 0. , -0.103, 0. , 0. , 0. ],
199 | [ 0.875, 0. , 0. , 0. , 0. , 0. ]])
200 |
201 |
202 |
203 | We can draw a causal graph using a utility function.
204 |
205 | .. code:: ipython3
206 |
207 | make_dot(model.adjacency_matrix_)
208 |
209 |
210 |
211 |
212 | .. image:: ../image/high_dim2.svg
213 |
214 |
215 |
--------------------------------------------------------------------------------
/docs/tutorial/index.rst:
--------------------------------------------------------------------------------
1 | Tutorial
2 | ========
3 | In this tutorial, we will show you how to run LiNGAM algorithms and see the results. We will also show you how to run the bootstrap method and check the results.
4 |
5 | The following packages must be installed in order to run this tutorial. And import if necessary:
6 |
7 | * numpy
8 | * pandas
9 | * scikit-learn
10 | * graphviz
11 | * statsmodels
12 |
13 | Contents:
14 |
15 | .. toctree::
16 | :maxdepth: 2
17 |
18 | lingam
19 | bootstrap
20 | pk_direct
21 | multiple_dataset
22 | total_effect
23 | causal_effect
24 | var
25 | varma
26 | longitudinal
27 | bottom_up_parce
28 | pk_bottom_up_parce
29 | rcd
30 | camuv
31 | multi_group_rcd
32 | multi_group_camuv
33 | lina
34 | resit
35 | lim
36 | extract_ancestors
37 | f_correlation
38 | draw_graph
39 | visualize_nonlinear_causal_effect
40 | evaluate_model_fit
41 | bootstrap_with_imputation
42 | high_dim_direct_lingam
43 |
--------------------------------------------------------------------------------
/docs/tutorial/lim.rst:
--------------------------------------------------------------------------------
1 |
2 | LiM
3 | ========
4 |
5 | Model
6 | -------------------
7 | Linear Mixed (LiM) causal discovery algorithm [1]_ extends LiNGAM to handle the mixed data that consists of both continuous and discrete variables.
8 | The estimation is performed by first globally optimizing the log-likelihood function on the joint distribution of data with the acyclicity constraint, and then applying a local combinatorial search to output a causal graph.
9 |
10 | This method is based on the LiM model as shown below,
11 |
12 | i) As for the continuous variable, its value assigned to each of :math:`x_i` is a linear function of its parent variables denoted
13 | by :math:`x_{\mathrm{pa}(i)}` plus a non-Gaussian error term :math:`e_i`, that is,
14 |
15 | .. math::
16 |
17 |        x_i = e_i + c_i + \sum_{j \in \mathrm{pa}(i) }{b_{ij} x_j}, \quad e_i \sim \text{Non-Gaussian}(\cdot),
18 |
19 | where the error terms :math:`e_i` are continuous random variables with non-Gaussian densities, and the error variables :math:`e_i` are independent of
20 | each other. The coefficients :math:`b_{ij}` and intercepts :math:`c_i` are constants.
21 |
22 | ii) As for the discrete variable, its value equals 1 if the linear function of its parent variables :math:`x_{\mathrm{pa}(i)}` plus a Logistic error
23 | term :math:`e_i` is larger than 0, otherwise, its value equals 0. That is,
24 |
25 | .. math::
26 | :nowrap:
27 |
28 | \begin{eqnarray}
29 | x_i = \begin{cases}
30 | 1, & e_i + c_i + \sum_{j \in \mathrm{pa}(i) }{b_{ij} x_j}>0 \\
31 | 0, & \mathrm{otherwise}
32 |    \end{cases}, \quad e_i \sim \mathrm{Logistic}(0,1),
33 | \end{eqnarray}
34 |
35 | where the error terms :math:`e_i` follow the Logistic distribution, while the other notations are identical to those in continuous variables.
36 |
37 | This method makes the following assumptions.
38 |
39 | #. Continuous variables and binary variables.
40 | #. Linearity
41 | #. Acyclicity
42 | #. No hidden common causes
43 | #. Baselines are the same when predicting one binary variable from the other for every pair of binary variables.
44 |
45 | References
46 |
47 | .. [1] Y. Zeng, S. Shimizu, H. Matsui, F. Sun.
48 | Causal discovery for linear mixed data.
49 | In Proc. First Conference on Causal Learning and Reasoning (CLeaR2022). PMLR 177, pp. 994-1009, 2022.
50 |
51 | Import and settings
52 | -------------------
53 |
54 | In this example, we need to import ``numpy``, and ``random``,
55 | in addition to ``lingam``.
56 |
57 | .. code-block:: python
58 |
59 | import numpy as np
60 | import random
61 | import lingam
62 | import lingam.utils as ut
63 |
64 | print([np.__version__, lingam.__version__])
65 |
66 |
67 | .. parsed-literal::
68 |
69 | ['1.20.3', '1.6.0']
70 |
71 |
72 | Test data
73 | -----------
74 |
75 | First, we generate a causal structure with 2 variables, where one of them is randomly set to be a discrete variable.
76 |
77 | .. code-block:: python
78 |
79 | ut.set_random_seed(1)
80 | n_samples, n_features, n_edges, graph_type, sem_type = 1000, 2, 1, 'ER', 'mixed_random_i_dis'
81 | B_true = ut.simulate_dag(n_features, n_edges, graph_type)
82 | W_true = ut.simulate_parameter(B_true) # row to column
83 |
84 | no_dis = np.random.randint(1, n_features) # number of discrete vars.
85 | print('There are %d discrete variable(s).' % (no_dis))
86 | nodes = [iii for iii in range(n_features)]
87 | dis_var = random.sample(nodes, no_dis) # randomly select no_dis discrete variables
88 | dis_con = np.full((1, n_features), np.inf)
89 | for iii in range(n_features):
90 | if iii in dis_var:
91 | dis_con[0, iii] = 0 # 1:continuous; 0:discrete
92 | else:
93 | dis_con[0, iii] = 1
94 |
95 | X = ut.simulate_linear_mixed_sem(W_true, n_samples, sem_type, dis_con)
96 |
97 | print('The true adjacency matrix is:\n', W_true)
98 |
99 |
100 | .. parsed-literal::
101 |
102 | There are 1 discrete variable(s).
103 | The true adjacency matrix is:
104 | [[0. 0. ]
105 | [1.3082251 0. ]]
106 |
107 |
108 | Causal Discovery for linear mixed data
109 | -----------------------------------------
110 |
111 | To run causal discovery, we create a ``LiM`` object and call the ``fit``
112 | method.
113 |
114 | .. code-block:: python
115 |
116 | model = lingam.LiM()
117 | model.fit(X, dis_con, only_global=True)
118 |
119 |
120 |
121 | .. parsed-literal::
122 |
123 |
124 |
125 |
126 |
127 | Using the ``_adjacency_matrix`` properties, we can see the estimated adjacency matrix between mixed variables.
128 |
129 |
130 |
131 | .. code-block:: python
132 |
133 | print('The estimated adjacency matrix is:\n', model._adjacency_matrix)
134 |
135 |
136 |
137 | .. parsed-literal::
138 |
139 | The estimated adjacency matrix is:
140 | [[ 0. , 0. ],
141 | [-1.09938457, 0. ]]
142 |
--------------------------------------------------------------------------------
/docs/tutorial/total_effect.rst:
--------------------------------------------------------------------------------
1 | Total Effect
2 | ============
3 |
4 | We use lingam package:
5 |
6 | .. code-block:: python
7 |
8 | import lingam
9 |
10 | Then, we create a :class:`~lingam.DirectLiNGAM` object and call the :func:`~lingam.DirectLiNGAM.fit` method:
11 |
12 | .. code-block:: python
13 |
14 | model = lingam.DirectLiNGAM()
15 | model.fit(X)
16 |
17 | To estimate the total effect, we can call :func:`~lingam.DirectLiNGAM.estimate_total_effect` method. The following example estimates the total effect from x3 to x1.
18 |
19 | .. code-block:: python
20 |
21 | te = model.estimate_total_effect(X, 3, 1)
22 | print(f'total effect: {te:.3f}')
23 |
24 | .. code-block:: python
25 |
26 | total effect: 21.002
27 |
28 | For details, see also https://github.com/cdt15/lingam/blob/master/examples/TotalEffect.ipynb
29 |
--------------------------------------------------------------------------------
/docs/tutorial/visualize_nonlinear_causal_effect.rst:
--------------------------------------------------------------------------------
1 | Visualization of nonlinear causal effect
2 | ========================================
3 |
4 | Import and settings
5 | -------------------
6 |
7 | .. code-block:: python
8 |
9 | import numpy as np
10 | import pandas as pd
11 |
12 | from lingam import RESIT
13 | from sklearn.ensemble import RandomForestRegressor
14 |
15 | from lingam.utils import make_dot, visualize_nonlinear_causal_effect
16 | import matplotlib.pyplot as plt
17 |
18 | np.random.seed(0)
19 |
20 | Data generation
21 | ---------------
22 |
23 | .. code-block:: python
24 |
25 | n_samples = 1000
26 |
27 | def N(size):
28 | return np.random.uniform(size=size) - 0.5
29 |
30 | X = np.zeros([n_samples, 5])
31 | X[:, 0] = N(n_samples)
32 | X[:, 1] = 3 * (X[:, 0] + 0.25) * (X[:, 0] - 0.25) + N(n_samples)
33 | X[:, 2] = -0.75 * (X[:, 0] - 1) * (X[:, 0] - 2) + 1.5 * X[:, 1] + N(n_samples)
34 | X[:, 3] = 5 * (X[:, 1] + 0.4) * (X[:, 1] - 0.1) * (X[:, 1] - 0.5) + 1 * np.log(5 * X[:, 2] + 20) + N(n_samples)
35 | X[:, 4] = -0.8 * (X[:, 3] - 1.5) * (X[:, 3] - 3.5) + N(n_samples)
36 | X = pd.DataFrame(X, columns=[f'X{i}' for i in range(5)])
37 |
38 | pd.plotting.scatter_matrix(X, figsize=(8, 8), alpha=0.5)
39 | plt.show()
40 |
41 | .. image:: ../image/visualize_nonlinear_causal_effect1.svg
42 |
43 |
44 | Causal discovery
45 | ----------------
46 |
47 | .. code-block:: python
48 |
49 | regressor = RandomForestRegressor()
50 | model = RESIT(regressor=regressor, alpha=1)
51 | bs_result = model.bootstrap(X, n_sampling=100)
52 |
53 |
54 | Visualization
55 | -------------
56 |
57 | .. code-block:: python
58 |
59 | fig = plt.figure(figsize=(9, 6))
60 |
61 | fig = visualize_nonlinear_causal_effect(X, bs_result, RandomForestRegressor(), "X2", "X3", fig=fig)
62 |
63 | for ax in fig.get_axes():
64 | ax.legend()
65 | ax.grid()
66 |
67 | fig.tight_layout()
68 | fig.show()
69 |
70 | .. image:: ../image/visualize_nonlinear_causal_effect2.svg
71 |
72 |
--------------------------------------------------------------------------------
/examples/DirectLiNGAM_fast.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import os
3 |
4 | current_dir = os.path.dirname(os.path.abspath(__file__))
5 | parent_dir = os.path.dirname(current_dir)
6 | sys.path.append(parent_dir)
7 |
8 | import numpy as np
9 | import pandas as pd
10 | import graphviz
11 | import lingam
12 | from lingam.utils import make_dot, get_cuda_version
13 |
def main():
    """Compare the CPU and CUDA implementations of DirectLiNGAM.

    Runs causal discovery on synthetic 6-variable data twice -- once with
    the default ``pwling`` measure and once with the CUDA-accelerated
    ``pwling_fast`` measure -- and asserts that both yield the same
    adjacency matrix. Does nothing when no CUDA toolkit is available,
    because ``pwling_fast`` requires CUDA.
    """
    # Guard clause: skip entirely on machines without CUDA.
    cuda = get_cuda_version()
    if not cuda:
        return

    print([np.__version__, pd.__version__, graphviz.__version__, lingam.__version__])

    np.set_printoptions(precision=3, suppress=True)
    np.random.seed(100)

    # Test data: 6 variables generated from a known linear non-Gaussian SEM
    # (uniform noise), so the true causal structure is known.
    x3 = np.random.uniform(size=1000)
    x0 = 3.0 * x3 + np.random.uniform(size=1000)
    x2 = 6.0 * x3 + np.random.uniform(size=1000)
    x1 = 3.0 * x0 + 2.0 * x2 + np.random.uniform(size=1000)
    x5 = 4.0 * x0 + np.random.uniform(size=1000)
    x4 = 8.0 * x0 - 1.0 * x2 + np.random.uniform(size=1000)
    X = pd.DataFrame(
        np.array([x0, x1, x2, x3, x4, x5]).T,
        columns=['x0', 'x1', 'x2', 'x3', 'x4', 'x5'],
    )

    # Reference run: default CPU implementation of the pairwise likelihood
    # ratio measure (`pwling`).
    model = lingam.DirectLiNGAM(measure='pwling')
    model.fit(X)

    print(model.causal_order_)
    print(model.adjacency_matrix_)

    m = model.adjacency_matrix_

    # `pwling_fast` is the CUDA-accelerated implementation of `pwling`;
    # it must reproduce the CPU result.
    model = lingam.DirectLiNGAM(measure='pwling_fast')
    model.fit(X)

    assert np.allclose(model.adjacency_matrix_, m)

    # Independence between error variables: entry (i, j) is the p-value of
    # the independence test between error variables e_i and e_j. Small
    # values suggest the LiNGAM assumption is broken.
    p_values = model.get_error_independence_p_values(X)
    print(p_values)

if __name__ == "__main__":
    main()
60 |
--------------------------------------------------------------------------------
/examples/data/create_5vars_longitudinal.py:
--------------------------------------------------------------------------------
"""Generate 5-variable longitudinal test data (3 time points, lag 1).

Writes 5vars_longitudinal_t{0,1,2}.csv, each with 200 samples of 5 variables.
"""
import numpy as np
import pandas as pd

np.random.seed(0)

# problem size
n_features = 5
n_samples = 200
n_lags = 1
n_timepoints = 3

causal_orders = []
B_t_true = np.empty((n_timepoints, n_features, n_features))  # instantaneous effects per t
B_tau_true = np.empty((n_timepoints, n_lags, n_features, n_features))  # lagged effects per t
X_t = np.empty((n_timepoints, n_samples, n_features))  # generated data per t

# B(0,0)
B_t_true[0] = np.array(
    [
        [0.0, 0.5, -0.3, 0.0, 0.0],
        [0.0, 0.0, -0.3, 0.4, 0.0],
        [0.0, 0.0, 0.0, 0.3, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0],
        [0.1, -0.7, 0.0, 0.0, 0.0],
    ]
)
causal_orders.append([3, 2, 1, 0, 4])

# B(1,1)
B_t_true[1] = np.array(
    [
        [0.0, 0.2, -0.1, 0.0, -0.5],
        [0.0, 0.0, 0.0, 0.4, 0.0],
        [0.0, 0.3, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0],
        [0.0, -0.4, 0.0, 0.0, 0.0],
    ]
)
causal_orders.append([3, 1, 2, 4, 0])

# B(2,2)
B_t_true[2] = np.array(
    [
        [0.0, 0.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, -0.7, 0.0, 0.5],
        [0.2, 0.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, -0.4, 0.0, 0.0],
        [0.3, 0.0, 0.0, 0.0, 0.0],
    ]
)
causal_orders.append([0, 2, 4, 3, 1])

# create B(t,t-τ) and X
for t in range(n_timepoints):
    # external influence: made non-Gaussian via a sign-preserving power
    # transform, then standardized to zero mean and unit variance
    expon = 0.1
    ext = np.empty((n_features, n_samples))
    for i in range(n_features):
        ext[i, :] = np.random.normal(size=(1, n_samples))
        ext[i, :] = np.multiply(np.sign(ext[i, :]), abs(ext[i, :]) ** expon)
        ext[i, :] = ext[i, :] - np.mean(ext[i, :])
        ext[i, :] = ext[i, :] / np.std(ext[i, :])

    # create B(t,t-τ): random dense lagged-effect matrices
    # (B_tau_true[0] is drawn but never used, since t=0 has no earlier
    # time point — see the `t > 0` guard below)
    for tau in range(n_lags):
        value = np.random.uniform(low=0.01, high=0.5, size=(n_features, n_features))
        sign = np.random.choice([-1, 1], size=(n_features, n_features))
        B_tau_true[t, tau] = np.multiply(value, sign)

    # create X(t): generate each variable in causal order so that all of its
    # parents are already populated when it is computed
    X = np.zeros((n_features, n_samples))
    for co in causal_orders[t]:
        X[co] = np.dot(B_t_true[t][co, :], X) + ext[co]
        if t > 0:
            for tau in range(n_lags):
                X[co] = X[co] + np.dot(B_tau_true[t, tau][co, :], X_t[t - (tau + 1)].T)

    X_t[t] = X.T

# save one CSV per time point
for t in range(n_timepoints):
    df = pd.DataFrame(X_t[t], columns=["x0", "x1", "x2", "x3", "x4"])
    df.to_csv(f"5vars_longitudinal_t{t}.csv", index=False)
82 |
--------------------------------------------------------------------------------
/examples/data/create_5vars_var.py:
--------------------------------------------------------------------------------
"""Generate 5-variable VAR(1) test data and save it as 5vars_var.csv."""
import numpy as np
import pandas as pd

# model size and series length
n = 5
T = 1000
random_state = None  # no fixed seed: a fresh dataset on every run

np.random.seed(random_state)

T_spurious = 20  # burn-in samples discarded below
expon = 1.5  # power-transform exponent that makes the noise non-Gaussian

# B0: instantaneous effects — random sparse strictly lower-triangular matrix
value = np.random.uniform(low=0.05, high=0.5, size=(n, n))
sign = np.random.choice([-1, 1], size=(n, n))
B0 = np.multiply(value, sign)
B0 = np.multiply(B0, np.random.binomial(1, 0.4, size=(n, n)))
B0 = np.tril(B0, k=-1)

# B1: lagged effects; M1 = (I - B0)^-1 B1 is the reduced-form VAR(1) matrix
value = np.random.uniform(low=0.05, high=0.5, size=(n, n))
sign = np.random.choice([-1, 1], size=(n, n))
B1 = np.multiply(value, sign)
B1 = np.multiply(B1, np.random.binomial(1, 0.4, size=(n, n)))
M1 = np.dot(np.linalg.inv(np.eye(n) - B0), B1)

# ee: standardized non-Gaussian disturbances (sign-preserving power transform)
ee = np.empty((n, T + T_spurious))
for i in range(n):
    ee[i, :] = np.random.normal(size=(1, T + T_spurious))
    ee[i, :] = np.multiply(np.sign(ee[i, :]), abs(ee[i, :]) ** expon)
    ee[i, :] = ee[i, :] - np.mean(ee[i, :])
    ee[i, :] = ee[i, :] / np.std(ee[i, :])

# reduced-form noise with random per-variable scales
std_e = np.random.uniform(size=(n,)) + 0.5
nn = np.dot(np.dot(np.linalg.inv(np.eye(n) - B0), np.diag(std_e)), ee)

# simulate the VAR(1) recursion
xx = np.zeros((n, T + T_spurious))
xx[:, 0] = np.random.normal(size=(n,))

for t in range(1, T + T_spurious):
    xx[:, t] = np.dot(M1, xx[:, t - 1]) + nn[:, t]

# NOTE(review): this slice yields T - 1 = 999 samples, not T, and drops the
# first post-burn-in column as well; xx[:, T_spurious:] would keep all T
# samples. Confirm whether the off-by-one is intentional before changing it.
data = xx[:, T_spurious + 1 : T_spurious + T]


df = pd.DataFrame(data.T, columns=["x0", "x1", "x2", "x3", "x4"])

df.to_csv("5vars_var.csv", index=False)
47 |
--------------------------------------------------------------------------------
/examples/data/create_5vars_varma.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 |
4 |
def randnetbalanced(dims, samples, indegree, parminmax, errminmax):
    """Create a random linear network whose nodes have roughly balanced variances.

    Parameters
    ----------
    dims : int
        Number of variables (nodes).
    samples : int
        Number of internal samples used only to calibrate the weight scaling.
    indegree : int or float("inf")
        Maximum number of parents per node; ``float("inf")`` means every
        previous variable is a parent.
    parminmax : dict
        ``{"min": .., "max": ..}`` range for the std of the parents' combined
        contribution.
    errminmax : dict
        ``{"min": .., "max": ..}`` range for the error-term std.

    Returns
    -------
    B : array, shape (dims, dims)
        Connection weight matrix; row i holds the weights from earlier nodes,
        so B is strictly lower triangular in the generation order.
    errstd : array, shape (dims, 1)
        Standard deviations of the error terms.
    """
    # First, generate errstd
    errstd = np.random.uniform(
        low=errminmax["min"], high=errminmax["max"], size=(dims, 1)
    )

    # Initializations
    X = np.empty(shape=[dims, samples])
    B = np.zeros([dims, dims])

    # Go through each node in turn
    for i in range(dims):

        # If indegree is infinite, all previous variables are parents;
        # otherwise randomly pick at most ``indegree`` of them.
        # BUGFIX: these two branches were swapped, so a finite indegree was
        # silently ignored and every previous variable became a parent.
        if indegree == float("inf"):
            par = np.arange(i)
        elif i <= indegree:
            par = np.arange(i)
        else:
            par = np.random.permutation(i)[:indegree]

        if len(par) == 0:
            # Node has no parents: inflate errstd so its variance is roughly
            # comparable to that of nodes that do have parents.
            parent_std = np.random.uniform(low=parminmax["min"], high=parminmax["max"])
            errstd[i] = np.sqrt(errstd[i] ** 2 + parent_std ** 2)

            # No contribution from parents
            X[i] = np.zeros(samples)
        else:
            # Node has parents: draw raw weights
            w = np.random.normal(size=[1, len(par)])

            wfull = np.zeros([1, i])
            wfull[0, par] = w

            # Calculate contribution of parents
            X[i] = np.dot(wfull, X[:i, :])

            # Randomly select a 'parents std'
            parstd = np.random.uniform(low=parminmax["min"], high=parminmax["max"])

            # Scale w so the combined parent contribution has std 'parstd'
            scaling = parstd / np.sqrt(np.mean(X[i] ** 2))
            w = w * scaling

            # Recalculate contribution of parents with the scaled weights
            wfull = np.zeros([1, i])
            wfull[0, par] = w
            X[i] = np.dot(wfull, X[:i, :])

            # Fill in B
            B[i, par] = w

        # Add the error term
        X[i] = X[i] + np.random.normal(size=samples) * errstd[i]

    return B, errstd
67 |
68 |
# ---- VARMA(1, 1) test-data generation ------------------------------------
n = 5
T = 500
head = 100  # burn-in samples, dropped before saving
T = T + head

# psi0: instantaneous effects — random DAG weights, randomly permuted
indegree = float("inf")
psi0, _ = randnetbalanced(
    n, n, indegree, {"min": 0.05, "max": 0.5}, {"min": 0.05, "max": 0.5}
)
permutation = np.random.permutation(n)
psi0 = psi0[permutation][:, permutation]

# causal order implied by the permutation
causal_order = np.empty(len(permutation))
causal_order[permutation] = np.arange(len(permutation))
causal_order = causal_order.astype(int)

# phi1: lag-1 autoregressive coefficients
value = np.random.uniform(low=0.01, high=0.5, size=(n, n))
sign = np.random.choice([-1, 1], size=(n, n))
phi1 = np.multiply(value, sign)

# theta1: lag-1 moving-average coefficients
value = np.random.uniform(low=0.01, high=0.5, size=(n, n))
sign = np.random.choice([-1, 1], size=(n, n))
theta1 = np.multiply(value, sign)

# psi1, omega1: structural-form lag matrices
psi1 = np.dot(np.eye(n) - psi0, phi1)
# BUGFIX: np.dot(a, b, c) treats the third positional argument as the *out*
# buffer, so the previous one-liner computed only (I - psi0) @ theta1 and the
# trailing inv(I - psi0) factor was silently discarded. Chain the two
# products explicitly instead.
omega1 = np.dot(np.dot(np.eye(n) - psi0, theta1), np.linalg.inv(np.eye(n) - psi0))

# external influence: non-Gaussian (sign-preserving power transform),
# standardized to zero mean and unit variance
expon = 0.1
ext = np.empty((n, T))
for i in range(n):
    ext[i, :] = np.random.normal(size=(1, T))
    ext[i, :] = np.multiply(np.sign(ext[i, :]), abs(ext[i, :]) ** expon)
    ext[i, :] = ext[i, :] - np.mean(ext[i, :])
    ext[i, :] = ext[i, :] / np.std(ext[i, :])

# observed signals y, generated variable-by-variable in the causal order
y = np.zeros((n, T))
y[:, 0] = np.random.normal(loc=0.1, scale=1, size=(n,)) * np.random.choice(
    [-1, 1], size=(n,)
)
for t in range(1, T):
    for i in causal_order:
        y[i, t] = (
            np.dot(psi0[i, :], y[:, t])
            + np.dot(psi1[i, :], y[:, t - 1])
            + ext[i, t]
            + np.dot(omega1[i, :], ext[:, t - 1])
        )

# drop the burn-in and save
df = pd.DataFrame(y[:, head:].T, columns=["x0", "x1", "x2", "x3", "x4"])

df.to_csv("5vars_varma.csv", index=False)
127 |
--------------------------------------------------------------------------------
/examples/data/create_6vars.py:
--------------------------------------------------------------------------------
import numpy as np
import pandas as pd

# Number of observations in the generated dataset.
N_SAMPLES = 1000

np.random.seed(23)


def _uniform_noise():
    """Draw one vector of U(0, 1) external influences."""
    return np.random.uniform(size=N_SAMPLES)


# Linear SEM with uniform noise; x3 is the only exogenous variable.
x3 = _uniform_noise()
x0 = 3.0 * x3 + _uniform_noise()
x2 = 6.0 * x3 + _uniform_noise()
x1 = 3.0 * x0 + 2.0 * x2 + _uniform_noise()
x5 = 4.0 * x0 + _uniform_noise()
x4 = 8.0 * x0 - 1.0 * x2 + _uniform_noise()

df = pd.DataFrame({"x0": x0, "x1": x1, "x2": x2, "x3": x3, "x4": x4, "x5": x5})
df.to_csv("6vars.csv", index=False)
18 |
--------------------------------------------------------------------------------
/examples/data/create_6vars_mid_latent.py:
--------------------------------------------------------------------------------
import numpy as np
import pandas as pd

# Number of observations in the generated dataset.
N_SAMPLES = 300

np.random.seed(0)


# IDIOM FIX: was a lambda assigned to a name (PEP 8 E731); a def gives the
# function a proper name for tracebacks and a place for a docstring.
def get_external_effect(n):
    """Return ``n`` samples of non-Gaussian noise (cubed Gaussian, scale 0.5)."""
    return np.random.normal(0.0, 0.5, n) ** 3


# Linear SEM; x5 and x6 are exogenous, and x6 confounds x2 and x4.
x5 = get_external_effect(N_SAMPLES)
x6 = get_external_effect(N_SAMPLES)
x1 = 0.6 * x5 + get_external_effect(N_SAMPLES)
x3 = 0.5 * x5 + get_external_effect(N_SAMPLES)
x0 = 1.0 * x1 + 1.0 * x3 + get_external_effect(N_SAMPLES)
x2 = 0.8 * x0 - 0.6 * x6 + get_external_effect(N_SAMPLES)
x4 = 1.0 * x0 - 0.5 * x6 + get_external_effect(N_SAMPLES)

# The latent variable x6 is not included.
df = pd.DataFrame(
    np.array([x0, x1, x2, x3, x4, x5]).T, columns=["x0", "x1", "x2", "x3", "x4", "x5"]
)

df.to_csv("6vars_mid_latent.csv", index=False)
24 |
--------------------------------------------------------------------------------
/examples/data/create_6vars_top_latent.py:
--------------------------------------------------------------------------------
import numpy as np
import pandas as pd

# Number of observations in the generated dataset.
N_SAMPLES = 1000

np.random.seed(1000)


def _uniform_noise():
    """Draw one vector of U(0, 1) external influences."""
    return np.random.uniform(size=N_SAMPLES)


# Linear SEM with uniform noise; the exogenous x6 feeds both x3 and x2.
x6 = _uniform_noise()
x3 = 2.0 * x6 + _uniform_noise()
x0 = 0.5 * x3 + _uniform_noise()
x2 = 2.0 * x6 + _uniform_noise()
x1 = 0.5 * x0 + 0.5 * x2 + _uniform_noise()
x5 = 0.5 * x0 + _uniform_noise()
x4 = 0.5 * x0 - 0.5 * x2 + _uniform_noise()

# The latent variable x6 is not included in the saved table.
df = pd.DataFrame({"x0": x0, "x1": x1, "x2": x2, "x3": x3, "x4": x4, "x5": x5})

df.to_csv("6vars_top_latent.csv", index=False)
22 |
--------------------------------------------------------------------------------
/examples/datageneration_CAMUV.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdt15/lingam/f2e53cf35e0e9a814e6e3e274d76bc0cf9dbbca0/examples/datageneration_CAMUV.png
--------------------------------------------------------------------------------
/examples/images/example_to_analyze_data_with_discrete_variables.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdt15/lingam/f2e53cf35e0e9a814e6e3e274d76bc0cf9dbbca0/examples/images/example_to_analyze_data_with_discrete_variables.png
--------------------------------------------------------------------------------
/examples/images/example_to_analyze_data_with_discrete_variables2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdt15/lingam/f2e53cf35e0e9a814e6e3e274d76bc0cf9dbbca0/examples/images/example_to_analyze_data_with_discrete_variables2.png
--------------------------------------------------------------------------------
/examples/images/example_to_analyze_data_with_discrete_variables3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdt15/lingam/f2e53cf35e0e9a814e6e3e274d76bc0cf9dbbca0/examples/images/example_to_analyze_data_with_discrete_variables3.png
--------------------------------------------------------------------------------
/examples/images/example_to_analyze_data_with_discrete_variables4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdt15/lingam/f2e53cf35e0e9a814e6e3e274d76bc0cf9dbbca0/examples/images/example_to_analyze_data_with_discrete_variables4.png
--------------------------------------------------------------------------------
/examples/images/example_to_analyze_data_with_discrete_variables5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdt15/lingam/f2e53cf35e0e9a814e6e3e274d76bc0cf9dbbca0/examples/images/example_to_analyze_data_with_discrete_variables5.png
--------------------------------------------------------------------------------
/examples/images/example_to_analyze_data_with_discrete_variables6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdt15/lingam/f2e53cf35e0e9a814e6e3e274d76bc0cf9dbbca0/examples/images/example_to_analyze_data_with_discrete_variables6.png
--------------------------------------------------------------------------------
/lingam/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | The lingam module includes implementation of the LiNGAM algorithms.
3 | The LiNGAM Project: https://sites.google.com/view/sshimizu06/lingam
4 | """
5 |
6 | from .bootstrap import BootstrapResult
7 | from .bottom_up_parce_lingam import BottomUpParceLiNGAM
8 | from .camuv import CAMUV
9 | from .causal_based_simulator import CausalBasedSimulator
10 | from .causal_effect import CausalEffect
11 | from .direct_lingam import DirectLiNGAM
12 | from .ica_lingam import ICALiNGAM
13 | from .lim import LiM
14 | from .longitudinal_lingam import LongitudinalBootstrapResult, LongitudinalLiNGAM
15 | from .multi_group_direct_lingam import MultiGroupDirectLiNGAM
16 | from .multi_group_rcd import MultiGroupRCD
17 | from .rcd import RCD
18 | from .resit import RESIT
19 | from .var_lingam import VARBootstrapResult, VARLiNGAM
20 | from .varma_lingam import VARMABootstrapResult, VARMALiNGAM
21 | from .lina import LiNA
22 | from .lina import MDLiNA
23 | from .high_dim_direct_lingam import HighDimDirectLiNGAM
24 | from .multi_group_camuv import MultiGroupCAMUV
25 |
26 | __all__ = [
27 | "ICALiNGAM",
28 | "DirectLiNGAM",
29 | "BootstrapResult",
30 | "MultiGroupDirectLiNGAM",
31 | "CausalEffect",
32 | "VARLiNGAM",
33 | "VARMALiNGAM",
34 | "LongitudinalLiNGAM",
35 | "VARBootstrapResult",
36 | "VARMABootstrapResult",
37 | "LongitudinalBootstrapResult",
38 | "BottomUpParceLiNGAM",
39 | "RCD",
40 | "CAMUV",
41 | "RESIT",
42 | "LiM",
43 | "CausalBasedSimulator",
44 | "MultiGroupRCD",
45 | "LiNA",
46 | "MDLiNA",
47 | "HighDimDirectLiNGAM",
48 | "MultiGroupCAMUV",
49 | ]
50 |
51 | __version__ = "1.10.0"
52 |
--------------------------------------------------------------------------------
/lingam/experimental/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Python implementation of the LiNGAM algorithms.
3 | The LiNGAM Project: https://sites.google.com/view/sshimizu06/lingam
4 | """
5 |
6 | from .oct import OutOfSampleCausalTuning
7 | from .cdg import CausalDataGenerator
8 |
9 | __all__ = [
10 | "OutOfSampleCausalTuning",
11 | "CausalDataGenerator",
12 | ]
13 |
--------------------------------------------------------------------------------
/lingam/hsic.py:
--------------------------------------------------------------------------------
1 | """
2 | Python implementation of the LiNGAM algorithms.
3 | The LiNGAM Project: https://sites.google.com/view/sshimizu06/lingam
4 | """
5 |
6 | import numpy as np
7 | from scipy.stats import gamma
8 | from statsmodels.nonparametric import bandwidths
9 |
10 | __all__ = ["get_kernel_width", "get_gram_matrix", "hsic_teststat", "hsic_test_gamma"]
11 |
12 | # Modify hsic with reference to causal-learn:
13 | # causallearn/search/FCMBased/lingam/hsic.py
14 |
15 |
def get_kernel_width(X):
    """Calculate the bandwidth as the median distance between points.

    At most 100 points are used, since the median is only a heuristic
    and 100 points give a sufficiently robust estimate.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training data, where ``n_samples`` is the number of samples
        and ``n_features`` is the number of features.

    Returns
    -------
    float
        The bandwidth parameter.
    """
    X_med = X if X.shape[0] <= 100 else X[:100, :]
    n = X_med.shape[0]

    # Pairwise squared Euclidean distances via the Gram-matrix identity.
    sq_norms = np.sum(X_med * X_med, 1).reshape(n, 1)
    dists = sq_norms + sq_norms.T - 2 * np.dot(X_med, X_med.T)

    # Keep only the strictly upper triangle, then take the median of the
    # positive entries.
    dists = (dists - np.tril(dists)).ravel()
    return np.sqrt(0.5 * np.median(dists[dists > 0]))
45 |
46 |
47 | def _rbf_dot(X, width):
48 | """rbf dot, in special case with X dot X"""
49 | G = np.sum(X * X, axis=1)
50 | H = G[None, :] + G[:, None] - 2 * np.dot(X, X.T)
51 | return np.exp(-H / 2 / (width**2))
52 |
53 |
def get_gram_matrix(X, width):
    """Compute the gram matrix and its doubly-centered version.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training data, where ``n_samples`` is the number of samples
        and ``n_features`` is the number of features.

    width : float
        The bandwidth parameter.

    Returns
    -------
    K, Kc : array
        The gram matrix and the centered gram matrix.
    """
    n = X.shape[0]
    K = _rbf_dot(X, width)

    col_sums = K.sum(axis=0)
    row_sums = K.sum(axis=1)
    total = row_sums.sum()

    # Double centering: subtract row/column means, add back the grand mean.
    Kc = K - (col_sums[None, :] + row_sums[:, None]) / n + (total / n**2)
    return K, Kc
79 |
80 |
def hsic_teststat(Kc, Lc, n):
    """Compute the HSIC test statistic m*HSICb under H1.

    Parameters
    ----------
    Kc, Lc : array
        The centered gram matrices.

    n : float
        The number of samples.

    Returns
    -------
    float
        The HSIC statistic.
    """
    product = np.multiply(Kc.T, Lc)
    return 1 / n * product.sum()
99 |
100 |
def hsic_test_gamma(X, Y, bw_method="mdbs"):
    """get the HSIC statistic.

    The null distribution of the statistic is approximated by a gamma
    distribution whose mean and variance are estimated below.

    Parameters
    ----------
    X, Y : array-like, shape (n_samples, n_features)
        Training data, where ``n_samples`` is the number of samples
        and ``n_features`` is the number of features.

    bw_method : str, optional (default=``mdbs``)
        The method used to calculate the bandwidth of the HSIC.

        * ``mdbs`` : Median distance between samples.
        * ``scott`` : Scott's Rule of Thumb.
        * ``silverman`` : Silverman's Rule of Thumb.

    Returns
    -------
    test_stat : float
        the HSIC statistic.

    p : float
        the HSIC p-value.
    """
    # promote 1-D inputs to column vectors
    X = X.reshape(-1, 1) if X.ndim == 1 else X
    Y = Y.reshape(-1, 1) if Y.ndim == 1 else Y

    if bw_method == "scott":
        width_x = bandwidths.bw_scott(X)
        width_y = bandwidths.bw_scott(Y)
    elif bw_method == "silverman":
        width_x = bandwidths.bw_silverman(X)
        width_y = bandwidths.bw_silverman(Y)
    # Get kernel width to median distance between points (default, "mdbs")
    else:
        width_x = get_kernel_width(X)
        width_y = get_kernel_width(Y)

    # these are slightly biased estimates of centered gram matrices
    K, Kc = get_gram_matrix(X, width_x)
    L, Lc = get_gram_matrix(Y, width_y)

    # test statistic m*HSICb under H1
    n = X.shape[0]
    test_stat = hsic_teststat(Kc, Lc, n)

    var = (1 / 6 * Kc * Lc) ** 2
    # second subtracted term is bias correction
    var = 1 / n / (n - 1) * (np.sum(var) - np.trace(var))
    # variance under H0
    var = 72 * (n - 4) * (n - 5) / n / (n - 1) / (n - 2) / (n - 3) * var

    # zero the diagonals so only off-diagonal kernel values enter the means
    K[np.diag_indices(n)] = 0
    L[np.diag_indices(n)] = 0
    mu_X = 1 / n / (n - 1) * K.sum()
    mu_Y = 1 / n / (n - 1) * L.sum()
    # mean under H0
    mean = 1 / n * (1 + mu_X * mu_Y - mu_X - mu_Y)

    # match a gamma distribution to the H0 mean and variance
    alpha = mean**2 / var
    # threshold for hsicArr*m
    beta = var * n / mean
    p = gamma.sf(test_stat, alpha, scale=beta)

    return test_stat, p
166 |
--------------------------------------------------------------------------------
/lingam/ica_lingam.py:
--------------------------------------------------------------------------------
1 | """
2 | Python implementation of the LiNGAM algorithms.
3 | The LiNGAM Project: https://sites.google.com/view/sshimizu06/lingam
4 | """
5 |
6 | import numpy as np
7 | from scipy.optimize import linear_sum_assignment
8 | from sklearn.utils import check_array
9 | from sklearn.decomposition import FastICA
10 |
11 | from .base import _BaseLiNGAM
12 |
13 |
class ICALiNGAM(_BaseLiNGAM):
    """Implementation of ICA-based LiNGAM Algorithm [1]_

    References
    ----------
    .. [1] S. Shimizu, P. O. Hoyer, A. Hyvärinen, and A. J. Kerminen.
       A linear non-gaussian acyclic model for causal discovery.
       Journal of Machine Learning Research, 7:2003-2030, 2006.
    """

    def __init__(self, random_state=None, max_iter=1000):
        """Construct a ICA-based LiNGAM model.

        Parameters
        ----------
        random_state : int, optional (default=None)
            ``random_state`` is the seed used by the random number generator.
        max_iter : int, optional (default=1000)
            The maximum number of iterations of FastICA.
        """
        super().__init__(random_state)
        self._max_iter = max_iter

    def fit(self, X):
        """Fit the model to X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data, where ``n_samples`` is the number of samples
            and ``n_features`` is the number of features.

        Returns
        -------
        self : object
            Returns the instance of self.
        """
        X = check_array(X)

        # obtain an unmixing matrix from the given data via FastICA
        ica = FastICA(max_iter=self._max_iter, random_state=self._random_state)
        ica.fit(X)
        W_ica = ica.components_

        # permute the rows of W_ica so that its diagonal is as large as
        # possible in absolute value (Hungarian algorithm on 1/|W|)
        _, col_index = linear_sum_assignment(1 / np.abs(W_ica))
        PW_ica = np.zeros_like(W_ica)
        PW_ica[col_index] = W_ica

        # obtain a vector to scale each row so the diagonal becomes 1
        D = np.diag(PW_ica)[:, np.newaxis]

        # estimate an adjacency matrix: B = I - D^-1 * PW_ica
        W_estimate = PW_ica / D
        B_estimate = np.eye(len(W_estimate)) - W_estimate

        causal_order = self._estimate_causal_order(B_estimate)
        self._causal_order = causal_order

        # coefficient estimation is delegated to the base class
        return self._estimate_adjacency_matrix(X)

    def _search_causal_order(self, matrix):
        """Obtain a causal order from the given matrix strictly.

        Parameters
        ----------
        matrix : array-like, shape (n_features, n_features)
            Target square matrix (estimated connection strengths).

        Return
        ------
        causal_order : array, shape [n_features, ]
            A causal order of the given matrix on success, None otherwise.
        """
        causal_order = []

        row_num = matrix.shape[0]
        original_index = np.arange(row_num)

        while 0 < len(matrix):
            # find a row all of which elements are zero (an exogenous variable)
            row_index_list = np.where(np.sum(np.abs(matrix), axis=1) == 0)[0]
            if len(row_index_list) == 0:
                # no all-zero row: matrix cannot be permuted to be strictly
                # lower triangular, so no causal order exists
                break

            target_index = row_index_list[0]

            # append i to the end of the list
            causal_order.append(original_index[target_index])
            original_index = np.delete(original_index, target_index, axis=0)

            # remove the i-th row and the i-th column from matrix
            mask = np.delete(np.arange(len(matrix)), target_index, axis=0)
            matrix = matrix[mask][:, mask]

        if len(causal_order) != row_num:
            causal_order = None

        return causal_order

    def _estimate_causal_order(self, matrix):
        """Obtain a lower triangular from the given matrix approximately.

        Note that ``matrix`` is modified in place: progressively more of its
        smallest (in absolute value) elements are zeroed until a strict
        causal order can be found.

        Parameters
        ----------
        matrix : array-like, shape (n_features, n_features)
            Target square matrix (estimated connection strengths).

        Return
        ------
        causal_order : array, shape [n_features, ]
            A causal order of the given matrix on success, None otherwise.
        """
        causal_order = None

        # set the m(m + 1)/2 smallest(in absolute value) elements of the matrix to zero
        pos_list = np.argsort(np.abs(matrix), axis=None)
        pos_list = np.vstack(np.unravel_index(pos_list, matrix.shape)).T
        initial_zero_num = int(matrix.shape[0] * (matrix.shape[0] + 1) / 2)
        for i, j in pos_list[:initial_zero_num]:
            matrix[i, j] = 0

        for i, j in pos_list[initial_zero_num:]:
            causal_order = self._search_causal_order(matrix)
            if causal_order is not None:
                break
            else:
                # set the smallest(in absolute value) element to zero
                matrix[i, j] = 0

        return causal_order
145 |
--------------------------------------------------------------------------------
/lingam/utils/_f_correlation.py:
--------------------------------------------------------------------------------
1 | """
2 | Python implementation of the LiNGAM algorithms.
3 | The LiNGAM Project: https://sites.google.com/view/sshimizu06/lingam
4 | """
5 |
6 | import numpy as np
7 | from scipy.linalg import eigh
8 | from scipy.sparse.linalg import eigsh
9 | from sklearn.utils import check_array
10 |
11 |
def f_correlation(x, y):
    """Implementation of F-correlation [2]_

    Computed here from the smallest eigenvalue of the jointly centered,
    regularized kernel correlation matrix built from low-rank kernel
    approximations of x and y.

    References
    ----------
    .. [2] Bach, Francis R., and Michael I. Jordan. "Kernel independent component analysis."
        Journal of machine learning research 3.Jul (2002): 1-48.

    Parameters
    ----------
    x, y : array-like, shape (n_samples)
        Data, where ``n_samples`` is the number of samples.

    Returns
    -------
    float
        The value of F-correlation.
    """

    x_ = check_array(x, ensure_2d=False)
    y_ = check_array(y, ensure_2d=False)
    n = x_.shape[0]

    if y_.shape[0] != n:
        raise ValueError("x and y must be the same size.")

    # Standardize
    x_ = (x_ - x_.mean()) / x_.std()
    y_ = (y_ - y_.mean()) / y_.std()

    X = [x_, y_]
    m = len(X)

    # Regularization (kappa) and kernel width (sigma); presumably the values
    # recommended by Bach & Jordan for large/small samples — TODO confirm.
    if n > 1000:
        kappa, sigma = [2e-3, 0.5]
    else:
        kappa, sigma = [2e-2, 1.0]

    Rs = []
    Us = []
    sizes = []
    for i in range(m):
        # Incomplete Cholesky decomposition (low-rank kernel approximation)
        G, P = _incomplete_cholesky(X[i], sigma, n * kappa * 1e-2)
        G = G[np.argsort(P), :]  # undo the pivoting
        G = G - np.tile(np.mean(G, axis=0), (len(G), 1))  # center the columns

        # Singular value decomposition
        R, U = _svd(G, n, kappa)
        Rs.append(R)
        Us.append(U)
        sizes.append(R.shape[0])

    # make R_kappa: identity diagonal blocks, regularized cross-correlation
    # blocks off the diagonal
    R_kappa = np.eye(sum(sizes))
    st = np.cumsum(np.hstack([0, sizes]))
    st = st[:m]  # starting offset of each block
    for i in range(1, m):
        for j in range(0, i):
            RUUR = np.diag(Rs[i]) @ (Us[i].T @ Us[j]) @ np.diag(Rs[j])
            R_kappa[st[i] : st[i] + sizes[i], st[j] : st[j] + sizes[j]] = RUUR
            R_kappa[st[j] : st[j] + sizes[j], st[i] : st[i] + sizes[i]] = RUUR.T

    # smallest eigenvalue of R_kappa
    w, _ = eigsh(R_kappa, 1, which="SM")

    return 1 - w[0]
79 |
80 |
81 | def _squared_dist(x, y):
82 | """Squared euclidean distance matrix."""
83 | xx = x * x
84 | yy = y * y
85 | xy = x.reshape(-1, 1) * y
86 | xx = np.tile(xx.reshape(-1, 1), (1, len(yy)))
87 | yy = np.tile(yy, (len(xx), 1))
88 | return np.abs(xx + yy - 2 * xy)
89 |
90 |
def _incomplete_cholesky(x, sigma, tol):
    """Incomplete (pivoted) Cholesky decomposition of the gram matrix with
    the gaussian kernel.

    Parameters
    ----------
    x : array-like, shape (n_samples,)
        1-D data vector.
    sigma : float
        Bandwidth of the Gaussian kernel.
    tol : float
        Stop once the sum of the remaining diagonal residuals falls below
        this value.

    Returns
    -------
    G : array, shape (n_samples, rank)
        Low-rank factor (rows are in pivoted order; use P to restore the
        original order, as the caller does).
    P : array, shape (n_samples,)
        Pivot permutation applied to the rows.
    """
    n = x.shape[0]
    i = 0
    # Gjj tracks the diagonal of the residual; for the RBF kernel the
    # initial diagonal is all ones.
    Gjj = np.ones((n))
    G = np.empty((n, 0))
    P = np.array([i for i in range(n)])

    while np.sum(Gjj[i:n]) > tol:
        # grow the factor by one column
        G = np.append(G, np.zeros((n, 1)), axis=1)
        if i > 0:
            # pivot: bring the largest residual diagonal entry to position i
            jast = np.argmax(Gjj[i:n])
            jast = jast + i
            P[[i, jast]] = P[[jast, i]]
            G[[i, jast], :i] = G[[jast, i], :i]
        else:
            jast = 0

        # new diagonal entry of the factor
        G[i, i] = Gjj[jast]
        G[i, i] = np.sqrt(G[i, i])

        if i < n:
            # kernel values between the pivot point and the remaining points
            dist = _squared_dist(x[P[(i + 1) : n]], x[[P[i]]])
            K = np.exp(-0.5 / sigma**2 * dist)
            if i > 0:
                sigG = G[(i + 1) : n, 0:i] @ G[i, 0:i].reshape(-1, 1)
                G[(i + 1) : n, [i]] = 1 / G[i, i] * (K - sigG)
            else:
                G[(i + 1) : n, [i]] = 1 / G[i, i] * K

        if i < n:
            # update the residual diagonal for the remaining points
            Gjj[(i + 1) : n] = np.ones((n - (i + 1))) - np.sum(
                G[(i + 1) : n, 0 : i + 1] ** 2, axis=1
            )

        i = i + 1

    return G, P
129 |
130 |
131 | def _svd(G, n, kappa):
132 | """Singular value decomposition."""
133 | eta = kappa * 1e-2
134 | D, A = eigh(G.T @ G)
135 | indexes = np.where(D >= n * eta)[0]
136 | order = np.argsort(D[indexes])
137 | order = order[::-1]
138 | indexes = indexes[order[0 : len(indexes)]]
139 | D = D[indexes]
140 | U = G @ (A[:, indexes] @ np.diag(np.sqrt(1.0 / (D))))
141 | R = D
142 | for j in range(len(D)):
143 | # regularized
144 | R[j] = D[j] / (n * kappa / 2 + D[j])
145 | return R, U
146 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | scipy
3 | scikit-learn>=1.2
4 | graphviz
5 | statsmodels
6 | networkx
7 | pandas
8 | pygam
9 | matplotlib
10 | psy
11 | semopy
12 | # optional (install if a GPU and CUDA are available)
13 | # culingam
14 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
import setuptools

# The long description shown on PyPI is taken from the README.
with open('README.md', 'r', encoding='utf-8') as fh:
    README = fh.read()

# Imported here so the distribution version is read directly from the
# package's __version__ (single source of truth).
import lingam

VERSION = lingam.__version__

setuptools.setup(
    name='lingam',
    version=VERSION,
    author='T.Ikeuchi, G.Haraoka, M.Ide, Y.Zeng, T.N.Maeda, W.Kurebayashi, S.Shimizu',
    description='LiNGAM Python Package',
    long_description=README,
    long_description_content_type='text/markdown',
    install_requires=[
        'numpy',
        'scipy',
        'scikit-learn',
        'graphviz',
        'statsmodels',
        'networkx',
        'pandas',
        'pygam',
        'matplotlib',
        'psy',
        'semopy',
    ],
    url='https://github.com/cdt15/lingam',
    packages=setuptools.find_packages(exclude=['tests']),
    classifiers=[
        'Programming Language :: Python :: 3',
        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
    ],
    python_requires='>=3.8',
)
39 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cdt15/lingam/f2e53cf35e0e9a814e6e3e274d76bc0cf9dbbca0/tests/__init__.py
--------------------------------------------------------------------------------
/tests/test_camuv.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import numpy as np
4 | import random
5 | import lingam
6 |
7 |
def get_noise(n):
    """Return n uniform noise samples in (-2.5, 2.5), shifted by a random mean."""
    samples = ((np.random.rand(1, n) - 0.5) * 5).reshape(n)
    return samples + get_random_constant(0.0, 2.0)
13 |
def causal_func(cause):
    """Nonlinear causal mechanism: shifted power of degree 2 plus an offset."""
    shift = get_random_constant(-5.0, 5.0)
    offset = get_random_constant(-1.0, 1.0)
    degree = int(random.uniform(2, 3))  # always 2 (int truncates toward zero)
    return (cause + shift) ** degree + offset
19 |
def get_random_constant(s, b):
    """Draw a constant from uniform(s, b) or uniform(-b, -s), with random sign choice."""
    sign_draw = random.uniform(-1.0, 1.0)
    if sign_draw > 0:
        return random.uniform(s, b)
    return random.uniform(-b, -s)
27 |
def create_data(n):
    """Generate n samples from a fixed nonlinear SEM with one latent confounder.

    Six observed variables: direct effects follow ``causal_pairs``, one effect
    is mediated by an unobserved intermediate variable (``intermediate_pairs``),
    and one pair shares an unobserved common cause (``confounder_pairs``).

    Parameters
    ----------
    n : int
        Number of samples to generate.

    Returns
    -------
    numpy.ndarray
        Observed data of shape (n, 6); each column is standardized before
        downstream effects are added.
    """
    causal_pairs = [[0, 1], [0, 3], [2, 4]]
    intermediate_pairs = [[2, 5]]
    confounder_pairs = [[3, 4]]

    n_variables = 6

    data = np.zeros((n, n_variables))  # observed data
    # data of unobserved common causes
    confounders = np.zeros((n, len(confounder_pairs)))

    # Adding external effects
    for i in range(n_variables):
        data[:, i] = get_noise(n)
    for i in range(len(confounder_pairs)):
        confounders[:, i] = get_noise(n)
        confounders[:, i] = confounders[:, i] / np.std(confounders[:, i])

    # Adding the effects of unobserved common causes
    for i, cpair in enumerate(confounder_pairs):
        lo, hi = sorted(cpair)
        data[:, lo] += causal_func(confounders[:, i])
        data[:, hi] += causal_func(confounders[:, i])

    # was: range(n_variables)[0:n_variables] — a redundant full slice
    for i1 in range(n_variables):
        data[:, i1] = data[:, i1] / np.std(data[:, i1])
        # was: range(n_variables)[i1 + 1 : n_variables + 1] — the slice past the
        # end yields the same elements as range(i1 + 1, n_variables)
        for i2 in range(i1 + 1, n_variables):
            # Adding direct effects between observed variables
            if [i1, i2] in causal_pairs:
                data[:, i2] += causal_func(data[:, i1])
            # Undirected effects mediated through unobserved variables
            if [i1, i2] in intermediate_pairs:
                interm = causal_func(data[:, i1]) + get_noise(n)
                interm = interm / np.std(interm)
                data[:, i2] += causal_func(interm)

    return data
67 |
def test_fit_success():
    """Smoke-test CAMUV under default, f-correlation, and prior-knowledge settings."""
    X = create_data(200)

    # default configuration
    model = lingam.CAMUV()
    model.fit(X)
    print(model.adjacency_matrix_)

    # independence measured via f-correlation
    model = lingam.CAMUV(independence="fcorr", ind_corr=0.5)
    model.fit(X)

    # with prior knowledge supplied
    model = lingam.CAMUV(prior_knowledge=[(1, 0)])
    model.fit(X)
81 |
def test_fit_invalid():
    """Each invalid hyperparameter must make CAMUV raise ValueError."""

    def assert_raises_value_error(factory, data):
        # Build and fit inside the guarded region, mirroring the original
        # try/except/else structure for each case.
        try:
            factory().fit(data)
        except ValueError:
            pass
        else:
            raise AssertionError

    X = create_data(200)
    assert_raises_value_error(lambda: lingam.CAMUV(alpha=-1), X)

    X = create_data(200)
    assert_raises_value_error(lambda: lingam.CAMUV(num_explanatory_vals=-1), X)

    # Invalid value: independence (reuses the previous dataset)
    assert_raises_value_error(lambda: lingam.CAMUV(independence="lingam"), X)

    X = create_data(200)
    assert_raises_value_error(lambda: lingam.CAMUV(ind_corr=-1.0), X)
118 |
--------------------------------------------------------------------------------
/tests/test_causal_based_simulator/test_train2_params.json:
--------------------------------------------------------------------------------
1 | {"x0": {"None": {"coef": [[2.9339164528397097]], "intercept": [0.5291106732914217]}}, "x1": {"None": {"coef": [2.9768986061975427, 2.010538008982108], "intercept": 0.4977407209607847}}, "x2": {"None": {"coef": [[5.9657532449831105]], "intercept": [0.5317138748321436]}}, "x3": {}, "x4": {"None": {"coef": [[8.054803347328619, -1.032443274825341]], "intercept": [0.47720500309615216]}}, "x5": {"None": {"coef": [3.9982221066576265], "intercept": 0.49693661223133834}}}
--------------------------------------------------------------------------------
/tests/test_causal_based_simulator/test_train3_params.json:
--------------------------------------------------------------------------------
1 | {"x0": {"None": {"coef": [2.9701634515780193], "intercept": 0.5202447291038712}}, "x1": {"{'x2': 'a'}": {"coef": [3.001269876037721], "intercept": 0.4948594354078173}, "{'x2': 'b'}": {"coef": [1.4838578471068276], "intercept": 0.5067806444646545}}, "x2": {"None": {"coef": [[-1.4322085873714177]], "intercept": [0.5468779096422264]}}, "x3": {}, "x4": {"{'x2': 'a'}": {"coef": [8.005531925102636], "intercept": 0.4730329362214398}, "{'x2': 'b'}": {"coef": [3.983315963383367], "intercept": 0.5122273904635017}}, "x5": {"None": {"coef": [3.9982221066576265], "intercept": 0.49693661223133834}}}
--------------------------------------------------------------------------------
/tests/test_causal_based_simulator/test_train4_params.json:
--------------------------------------------------------------------------------
1 | {"x0": {}, "x1": {"{'x0': 'a'}": {"expected_value": 1.0155893167967633}, "{'x0': 'b'}": {"expected_value": 0.5225470577555933}}, "x2": {"{'x0': 'a'}": {"classes": [0, 1], "p": [0.7299077733860343, 0.27009222661396576]}, "{'x0': 'b'}": {"classes": [0, 1], "p": [0.24896265560165975, 0.7510373443983402]}}}
--------------------------------------------------------------------------------
/tests/test_causal_based_simulator/test_train_params.json:
--------------------------------------------------------------------------------
1 | {"x0": {"None": {"coef": [2.9701634515780193], "intercept": 0.5202447291038712}}, "x1": {"None": {"coef": [2.9768986061975427, 2.010538008982108], "intercept": 0.4977407209607847}}, "x2": {"None": {"coef": [6.006189651271653], "intercept": 0.5082804052876058}}, "x3": {}, "x4": {"None": {"coef": [8.044809017180901, -1.0265781185585672], "intercept": 0.4870560933939245}}, "x5": {"None": {"coef": [3.9982221066576265], "intercept": 0.49693661223133834}}}
--------------------------------------------------------------------------------
/tests/test_high_dim_direct_lingam.py:
--------------------------------------------------------------------------------
1 | import os
2 | import warnings
3 |
4 | import numpy as np
5 | import pandas as pd
6 | from lingam import HighDimDirectLiNGAM
7 |
8 |
def test_fit_success():
    """HighDimDirectLiNGAM recovers the chain x0 --> x1 --> x3 under several settings."""
    size = 1000
    x0 = np.random.uniform(size=size)
    x1 = 2.0 * x0 + np.random.uniform(size=size)
    x2 = np.random.uniform(size=size)
    x3 = 4.0 * x1 + np.random.uniform(size=size)
    X = pd.DataFrame(np.array([x0, x1, x2, x3]).T, columns=['x0', 'x1', 'x2', 'x3'])

    def assert_chain_order(fitted):
        # the estimated ordering must respect x0 -> x1 -> x3
        order = fitted.causal_order_
        assert order.index(0) < order.index(1) < order.index(3)

    # default configuration
    model = HighDimDirectLiNGAM()
    model.fit(X)
    assert_chain_order(model)

    # J
    model = HighDimDirectLiNGAM(J=6)
    model.fit(X)
    assert_chain_order(model)

    # K
    model = HighDimDirectLiNGAM(K=2)
    model.fit(X)
    assert_chain_order(model)

    # alpha
    model = HighDimDirectLiNGAM(alpha=0.1)
    model.fit(X)
    assert_chain_order(model)

    # skipping adjacency-matrix estimation leaves the attribute unset
    model = HighDimDirectLiNGAM(estimate_adj_mat=False)
    model.fit(X)
    assert_chain_order(model)
    assert model.adjacency_matrix_ is None

    # a fixed random_state makes results reproducible
    model = HighDimDirectLiNGAM(random_state=0)
    model.fit(X)
    model2 = HighDimDirectLiNGAM(random_state=0)
    model2.fit(X)
    assert np.isclose(model.adjacency_matrix_, model2.adjacency_matrix_).all()

    # n_samples <= n_features should emit a UserWarning
    X2 = np.random.uniform(-1, 1, size=(10, 10))
    model = HighDimDirectLiNGAM()
    with warnings.catch_warnings(record=True) as caught:
        model.fit(X2)
    assert caught[0].category == UserWarning
63 |
def test_fit_invalid_data():
    """Invalid data and hyperparameters must raise ValueError/TypeError."""

    def expect_fit_value_error(data):
        try:
            HighDimDirectLiNGAM().fit(data)
        except ValueError:
            pass
        else:
            raise AssertionError

    def expect_init_error(exc_type, **params):
        try:
            HighDimDirectLiNGAM(**params)
        except exc_type:
            pass
        else:
            raise AssertionError

    # Not array data
    expect_fit_value_error(1)

    # Include non-numeric data
    x0 = np.random.uniform(size=5)
    x1 = np.array(['X', 'Y', 'X', 'Y', 'X'])
    expect_fit_value_error(pd.DataFrame(np.array([x0, x1]).T, columns=['x0', 'x1']))

    # Include NaN values
    x0 = np.random.uniform(size=1000)
    x1 = 2.0 * x0 + np.random.uniform(size=1000)
    X = pd.DataFrame(np.array([x0, x1]).T, columns=['x0', 'x1'])
    X.iloc[100, 0] = np.nan
    expect_fit_value_error(X)

    # Include infinite values
    x0 = np.random.uniform(size=1000)
    x1 = 2.0 * x0 + np.random.uniform(size=1000)
    X = pd.DataFrame(np.array([x0, x1]).T, columns=['x0', 'x1'])
    X.iloc[100, 0] = np.inf
    expect_fit_value_error(X)

    # J must be an integer greater than 2
    expect_init_error(ValueError, J=2)
    expect_init_error(TypeError, J=4.0)

    # K must be a positive integer
    expect_init_error(ValueError, K=0)
    expect_init_error(TypeError, K=2.0)

    # alpha must lie in [0, 1]
    expect_init_error(ValueError, alpha=-0.1)
152 |
--------------------------------------------------------------------------------
/tests/test_ica_lingam.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | from lingam.ica_lingam import ICALiNGAM
4 |
5 |
def test_fit_success():
    """ICALiNGAM recovers the causal ordering of the chain x0 --> x1 --> x3."""
    size = 1000
    x0 = np.random.uniform(size=size)
    x1 = 2.0 * x0 + np.random.uniform(size=size)
    x2 = np.random.uniform(size=size)
    x3 = 4.0 * x1 + np.random.uniform(size=size)
    X = pd.DataFrame(np.array([x0, x1, x2, x3]).T, columns=['x0', 'x1', 'x2', 'x3'])

    model = ICALiNGAM()
    model.fit(X)

    # the estimated ordering must respect the chain
    co = model.causal_order_
    assert co.index(0) < co.index(1) < co.index(3)

    # adjacency matrix (coefficient-magnitude checks were disabled upstream)
    am = model.adjacency_matrix_
    # assert am[1, 0] > 1.5 and am[3, 1] > 3.5

    am[1, 0] = 0.0
    am[3, 1] = 0.0
    # assert np.sum(am) < 0.1

    # exercise _search_causal_order on an upper-triangular matrix (coverage)
    upper = np.array([
        [0, 1, 1, 1],
        [0, 0, 1, 1],
        [0, 0, 0, 1],
        [0, 0, 0, 0],
    ])
    ICALiNGAM()._search_causal_order(upper)

    # and on a matrix that admits no strict ordering (coverage)
    no_order = np.array([
        [1, 1, 1],
        [1, 1, 1],
        [0, 0, 0],
    ])
    ICALiNGAM()._search_causal_order(no_order)
47 |
48 |
def test_fit_invalid_data():
    """fit must raise ValueError on non-array, non-numeric, NaN, or inf input."""

    def expect_value_error(data):
        try:
            ICALiNGAM().fit(data)
        except ValueError:
            pass
        else:
            raise AssertionError

    # Not array data
    expect_value_error(1)

    # Include non-numeric data
    x0 = np.random.uniform(size=5)
    x1 = np.array(['X', 'Y', 'X', 'Y', 'X'])
    expect_value_error(pd.DataFrame(np.array([x0, x1]).T, columns=['x0', 'x1']))

    # Include NaN values
    x0 = np.random.uniform(size=1000)
    x1 = 2.0 * x0 + np.random.uniform(size=1000)
    X = pd.DataFrame(np.array([x0, x1]).T, columns=['x0', 'x1'])
    X.iloc[100, 0] = np.nan
    expect_value_error(X)

    # Include infinite values
    x0 = np.random.uniform(size=1000)
    x1 = 2.0 * x0 + np.random.uniform(size=1000)
    X = pd.DataFrame(np.array([x0, x1]).T, columns=['x0', 'x1'])
    X.iloc[100, 0] = np.inf
    expect_value_error(X)
97 |
--------------------------------------------------------------------------------
/tests/test_lim.py:
--------------------------------------------------------------------------------
1 | """
2 | Python implementation of the LiNGAM algorithms.
3 | The LiNGAM Project: https://sites.google.com/view/sshimizu06/lingam
4 | """
5 |
6 | import os
7 |
8 | import numpy as np
9 | import sys
10 | sys.path.append('D:/Codes/Git/lingam')
11 | import lingam
12 |
13 | DATA_DIR_PATH = os.path.dirname(__file__)
14 |
15 |
def test_fit_lim():
    """Smoke-test LiM on the bundled dataset, with and without the Poisson model."""
    X = np.loadtxt(f"{DATA_DIR_PATH}/test_lim_data.csv", delimiter=",")
    # presumably 1.0 marks a discrete column and 0.0 a continuous one — TODO confirm
    dis_con = np.array([[0.0, 0.0, 0.0, 1.0, 1.0]])
    # Reference adjacency matrix, printed alongside the estimate for eyeballing.
    W_true = np.array(
        [
            [0.0, 1.09482609, -1.29270764, 0.0, -0.84424137],
            [0.0, 0.0, 0.80393307, 0.0, 0.0],
            [0.0, 0.0, 0.0, 0.0, 0.0],
            [0.70346053, 0.0, 1.90912441, 0.0, 1.94441713],
            [0.0, 0.0, -0.63152585, 0.0, 0.0],
        ]
    )

    for poisson in (True, False):
        model = lingam.LiM()
        model.fit(X, dis_con, only_global=False, is_poisson=poisson)

        print("The estimated adjacency matrix is:\n", model.adjacency_matrix_)
        print("The true adjacency matrix is:\n", W_true)
        print("Done.")
42 |
--------------------------------------------------------------------------------
/tests/test_lina_MIT.py:
--------------------------------------------------------------------------------
1 | """
2 | Python implementation of the LiNGAM algorithms.
3 | The LiNGAM Project: https://sites.google.com/view/sshimizu06/lingam
4 | """
5 |
6 |
7 | import numpy as np
8 | import sys
9 | sys.path.append('D:/Codes/Git/lingam')
10 | import lingam
11 | import os
12 |
def test_fit_lina():
    """Smoke-test LiNA on the bundled fixture data.

    No assertion is made on the estimate; the test only checks that ``fit``
    runs to completion, printing the result next to the reference matrix.
    """
    # load data
    DATA_DIR_PATH = os.path.dirname(__file__)
    X = np.loadtxt(f"{DATA_DIR_PATH}/test_lina_data.csv", delimiter=",")
    # Reference adjacency matrix among the five latent variables (printed only).
    W_true = np.array(
        [[ 0. , 0. , 0. , 0. , 0. ],
        [ 1.23784047, 0. , 0. , 0. , 0. ],
        [ 0. , 0. , 0. , -1.49650548, 0. ],
        [-1.05331666, -0.52543143, 0. , 0. , 0.50714686],
        [ 0. , 0. , 0. , 0. , 0. ]])
    # presumably the sign pattern of the 10x5 measurement (loading) matrix —
    # TODO confirm against the LiNA documentation
    G_sign = np.array(
        [[ 1., 0., 0., 0., 0.],
        [-1., 0., 0., 0., 0.],
        [ 0., 1., 0., 0., 0.],
        [ 0., -1., 0., 0., 0.],
        [ 0., 0., 1., 0., 0.],
        [ 0., 0., 1., 0., 0.],
        [ 0., 0., 0., -1., 0.],
        [ 0., 0., 0., -1., 0.],
        [ 0., 0., 0., 0., -1.],
        [ 0., 0., 0., 0., -1.]])
    # presumably per-latent-variable scale parameters — TODO confirm
    scale = np.array([[4.62688314, 1.84996207, 1.36308856, 2.39533958, 1.95656385]])

    model = lingam.LiNA()
    model.fit(X, G_sign, scale)

    print('The estimated adjacency matrix is:\n', model.adjacency_matrix_)
    print('The true adjacency matrix is:\n', W_true)
41 |
42 |
def test_fit_mdlina():
    """Smoke-test MDLiNA (multi-domain LiNA) on the bundled fixture data.

    No assertion is made on the estimate; the test only checks that ``fit``
    runs to completion, printing the result next to the reference matrix.
    """
    # load data
    DATA_DIR_PATH = os.path.dirname(__file__)
    XX = np.loadtxt(f"{DATA_DIR_PATH}/test_mdlina_data.csv", delimiter=",")
    # Reference adjacency matrix among the three latent variables (printed only).
    W_true = np.array(
        [[ 0. , 1.02343092, -1.70436068],
        [ 0. , 0. , -1.47895291],
        [ 0. , 0. , 0. ]])
    # presumably the sign pattern of the 12x6 measurement (loading) matrix —
    # TODO confirm against the MDLiNA documentation
    G_sign = np.array(
        [[ 1., 0., 0., 0., 0., 0.],
        [ 1., 0., 0., 0., 0., 0.],
        [ 0., -1., 0., 0., 0., 0.],
        [ 0., 1., 0., 0., 0., 0.],
        [ 0., 0., 1., 0., 0., 0.],
        [ 0., 0., 1., 0., 0., 0.],
        [ 0., 0., 0., 1., 0., 0.],
        [ 0., 0., 0., 1., 0., 0.],
        [ 0., 0., 0., 0., 1., 0.],
        [ 0., 0., 0., 0., 1., 0.],
        [ 0., 0., 0., 0., 0., -1.],
        [ 0., 0., 0., 0., 0., 1.]]
    )
    # presumably per-domain scale parameters (one row per domain) — TODO confirm
    scale = np.array([[1. , 1.42970805, 3.66739664, 0. , 0. ,
        0. ],
        [0. , 0. , 0. , 1. , 1.45710481,
        3.70389115]])

    model = lingam.MDLiNA()
    model.fit(XX, G_sign, scale)

    print('The estimated adjacency matrix is:\n', model._adjacency_matrix)
    print('The true adjacency matrix is:\n', W_true)
75 |
--------------------------------------------------------------------------------
/tests/test_multi_group_camuv.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import numpy as np
4 | import random
5 | import lingam
6 |
7 |
def get_noise(n):
    """Return n uniform noise samples in (-2.5, 2.5), shifted by a random mean."""
    samples = ((np.random.rand(1, n) - 0.5) * 5).reshape(n)
    return samples + get_random_constant(0.0, 2.0)
13 |
def causal_func(cause):
    """Nonlinear causal mechanism: shifted power of degree 2 plus an offset."""
    shift = get_random_constant(-5.0, 5.0)
    offset = get_random_constant(-1.0, 1.0)
    degree = int(random.uniform(2, 3))  # always 2 (int truncates toward zero)
    return (cause + shift) ** degree + offset
19 |
def get_random_constant(s, b):
    """Draw a constant from uniform(s, b) or uniform(-b, -s), with random sign choice."""
    sign_draw = random.uniform(-1.0, 1.0)
    if sign_draw > 0:
        return random.uniform(s, b)
    return random.uniform(-b, -s)
27 |
def create_data(n):
    """Generate n samples from a fixed nonlinear SEM with one latent confounder.

    Six observed variables: direct effects follow ``causal_pairs``, one effect
    is mediated by an unobserved intermediate variable (``intermediate_pairs``),
    and one pair shares an unobserved common cause (``confounder_pairs``).

    Parameters
    ----------
    n : int
        Number of samples to generate.

    Returns
    -------
    numpy.ndarray
        Observed data of shape (n, 6); each column is standardized before
        downstream effects are added.
    """
    causal_pairs = [[0, 1], [0, 3], [2, 4]]
    intermediate_pairs = [[2, 5]]
    confounder_pairs = [[3, 4]]

    n_variables = 6

    data = np.zeros((n, n_variables))  # observed data
    # data of unobserved common causes
    confounders = np.zeros((n, len(confounder_pairs)))

    # Adding external effects
    for i in range(n_variables):
        data[:, i] = get_noise(n)
    for i in range(len(confounder_pairs)):
        confounders[:, i] = get_noise(n)
        confounders[:, i] = confounders[:, i] / np.std(confounders[:, i])

    # Adding the effects of unobserved common causes
    for i, cpair in enumerate(confounder_pairs):
        lo, hi = sorted(cpair)
        data[:, lo] += causal_func(confounders[:, i])
        data[:, hi] += causal_func(confounders[:, i])

    # was: range(n_variables)[0:n_variables] — a redundant full slice
    for i1 in range(n_variables):
        data[:, i1] = data[:, i1] / np.std(data[:, i1])
        # was: range(n_variables)[i1 + 1 : n_variables + 1] — the slice past the
        # end yields the same elements as range(i1 + 1, n_variables)
        for i2 in range(i1 + 1, n_variables):
            # Adding direct effects between observed variables
            if [i1, i2] in causal_pairs:
                data[:, i2] += causal_func(data[:, i1])
            # Undirected effects mediated through unobserved variables
            if [i1, i2] in intermediate_pairs:
                interm = causal_func(data[:, i1]) + get_noise(n)
                interm = interm / np.std(interm)
                data[:, i2] += causal_func(interm)

    return data
67 |
def test_fit_success():
    """Smoke-test MultiGroupCAMUV on two independently generated datasets."""
    X_list = [create_data(200), create_data(200)]

    # default configuration
    model = lingam.MultiGroupCAMUV()
    model.fit(X_list)
    print(model.adjacency_matrix_)

    # independence measured via f-correlation
    model = lingam.MultiGroupCAMUV(independence="fcorr", ind_corr=0.5)
    model.fit(X_list)

    # with prior knowledge supplied
    model = lingam.MultiGroupCAMUV(prior_knowledge=[(1, 0)])
    model.fit(X_list)
83 |
def test_fit_invalid():
    """Each invalid hyperparameter must make MultiGroupCAMUV raise ValueError."""

    def assert_raises_value_error(factory, data_list):
        # Build and fit inside the guarded region, mirroring the original
        # try/except/else structure for each case.
        try:
            factory().fit(data_list)
        except ValueError:
            pass
        else:
            raise AssertionError

    X_list = [create_data(200), create_data(200)]
    assert_raises_value_error(lambda: lingam.MultiGroupCAMUV(alpha=-1), X_list)

    X_list = [create_data(200), create_data(200)]
    assert_raises_value_error(
        lambda: lingam.MultiGroupCAMUV(num_explanatory_vals=-1), X_list
    )

    # Invalid value: independence (reuses the previous datasets)
    assert_raises_value_error(
        lambda: lingam.MultiGroupCAMUV(independence="lingam"), X_list
    )

    # The original created an unused single dataset (X = create_data(200))
    # before this case while fitting X_list; that dead code is removed.
    assert_raises_value_error(lambda: lingam.MultiGroupCAMUV(ind_corr=-1.0), X_list)
124 |
--------------------------------------------------------------------------------
/tests/test_resit.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import numpy as np
4 | import pandas as pd
5 | from lingam.resit import RESIT
6 | from sklearn.ensemble import RandomForestRegressor
7 |
8 |
def test_fit_success():
    """RESIT fits the chain x0 --> x1 --> x3 and exposes its result attributes."""
    size = 1000
    x0 = np.random.uniform(size=size)
    x1 = 2.0 * x0 + np.random.uniform(size=size)
    x2 = np.random.uniform(size=size)
    x3 = 4.0 * x1 + np.random.uniform(size=size)
    X = pd.DataFrame(np.array([x0, x1, x2, x3]).T, columns=["x0", "x1", "x2", "x3"])

    model = RESIT(regressor=RandomForestRegressor(max_depth=4, random_state=0))
    model.fit(X)

    # estimated causal ordering
    co = model.causal_order_

    # estimated adjacency matrix
    am = model.adjacency_matrix_

    # estimate_total_effect (marked "Not implement" upstream — exercised for coverage)
    te = model.estimate_total_effect(X, 0, 3)

    # get_error_independence_p_values (marked "Not implement" upstream — coverage)
    p_values = model.get_error_independence_p_values(X)
32 |
33 |
def test_fit_invalid():
    """RESIT must raise ValueError for bad data, bad regressors, or bad alpha."""
    reg = RandomForestRegressor(max_depth=4, random_state=0)

    def expect_value_error(factory, data):
        try:
            factory().fit(data)
        except ValueError:
            pass
        else:
            raise AssertionError

    # Not array data
    expect_value_error(lambda: RESIT(regressor=reg), 1)

    # valid chain data: x0 --> x1 --> x3
    x0 = np.random.uniform(size=1000)
    x1 = 2.0 * x0 + np.random.uniform(size=1000)
    x2 = np.random.uniform(size=1000)
    x3 = 4.0 * x1 + np.random.uniform(size=1000)
    X = pd.DataFrame(np.array([x0, x1, x2, x3]).T, columns=["x0", "x1", "x2", "x3"])

    # missing regressor
    expect_value_error(lambda: RESIT(regressor=None), X)

    # object without the fit/predict interface
    dummy_reg = lambda x: x
    expect_value_error(lambda: RESIT(regressor=dummy_reg), X)

    # negative alpha
    expect_value_error(lambda: RESIT(regressor=reg, alpha=-1), X)
81 |
--------------------------------------------------------------------------------