├── tests
│   ├── __init__.py
│   ├── test_basic.py
│   ├── test_utils.py
│   └── test_advanced.py
├── .flake8
├── requirements.txt
├── MANIFEST.in
├── pattern_causality
│   ├── cpp
│   │   ├── __init__.py
│   │   ├── fcp.cpp
│   │   ├── patternhashing.cpp
│   │   ├── patternspace.cpp
│   │   ├── distancematrix.cpp
│   │   ├── databank.cpp
│   │   ├── signaturespace.cpp
│   │   ├── statespace.cpp
│   │   ├── pastNNs.cpp
│   │   ├── natureOfCausality.cpp
│   │   ├── predictionY.cpp
│   │   ├── projectedNNs.cpp
│   │   └── fillPCMatrix.cpp
│   ├── __init__.py
│   ├── datasets.py
│   ├── data
│   │   └── Climate_Indices.csv
│   └── pattern_causality.py
├── LICENSE
├── pyproject.toml
├── meta.yaml
├── .github
│   └── workflows
│       ├── lint.yml
│       └── tests.yml
├── setup.py
└── README.md

/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | max-line-length = 88
3 | extend-ignore = E203
4 | exclude = .git,__pycache__,build,dist
5 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy>=1.19.0
2 | pandas>=1.0.0
3 | pytest>=6.0.0
4 | pytest-cov>=2.0.0
5 | black>=22.0.0
6 | isort>=5.0.0
7 | flake8>=4.0.0
8 | mypy>=0.900
9 | 
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.md
2 | include LICENSE
3 | include setup.py
4 | include requirements.txt
5 | include pyproject.toml
6 | 
7 | # C++ source files
8 | recursive-include pattern_causality/cpp *.cpp
9 | recursive-include pattern_causality/cpp *.h
10 | recursive-include pattern_causality/cpp *.hpp
11 | 
12 | # Data files
13 | recursive-include pattern_causality/data *.csv
14 | 
15 | # Python source files
16 | recursive-include pattern_causality *.py
17 | 
18 | # Include all header files
19 | recursive-include . *.h
20 | recursive-include .
*.hpp 21 | 22 | # Exclude unnecessary files 23 | global-exclude *.py[cod] 24 | global-exclude __pycache__ 25 | global-exclude *.so 26 | global-exclude *.dylib 27 | global-exclude .DS_Store 28 | global-exclude *.o 29 | global-exclude *.a 30 | global-exclude *.lib 31 | global-exclude *.dll 32 | global-exclude .git* 33 | global-exclude .vscode* 34 | global-exclude .idea* 35 | global-exclude *.egg-info -------------------------------------------------------------------------------- /pattern_causality/cpp/__init__.py: -------------------------------------------------------------------------------- 1 | """C++ implementations of pattern causality functions.""" 2 | 3 | try: 4 | from utils.databank import databank 5 | from utils.distancematrix import distancematrix 6 | from utils.fcp import fcp 7 | from utils.fillPCMatrix import fillPCMatrix 8 | from utils.natureOfCausality import natureOfCausality 9 | from utils.pastNNs import pastNNs 10 | from utils.patternhashing import patternhashing 11 | from utils.patternspace import patternspace 12 | from utils.predictionY import predictionY 13 | from utils.projectedNNs import projectedNNs 14 | from utils.signaturespace import signaturespace 15 | from utils.statespace import statespace 16 | except ImportError as e: 17 | import warnings 18 | warnings.warn(f"Failed to import C++ extensions: {str(e)}") 19 | 20 | __all__ = [ 21 | "databank", 22 | "distancematrix", 23 | "fcp", 24 | "fillPCMatrix", 25 | "natureOfCausality", 26 | "pastNNs", 27 | "patternhashing", 28 | "patternspace", 29 | "predictionY", 30 | "projectedNNs", 31 | "signaturespace", 32 | "statespace", 33 | ] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2024, Stavros Stavroglou, Athanasios Pantelous, Hui Wang 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
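
Note that the try/except guard in pattern_causality/cpp/__init__.py above only warns when the compiled extensions are missing, so the import never hard-fails. A caller can probe for the build explicitly; an illustrative sketch, not part of the repository:

# Probe whether the compiled utils.* extensions are importable.
try:
    from utils.fcp import fcp  # any one compiled module works as a probe
    HAVE_CPP_EXTENSIONS = True
except ImportError:
    HAVE_CPP_EXTENSIONS = False
print(f"C++ extensions available: {HAVE_CPP_EXTENSIONS}")
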
-------------------------------------------------------------------------------- /pattern_causality/__init__.py: -------------------------------------------------------------------------------- 1 | """Pattern Causality Analysis Package. 2 | 3 | This package provides tools for analyzing causal relationships in time series data. 4 | """ 5 | 6 | from importlib.metadata import version, metadata 7 | 8 | # Get package metadata 9 | __version__ = version("pattern-causality") 10 | __author__ = metadata("pattern-causality").get("Author") 11 | __email__ = metadata("pattern-causality").get("Author-email") 12 | __license__ = metadata("pattern-causality").get("License") 13 | __copyright__ = f"Copyright (c) 2024 {__author__}" 14 | 15 | # Import core classes 16 | from .pattern_causality import pattern_causality 17 | from .datasets import load_data, get_dataset_info 18 | 19 | # Import C++ extensions 20 | try: 21 | from utils.databank import databank 22 | from utils.distancematrix import distancematrix 23 | from utils.fcp import fcp 24 | from utils.fillPCMatrix import fillPCMatrix 25 | from utils.natureOfCausality import natureOfCausality 26 | from utils.pastNNs import pastNNs 27 | from utils.patternhashing import patternhashing 28 | from utils.patternspace import patternspace 29 | from utils.predictionY import predictionY 30 | from utils.projectedNNs import projectedNNs 31 | from utils.signaturespace import signaturespace 32 | from utils.statespace import statespace 33 | except ImportError as e: 34 | import warnings 35 | warnings.warn(f"Failed to import C++ extensions: {str(e)}") 36 | 37 | __all__ = [ 38 | "pattern_causality", 39 | "load_data", 40 | "get_dataset_info", 41 | "databank", 42 | "distancematrix", 43 | "fcp", 44 | "fillPCMatrix", 45 | "natureOfCausality", 46 | "pastNNs", 47 | "patternhashing", 48 | "patternspace", 49 | "predictionY", 50 | "projectedNNs", 51 | "signaturespace", 52 | "statespace", 53 | ] 54 | -------------------------------------------------------------------------------- /tests/test_basic.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | import pandas as pd 4 | from pattern_causality import pattern_causality, load_data 5 | 6 | 7 | class TestBasicFunctionality(unittest.TestCase): 8 | @classmethod 9 | def setUpClass(cls): 10 | """Load data once for all tests""" 11 | cls.data = load_data() 12 | cls.X = cls.data["NAO"].values 13 | cls.Y = cls.data["AAO"].values 14 | cls.pc = pattern_causality(verbose=False) 15 | 16 | def test_pc_lightweight_basic(self): 17 | """Test basic functionality of pc_lightweight""" 18 | result = self.pc.pc_lightweight(X=self.X, Y=self.Y, E=3, tau=1, h=1) 19 | self.assertIsInstance(result, pd.DataFrame) 20 | self.assertTrue("Total Causality" in result.columns) 21 | self.assertTrue("Positive Causality" in result.columns) 22 | self.assertTrue("Negative Causality" in result.columns) 23 | self.assertTrue("Dark Causality" in result.columns) 24 | 25 | def test_input_validation(self): 26 | """Test input validation""" 27 | # Test with non-numeric data 28 | with self.assertRaises(TypeError): 29 | self.pc.pc_lightweight(X=["invalid"], Y=self.Y, E=3, tau=1, h=1) 30 | 31 | # Test with invalid dimensions 32 | with self.assertRaises(ValueError): 33 | self.pc.pc_lightweight(X=[], Y=self.Y, E=3, tau=1, h=1) 34 | 35 | def test_weighted_vs_unweighted(self): 36 | """Test that weighted and unweighted calculations give different results""" 37 | weighted_result = self.pc.pc_lightweight( 38 | X=self.X, 
Y=self.Y, E=3, tau=1, h=1, weighted=True 39 | ) 40 | 41 | unweighted_result = self.pc.pc_lightweight( 42 | X=self.X, Y=self.Y, E=3, tau=1, h=1, weighted=False 43 | ) 44 | 45 | # Results should be different 46 | self.assertNotEqual( 47 | weighted_result["Positive Causality"].values[0], 48 | unweighted_result["Positive Causality"].values[0], 49 | ) 50 | 51 | 52 | if __name__ == "__main__": 53 | unittest.main() 54 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=45", 4 | "wheel", 5 | "numpy>=1.19.0", 6 | ] 7 | build-backend = "setuptools.build_meta" 8 | 9 | [project] 10 | name = "pattern-causality" 11 | version = "1.0.3" 12 | description = "Pattern Causality Algorithm in Python" 13 | readme = "README.md" 14 | requires-python = ">=3.8" 15 | license = {text = "BSD License"} 16 | authors = [ 17 | {name = "Stavros Stavroglou", email = "stavros.k.stavroglou@gmail.com"}, 18 | {name = "Athanasios Pantelous", email = "Athanasios.Pantelous@monash.edu"}, 19 | {name = "Hui Wang", email = "huiw1128@gmail.com"}, 20 | ] 21 | maintainers = [ 22 | {name = "Hui Wang", email = "huiw1128@gmail.com"}, 23 | ] 24 | dependencies = [ 25 | "numpy>=1.19.0", 26 | "pandas>=1.0.0", 27 | ] 28 | classifiers = [ 29 | "Programming Language :: Python :: 3", 30 | "Programming Language :: Python :: 3.8", 31 | "Programming Language :: Python :: 3.9", 32 | "Programming Language :: Python :: 3.10", 33 | "Programming Language :: Python :: 3.11", 34 | ] 35 | [project.urls] 36 | Homepage = "https://github.com/skstavroglou/pattern_causality_py" 37 | Repository = "https://github.com/skstavroglou/pattern_causality_py.git" 38 | Documentation = "https://github.com/skstavroglou/pattern_causality_py#readme" 39 | 40 | [tool.setuptools] 41 | packages = ["pattern_causality"] 42 | 43 | [tool.pytest.ini_options] 44 | testpaths = ["tests"] 45 | python_files = ["test_*.py"] 46 | addopts = "-v --cov=pattern_causality" 47 | 48 | [project.optional-dependencies] 49 | dev = [ 50 | "pytest>=6.0", 51 | "pytest-cov>=2.0", 52 | "black>=22.0", 53 | "isort>=5.0", 54 | "flake8>=4.0", 55 | ] 56 | 57 | [tool.black] 58 | line-length = 88 59 | target-version = ['py38'] 60 | extend-exclude = ''' 61 | # A regex preceded with ^/ will apply only to files and directories 62 | # in the root of the project. 
63 | ^/pattern_causality/pattern_causality.py 64 | ''' 65 | 66 | [tool.isort] 67 | profile = "black" 68 | multi_line_output = 3 69 | line_length = 88 70 | skip = ["pattern_causality/pattern_causality.py"] 71 | 72 | [tool.mypy] 73 | python_version = "3.8" 74 | ignore_missing_imports = true 75 | -------------------------------------------------------------------------------- /meta.yaml: -------------------------------------------------------------------------------- 1 | {% set name = "pattern-causality" %} 2 | {% set version = "1.0.3" %} 3 | 4 | package: 5 | name: {{ name|lower }} 6 | version: {{ version }} 7 | 8 | source: 9 | url: https://github.com/skstavroglou/pattern_causality_py/releases/download/v{{ version }}/pattern_causality-{{ version }}.tar.gz 10 | sha256: 99d42253d559992eb0137356ad52036789f00dd87ee7f5b1e4e6ebbad5e55141 11 | 12 | build: 13 | number: 0 14 | skip: true # [win] 15 | script: >- 16 | {% if target_platform == "osx-64" %} 17 | export SDKROOT=$(xcrun --sdk macosx --show-sdk-path) && export CC=${CC} && export CXX=${CXX} && export CFLAGS="${CFLAGS} -isysroot ${SDKROOT} -I${SDKROOT}/usr/include -mmacosx-version-min=10.9 -Xpreprocessor -fopenmp" && export CXXFLAGS="${CXXFLAGS} -stdlib=libc++ -isysroot ${SDKROOT} -I${SDKROOT}/usr/include -I${SDKROOT}/usr/include/c++/v1 -mmacosx-version-min=10.9 -Xpreprocessor -fopenmp" && export LDFLAGS="${LDFLAGS} -stdlib=libc++ -isysroot ${SDKROOT} -mmacosx-version-min=10.9 -lomp" && {{ PYTHON }} -m pip install . --no-deps -vv 18 | {% else %} 19 | {{ PYTHON }} -m pip install . --no-deps -vv 20 | {% endif %} 21 | 22 | requirements: 23 | build: 24 | - {{ compiler('c') }} 25 | - {{ compiler('cxx') }} 26 | - {{ stdlib('c') }} 27 | - make # [unix] 28 | - cmake # [win] 29 | - llvm-openmp >=14.0.6 # [osx] 30 | host: 31 | - python 32 | - pip 33 | - numpy 34 | - setuptools >=45 35 | - wheel 36 | - llvm-openmp >=14.0.6 # [osx] 37 | - libcxx # [osx] 38 | run: 39 | - python 40 | - {{ pin_compatible('numpy') }} 41 | - pandas >=1.0.0 42 | - llvm-openmp >=14.0.6 # [osx] 43 | - libcxx # [osx] 44 | 45 | test: 46 | requires: 47 | - pip 48 | - pytest >=6.0 49 | - pytest-cov >=2.0 50 | source_files: 51 | - tests 52 | - setup.py 53 | - pyproject.toml 54 | imports: 55 | - pattern_causality 56 | commands: 57 | - pip check 58 | - pytest tests -v --import-mode=append 59 | 60 | about: 61 | home: https://github.com/skstavroglou/pattern_causality_py 62 | license: BSD-3-Clause 63 | license_family: BSD 64 | license_file: LICENSE 65 | summary: Pattern Causality Algorithm in Python 66 | description: | 67 | A comprehensive Python library that implements the Pattern Causality algorithm 68 | for analyzing causal relationships in time series data. This package provides 69 | efficient tools for detecting and quantifying causality patterns between 70 | multiple time series, with a particular focus on nonlinear complex systems. 
71 |   doc_url: https://github.com/skstavroglou/pattern_causality_py#readme
72 |   dev_url: https://github.com/skstavroglou/pattern_causality_py
73 | 
74 | extra:
75 |   recipe-maintainers:
76 |     - wanghui5801
--------------------------------------------------------------------------------
/tests/test_utils.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import numpy as np
3 | from utils.statespace import statespace
4 | from utils.patternhashing import patternhashing
5 | from utils.signaturespace import signaturespace
6 | from utils.distancematrix import distancematrix
7 | from utils.patternspace import patternspace
8 | from utils.pastNNs import pastNNs
9 | from utils.projectedNNs import projectedNNs
10 | from utils.predictionY import predictionY
11 | from utils.fillPCMatrix import fillPCMatrix
12 | from utils.natureOfCausality import natureOfCausality
13 | from utils.databank import databank
14 | from utils.fcp import fcp
15 | 
16 | 
17 | class TestUtils(unittest.TestCase):
18 |     def setUp(self):
19 |         """Set up test data"""
20 |         self.time_series = np.sin(np.linspace(0, 10, 100))
21 |         self.E = 3
22 |         self.tau = 1
23 | 
24 |     def test_statespace(self):
25 |         """Test state space creation"""
26 |         result = statespace(self.time_series.tolist(), self.E, self.tau)
27 |         self.assertIsInstance(result, np.ndarray)
28 |         expected_shape = (len(self.time_series) - (self.E - 1) * self.tau, self.E)
29 |         self.assertEqual(result.shape, expected_shape)
30 | 
31 |     def test_patternhashing(self):
32 |         """Test pattern hashing"""
33 |         result = patternhashing(self.E)
34 |         self.assertIsInstance(result, np.ndarray)
35 |         self.assertEqual(len(result), 3 ** (self.E - 1))  # patternhashing enumerates 3^(E-1) patterns
36 | 
37 |     def test_distance_matrix(self):
38 |         """Test distance matrix calculation"""
39 |         state_space = statespace(self.time_series.tolist(), self.E, self.tau)
40 |         result = distancematrix(state_space, metric="euclidean")
41 |         self.assertIsInstance(result, np.ndarray)
42 |         self.assertEqual(result.shape, (len(state_space), len(state_space)))
43 | 
44 |     def test_fcp(self):
45 |         """Test first causality point calculation"""
46 |         result = fcp(self.E, self.tau, 1, self.time_series.tolist())
47 |         self.assertIsInstance(result, int)
48 |         self.assertGreater(result, 0)
49 | 
50 |     def test_databank(self):
51 |         """Test databank functionality"""
52 |         # Test vector creation
53 |         vector = databank("vector", [5])
54 |         self.assertIsInstance(vector, np.ndarray)
55 |         self.assertEqual(vector.shape, (5,))
56 | 
57 |         # Test matrix creation
58 |         matrix = databank("matrix", [3, 3])
59 |         self.assertIsInstance(matrix, np.ndarray)
60 |         self.assertEqual(matrix.shape, (3, 3))
61 | 
62 |         # Test array creation
63 |         array = databank("array", [2, 2, 2])
64 |         self.assertIsInstance(array, np.ndarray)
65 |         self.assertEqual(array.shape, (2, 2, 2))
66 | 
67 | 
68 | if __name__ == "__main__":
69 |     unittest.main()
70 | 
--------------------------------------------------------------------------------
/tests/test_advanced.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import numpy as np
3 | import pandas as pd
4 | from pattern_causality import pattern_causality, load_data
5 | 
6 | 
7 | class TestAdvancedFunctionality(unittest.TestCase):
8 |     @classmethod
9 |     def setUpClass(cls):
10 |         """Load data once for all tests"""
11 |         cls.data = load_data()
12 |         cls.X = cls.data["NAO"].values
13 |         cls.Y = cls.data["AAO"].values
14 |         cls.pc = pattern_causality(verbose=False)
15 | 
16 |     def test_pc_matrix(self):
17 | 
"""Test pc_matrix functionality""" 18 | results = self.pc.pc_matrix( 19 | dataset=self.data.drop(columns=["Date"]), 20 | E=3, 21 | tau=1, 22 | metric="euclidean", 23 | h=1, 24 | weighted=True, 25 | ) 26 | self.assertIsInstance(results, pd.DataFrame) 27 | self.assertTrue("from_var" in results.columns) 28 | self.assertTrue("to_var" in results.columns) 29 | self.assertTrue("positive" in results.columns) 30 | self.assertTrue("negative" in results.columns) 31 | self.assertTrue("dark" in results.columns) 32 | 33 | def test_cross_validation(self): 34 | """Test cross-validation functionality""" 35 | cv_results = self.pc.pc_cross_validation( 36 | X=self.X, 37 | Y=self.Y, 38 | E=3, 39 | tau=1, 40 | metric="euclidean", 41 | h=1, 42 | weighted=True, 43 | numberset=[100, 200, 300], 44 | ) 45 | self.assertIsInstance(cv_results, pd.DataFrame) 46 | self.assertEqual(len(cv_results), 3) 47 | self.assertTrue("positive" in cv_results.columns) 48 | self.assertTrue("negative" in cv_results.columns) 49 | self.assertTrue("dark" in cv_results.columns) 50 | 51 | def test_parameter_optimization(self): 52 | """Test parameter optimization""" 53 | result = self.pc.optimal_parameters_search( 54 | Emax=3, 55 | tau_max=2, 56 | metric="euclidean", 57 | h=1, 58 | weighted=False, 59 | dataset=self.data.drop(columns=["Date"]), 60 | ) 61 | self.assertIsInstance(result, pd.DataFrame) 62 | # Check if DataFrame contains necessary columns 63 | expected_columns = [ 64 | "E", 65 | "tau", 66 | "Total", 67 | "of which Positive", 68 | "of which Negative", 69 | "of which Dark", 70 | ] 71 | for col in expected_columns: 72 | self.assertIn(col, result.columns) 73 | # Check data types and ranges 74 | self.assertTrue(all(result["E"] >= 2)) 75 | self.assertTrue(all(result["tau"] >= 1)) 76 | 77 | 78 | if __name__ == "__main__": 79 | unittest.main() 80 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Lint 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | lint: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v3 10 | 11 | - name: Set up Python 12 | uses: actions/setup-python@v4 13 | with: 14 | python-version: "3.8" 15 | 16 | - name: Cache pip 17 | uses: actions/cache@v3 18 | with: 19 | path: ~/.cache/pip 20 | key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt', 'setup.py', 'pyproject.toml') }} 21 | restore-keys: | 22 | ${{ runner.os }}-pip- 23 | 24 | - name: Install system dependencies 25 | run: | 26 | sudo apt-get update 27 | sudo apt-get install -y g++ python3-dev libomp-dev build-essential 28 | 29 | - name: Install Python dependencies 30 | run: | 31 | python -m pip install --upgrade pip setuptools wheel 32 | python -m pip install numpy pandas 33 | python -m pip install black isort flake8 mypy typing-extensions 34 | python -m pip install build 35 | 36 | - name: Set up compiler environment 37 | run: | 38 | python_include=$(python3 -c 'import sysconfig; print(sysconfig.get_path("include"))') 39 | numpy_include=$(python3 -c 'import numpy; print(numpy.get_include())') 40 | echo "CFLAGS=-I${python_include} -I${numpy_include}" >> $GITHUB_ENV 41 | echo "CXXFLAGS=-I${python_include} -I${numpy_include} -fopenmp -std=c++11 -O3 -Wall -fPIC" >> $GITHUB_ENV 42 | echo "LDFLAGS=-fopenmp" >> $GITHUB_ENV 43 | echo "NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION" >> $GITHUB_ENV 44 | echo "CC=gcc" >> $GITHUB_ENV 45 | echo "CXX=g++" >> $GITHUB_ENV 46 | 47 | - name: Create directories 48 | 
run: | 49 | mkdir -p pattern_causality/utils 50 | mkdir -p pattern_causality/cpp 51 | mkdir -p utils 52 | touch pattern_causality/__init__.py 53 | touch pattern_causality/utils/__init__.py 54 | touch pattern_causality/cpp/__init__.py 55 | touch utils/__init__.py 56 | 57 | - name: Build package 58 | run: | 59 | python -m pip install -v -e . 60 | 61 | - name: Format with Black 62 | run: | 63 | black . --check --diff 64 | continue-on-error: true 65 | 66 | - name: Check imports with isort 67 | run: | 68 | isort . --check-only --diff 69 | continue-on-error: true 70 | 71 | - name: Lint with flake8 72 | run: | 73 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 74 | flake8 . --count --exit-zero --max-complexity=10 --statistics 75 | continue-on-error: true 76 | 77 | - name: Type check with mypy 78 | run: | 79 | mypy pattern_causality --ignore-missing-imports 80 | continue-on-error: true 81 | -------------------------------------------------------------------------------- /pattern_causality/datasets.py: -------------------------------------------------------------------------------- 1 | """Pattern Causality Datasets Module. 2 | 3 | This module provides access to built-in datasets for pattern causality analysis. 4 | The datasets included are: 5 | - Climate_Indices: A dataset containing climate oscillation indices for pattern causality analysis 6 | """ 7 | 8 | from typing import Dict 9 | import pandas as pd 10 | import os 11 | 12 | 13 | def load_data() -> pd.DataFrame: 14 | """Load the Climate Indices dataset included with the package. 15 | 16 | This function loads the built-in Climate_Indices.csv dataset, which contains 17 | climate oscillation indices data suitable for pattern causality analysis. 18 | 19 | Returns: 20 | pd.DataFrame: A DataFrame containing the climate indices data with the following columns: 21 | - Date: The date of the observation (YYYY-MM-DD) 22 | - AO: Arctic Oscillation index 23 | - AAO: Antarctic Oscillation index 24 | - NAO: North Atlantic Oscillation index 25 | - PNA: Pacific North American index 26 | 27 | Example: 28 | >>> from pattern_causality import load_data 29 | >>> data = load_data() 30 | >>> print(data.shape) 31 | (535, 5) 32 | """ 33 | data_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data", "Climate_Indices.csv") 34 | return pd.read_csv(data_path) 35 | 36 | 37 | def get_dataset_info() -> Dict[str, str]: 38 | """Get information about the built-in Climate Indices dataset. 39 | 40 | Returns: 41 | Dict[str, str]: A dictionary containing dataset information with keys: 42 | - description: General description of the dataset 43 | - source: Source of the data 44 | - citation: Citation information 45 | - variables: Description of the variables 46 | 47 | Example: 48 | >>> from pattern_causality import get_dataset_info 49 | >>> info = get_dataset_info() 50 | >>> print(info['description']) 51 | """ 52 | return { 53 | "description": "Climate Oscillation Indices dataset for pattern causality analysis", 54 | "source": "NOAA Climate Prediction Center", 55 | "citation": ( 56 | "Please cite the Pattern Causality package and the NOAA Climate " 57 | "Prediction Center when using this dataset." 
58 |         ),
59 |         "variables": (
60 |             "AO: Arctic Oscillation index - A climate pattern characterized by winds circulating "
61 |             "counterclockwise around the Arctic.\n"
62 |             "AAO: Antarctic Oscillation index - Also known as the Southern Annular Mode (SAM), "
63 |             "describing the north-south movement of the westerly wind belt around Antarctica.\n"
64 |             "NAO: North Atlantic Oscillation index - The atmospheric pressure difference between "
65 |             "the Azores and Iceland.\n"
66 |             "PNA: Pacific North American index - A climate pattern reflecting large-scale changes "
67 |             "in atmospheric wave patterns over North America."
68 |         )
69 |     }
70 | 
--------------------------------------------------------------------------------
/pattern_causality/cpp/fcp.cpp:
--------------------------------------------------------------------------------
1 | #define PY_SSIZE_T_CLEAN
2 | #include <Python.h>
3 | #include <numpy/arrayobject.h>
4 | #include <climits>
5 | 
6 | static PyObject* fcp(PyObject* self, PyObject* args) {
7 |     int E, tau, h;
8 |     PyObject* X;
9 | 
10 |     // Parse input arguments
11 |     if (!PyArg_ParseTuple(args, "iiiO", &E, &tau, &h, &X)) {
12 |         return NULL;
13 |     }
14 | 
15 |     // Validate input types
16 |     if (!PyList_Check(X) && !PyArray_Check(X)) {
17 |         PyErr_SetString(PyExc_TypeError, "X must be a list or numpy array");
18 |         return NULL;
19 |     }
20 | 
21 |     // Validate input values
22 |     if (E < 2) {
23 |         PyErr_SetString(PyExc_ValueError, "E must be >= 2");
24 |         return NULL;
25 |     }
26 |     if (tau < 1) {
27 |         PyErr_SetString(PyExc_ValueError, "tau must be >= 1");
28 |         return NULL;
29 |     }
30 |     if (h < 0) {
31 |         PyErr_SetString(PyExc_ValueError, "h must be >= 0");
32 |         return NULL;
33 |     }
34 | 
35 |     // Get length of input
36 |     Py_ssize_t X_len;
37 |     if (PyList_Check(X)) {
38 |         X_len = PyList_Size(X);
39 |     } else {
40 |         PyArrayObject* arr = (PyArrayObject*)X;
41 |         X_len = PyArray_SIZE(arr);
42 |     }
43 | 
44 |     if (X_len < 1) {
45 |         PyErr_SetString(PyExc_ValueError, "Input X cannot be empty");
46 |         return NULL;
47 |     }
48 | 
49 |     // Calculate constants with overflow checking
50 |     if (E > (INT_MAX - 1) || tau > INT_MAX / (E - 1)) {
51 |         PyErr_SetString(PyExc_OverflowError, "Parameters too large");
52 |         return NULL;
53 |     }
54 | 
55 |     int NNSPAN = E + 1;  // Former NN | Reserves a minimum number of nearest neighbors
56 |     int CCSPAN = (E - 1) * tau;  // This will remove the common coordinate NNs
57 |     int PredSPAN = h;
58 | 
59 |     // Check for integer overflow in final calculation
60 |     if (NNSPAN > INT_MAX - CCSPAN ||
61 |         NNSPAN + CCSPAN > INT_MAX - PredSPAN ||
62 |         NNSPAN + CCSPAN + PredSPAN > INT_MAX - 1) {
63 |         PyErr_SetString(PyExc_OverflowError, "Integer overflow in FCP calculation");
64 |         return NULL;
65 |     }
66 | 
67 |     int FCP = 1 + NNSPAN + CCSPAN + PredSPAN;
68 | 
69 |     // Validate sufficient data points
70 |     if (NNSPAN + CCSPAN + PredSPAN >= X_len - CCSPAN) {
71 |         PyErr_SetString(PyExc_ValueError,
72 |             "The First Point to consider for Causality does not have sufficient "
73 |             "Nearest Neighbors. Please Check parameters: "
74 |             "E, lag, p as well as the length of X and Y");
75 |         return NULL;
76 |     }
77 | 
78 |     return PyLong_FromLong((long)FCP);
79 | }
80 | 
81 | static PyMethodDef FcpMethods[] = {
82 |     {"fcp", fcp, METH_VARARGS, "Calculate first causality point"},
83 |     {NULL, NULL, 0, NULL}
84 | };
85 | 
86 | static struct PyModuleDef fcpmodule = {
87 |     PyModuleDef_HEAD_INIT,
88 |     "utils.fcp",  // Module name must match the built extension name "utils.fcp"
89 |     "First causality point calculation module",
90 |     -1,
91 |     FcpMethods
92 | };
93 | 
94 | PyMODINIT_FUNC PyInit_fcp(void) {
95 |     import_array();  // Initialize NumPy
96 | 
97 |     PyObject* m = PyModule_Create(&fcpmodule);
98 |     if (m == NULL) {
99 |         return NULL;
100 |     }
101 | 
102 |     return m;
103 | }
104 | 
--------------------------------------------------------------------------------
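
A minimal sketch of the first-causality-point arithmetic implemented above (FCP = 1 + (E + 1) + (E - 1) * tau + h), assuming the compiled extension is importable as utils.fcp; the values are illustrative:

# Mirror the FCP formula from fcp.cpp: 1 + NNSPAN + CCSPAN + PredSPAN.
from utils.fcp import fcp

E, tau, h = 3, 1, 1
series = [float(i) for i in range(100)]
expected = 1 + (E + 1) + (E - 1) * tau + h
assert fcp(E, tau, h, series) == expected == 8
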
/pattern_causality/cpp/patternhashing.cpp:
--------------------------------------------------------------------------------
1 | #define PY_SSIZE_T_CLEAN
2 | #include <Python.h>
3 | #include <numpy/arrayobject.h>
4 | #include <vector>
5 | #include <cmath>
6 | #include <algorithm>
7 | #include <limits>
8 | 
9 | // Helper function for factorial calculation
10 | static double factorial(int n) {
11 |     if (n <= 1) return 1.0;
12 |     double result = 1.0;
13 |     for (int i = 2; i <= n; ++i) {
14 |         result *= i;
15 |     }
16 |     return result;
17 | }
18 | 
19 | // Helper function to generate possible patterns
20 | static std::vector<std::vector<int>> possiblePatterns(int E) {
21 |     if (E <= 1) {
22 |         return std::vector<std::vector<int>>();
23 |     }
24 | 
25 |     // Calculate total number of combinations
26 |     const int numPatterns = pow(3, E-1);
27 |     std::vector<std::vector<int>> patterns(numPatterns);
28 | 
29 |     // Generate patterns using R's expand.grid logic
30 |     for (int i = 0; i < numPatterns; ++i) {
31 |         std::vector<int> pattern(E-1);
32 |         int temp = i;
33 | 
34 |         // Fill pattern from right to left (least significant to most significant)
35 |         for (int j = E-2; j >= 0; --j) {
36 |             pattern[j] = (temp % 3) + 1;  // Convert to 1, 2, 3
37 |             temp /= 3;
38 |         }
39 | 
40 |         patterns[i] = pattern;
41 |     }
42 | 
43 |     return patterns;
44 | }
45 | 
46 | // Helper function for hashing - must match R implementation exactly
47 | static double hashing(const std::vector<int>& vec) {
48 |     double hash = 0.0;
49 |     for (size_t i = 0; i < vec.size(); i++) {
50 |         hash += static_cast<double>(vec[i]) * factorial(i + 2);
51 |     }
52 |     return hash;
53 | }
54 | 
55 | // Main function: patternHashing
56 | static PyObject* patternHashing(PyObject* self, PyObject* args) {
57 |     int E;
58 |     if (!PyArg_ParseTuple(args, "i", &E)) {
59 |         return NULL;
60 |     }
61 | 
62 |     std::vector<std::vector<int>> patterns = possiblePatterns(E);
63 | 
64 |     // Handle E <= 1 case
65 |     if (patterns.empty()) {
66 |         npy_intp dims[] = {0};
67 |         return (PyObject*)PyArray_SimpleNew(1, dims, NPY_DOUBLE);
68 |     }
69 | 
70 |     // Calculate hash values
71 |     std::vector<double> hash_values;
72 |     hash_values.reserve(patterns.size());
73 | 
74 |     for (const auto& pattern : patterns) {
75 |         hash_values.push_back(hashing(pattern));
76 |     }
77 | 
78 |     // Create numpy array for results
79 |     npy_intp dims[] = {static_cast<npy_intp>(hash_values.size())};
80 |     PyArrayObject* result_array = (PyArrayObject*)PyArray_SimpleNew(1, dims, NPY_DOUBLE);
81 |     if (!result_array) {
82 |         return NULL;
83 |     }
84 | 
85 |     // Copy hash values to output array
86 |     double* data = (double*)PyArray_DATA(result_array);
87 |     std::copy(hash_values.begin(), hash_values.end(), data);
88 | 
89 |     return (PyObject*)result_array;
90 | }
91 | 
92 | static PyMethodDef PatternHashingMethods[] = {
93 |     {"patternhashing", patternHashing, METH_VARARGS, "Calculate pattern hashing"},
94 |     {NULL, NULL, 0, NULL}
95 | };
96 | 
97 | static struct PyModuleDef patternhashing_module = {
98 |     PyModuleDef_HEAD_INIT,
99 |     "patternhashing",
100 |     NULL,
101 |     -1,
102 |     PatternHashingMethods
103 | };
104 | 
105 | PyMODINIT_FUNC PyInit_patternhashing(void) {
106 |     import_array();
107 |     return PyModule_Create(&patternhashing_module);
108 | }
--------------------------------------------------------------------------------
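
The enumeration above walks the 3^(E-1) patterns over {1, 2, 3} with the last component varying fastest, and weights pattern component p_i (1-based) by (i + 1)!. A pure-Python sketch of the same scheme, standard library only, for illustration:

# Pure-Python mirror of possiblePatterns() and hashing() above.
from itertools import product
from math import factorial

def hash_pattern(pattern):
    # component i (0-based) of a pattern over {1, 2, 3} is weighted by (i + 2)!
    return float(sum(p * factorial(i + 2) for i, p in enumerate(pattern)))

E = 3
patterns = list(product((1, 2, 3), repeat=E - 1))  # last component fastest
assert len(patterns) == 3 ** (E - 1)               # 9 patterns for E = 3
assert [hash_pattern(p) for p in patterns[:3]] == [8.0, 14.0, 20.0]
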
/pattern_causality/cpp/patternspace.cpp:
--------------------------------------------------------------------------------
1 | #define PY_SSIZE_T_CLEAN
2 | #include <Python.h>
3 | #include <numpy/arrayobject.h>
4 | #include <vector>
5 | #include <cmath>
6 | #include <limits>
7 | #include <algorithm>
8 | 
9 | // Helper function for factorial calculation - must match R implementation exactly
10 | static double factorial(int n) {
11 |     if (n <= 1) return 1.0;
12 |     double result = 1.0;
13 |     for (int i = 2; i <= n; ++i) {
14 |         result *= i;
15 |     }
16 |     return result;
17 | }
18 | 
19 | // Helper function for hashing - must match R implementation exactly
20 | static double hashing(const std::vector<int>& vec) {
21 |     double hash = 0.0;
22 |     for (size_t i = 0; i < vec.size(); i++) {
23 |         hash += static_cast<double>(vec[i]) * factorial(i + 2);
24 |     }
25 |     return hash;
26 | }
27 | 
28 | // Pre-allocated vectors to avoid repeated allocation
29 | thread_local std::vector<double> result_buffer;
30 | thread_local std::vector<int> p_vec_buffer;
31 | 
32 | static double pattern_vector_difference(const std::vector<double>& sVec) {
33 |     // Quick check for NaN
34 |     for (const auto& val : sVec) {
35 |         if (std::isnan(val)) {
36 |             return std::numeric_limits<double>::quiet_NaN();
37 |         }
38 |     }
39 | 
40 |     // Reuse pre-allocated vector
41 |     if (p_vec_buffer.capacity() < sVec.size()) {
42 |         p_vec_buffer.reserve(sVec.size());
43 |     }
44 |     p_vec_buffer.clear();
45 | 
46 |     const double eps = std::numeric_limits<double>::epsilon();
47 | 
48 |     // Pattern calculation - must match R implementation exactly
49 |     for (const auto& val : sVec) {
50 |         if (std::abs(val) < eps) {
51 |             p_vec_buffer.push_back(2);  // zero
52 |         } else if (val > 0) {
53 |             p_vec_buffer.push_back(3);  // positive
54 |         } else {
55 |             p_vec_buffer.push_back(1);  // negative
56 |         }
57 |     }
58 | 
59 |     return hashing(p_vec_buffer);
60 | }
61 | 
62 | static PyObject* patternspace(PyObject* self, PyObject* args) {
63 |     PyObject* sm_obj;
64 |     int E;
65 | 
66 |     if (!PyArg_ParseTuple(args, "Oi", &sm_obj, &E)) {
67 |         return NULL;
68 |     }
69 | 
70 |     PyArrayObject* sm_array = (PyArrayObject*)PyArray_FROM_OTF(sm_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
71 |     if (!sm_array || PyArray_NDIM(sm_array) != 2) {
72 |         Py_XDECREF(sm_array);
73 |         PyErr_SetString(PyExc_ValueError, "Input must be a 2D array");
74 |         return NULL;
75 |     }
76 | 
77 |     npy_intp* dims = PyArray_DIMS(sm_array);
78 |     npy_intp num_rows = dims[0];
79 |     npy_intp num_cols = dims[1];
80 | 
81 |     npy_intp out_dims[2] = {num_rows, 1};
82 |     PyObject* result = PyArray_SimpleNew(2, out_dims, NPY_DOUBLE);
83 |     if (!result) {
84 |         Py_DECREF(sm_array);
85 |         return NULL;
86 |     }
87 | 
88 |     double* sm_data = (double*)PyArray_DATA(sm_array);
89 |     double* result_data = (double*)PyArray_DATA((PyArrayObject*)result);
90 | 
91 |     // Pre-allocate vector for row data
92 |     std::vector<double> row_buffer(num_cols);
93 | 
94 |     // Process each row sequentially to ensure consistent results
95 |     for (npy_intp i = 0; i < num_rows; i++) {
96 |         // Copy row data
97 |         std::copy(sm_data + i * num_cols, sm_data + (i + 1) * num_cols, row_buffer.begin());
98 |         result_data[i] = pattern_vector_difference(row_buffer);
99 |     }
100 | 
101 |     Py_DECREF(sm_array);
102 |     return result;
103 | }
104 | 
105 | static PyMethodDef PatternSpaceMethods[] = {
106 |     {"patternspace", patternspace, METH_VARARGS, "Calculate pattern space matrix from signature matrix"},
107 |     {NULL, NULL, 0, NULL}
108 | };
109 | 
110 | static struct PyModuleDef patternspacemodule = {
111 |     PyModuleDef_HEAD_INIT,
112 |     "patternspace",
113 |     "Pattern space calculation module",
114 |     -1,
115 |     PatternSpaceMethods
116 | };
117 | 
118 | PyMODINIT_FUNC PyInit_patternspace(void) {
119 |     import_array();
120 |     return PyModule_Create(&patternspacemodule);
121 | }
122 | 
--------------------------------------------------------------------------------
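
Each signature row thus collapses to a single hash: components with magnitude below machine epsilon count as flat (2), positive ones as up (3), negative ones as down (1), and any NaN poisons the whole row. A hypothetical Python mirror of pattern_vector_difference, for illustration:

# Hypothetical Python mirror of the per-row symbolization and hashing.
import numpy as np
from math import factorial

def row_to_hash(sig_row, eps=np.finfo(float).eps):
    if np.isnan(sig_row).any():
        return float("nan")
    symbols = [2 if abs(v) < eps else (3 if v > 0 else 1) for v in sig_row]
    return float(sum(s * factorial(i + 2) for i, s in enumerate(symbols)))

print(row_to_hash(np.array([0.5, -0.2])))  # up, down -> 3*2! + 1*3! = 12.0
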
/pattern_causality/cpp/distancematrix.cpp:
--------------------------------------------------------------------------------
1 | #define PY_SSIZE_T_CLEAN
2 | #include <Python.h>
3 | #include <numpy/arrayobject.h>
4 | #include <cmath>
5 | #include <string>
6 | 
7 | // Helper functions for different distance metrics
8 | static inline double euclideanDistance(const double* vec1, const double* vec2, size_t size) {
9 |     double sum = 0.0;
10 |     for (size_t i = 0; i < size; i++) {
11 |         double diff = vec1[i] - vec2[i];
12 |         sum += diff * diff;
13 |     }
14 |     return sqrt(sum);
15 | }
16 | 
17 | static inline double manhattanDistance(const double* vec1, const double* vec2, size_t size) {
18 |     double sum = 0.0;
19 |     for (size_t i = 0; i < size; i++) {
20 |         sum += fabs(vec1[i] - vec2[i]);
21 |     }
22 |     return sum;
23 | }
24 | 
25 | static inline double minkowskiDistance(const double* vec1, const double* vec2, size_t size, int n) {
26 |     double sum = 0.0;
27 |     for (size_t i = 0; i < size; i++) {
28 |         sum += pow(fabs(vec1[i] - vec2[i]), n);
29 |     }
30 |     return pow(sum, 1.0/n);
31 | }
32 | 
33 | static inline double calculateDistance(const double* vec1, const double* vec2, size_t size,
34 |                                        const std::string& metric, int n = 2) {
35 |     if (metric == "euclidean") {
36 |         return euclideanDistance(vec1, vec2, size);
37 |     } else if (metric == "manhattan") {
38 |         return manhattanDistance(vec1, vec2, size);
39 |     } else if (metric == "minkowski") {
40 |         return minkowskiDistance(vec1, vec2, size, n);
41 |     }
42 |     return euclideanDistance(vec1, vec2, size);
43 | }
44 | 
45 | static PyObject* distanceMatrix(PyObject* self, PyObject* args, PyObject* kwargs) {
46 |     PyObject* matrix_obj;
47 |     const char* metric_str = "euclidean";
48 |     int n = 2;
49 | 
50 |     static char* kwlist[] = {"matrix", "metric", "n", NULL};
51 | 
52 |     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|si", kwlist,
53 |                                      &matrix_obj, &metric_str, &n)) {
54 |         return NULL;
55 |     }
56 | 
57 |     PyArrayObject* matrix_array = (PyArrayObject*)PyArray_FROM_OTF(matrix_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
58 |     if (matrix_array == NULL) {
59 |         PyErr_SetString(PyExc_TypeError, "Could not convert input to numpy array");
60 |         return NULL;
61 |     }
62 | 
63 |     if (PyArray_NDIM(matrix_array) != 2) {
64 |         Py_DECREF(matrix_array);
65 |         PyErr_SetString(PyExc_ValueError, "Input must be a 2D array");
66 |         return NULL;
67 |     }
68 | 
69 |     npy_intp num_rows = PyArray_DIM(matrix_array, 0);
70 |     npy_intp vec_size = PyArray_DIM(matrix_array, 1);
71 | 
72 |     npy_intp dims[2] = {num_rows, num_rows};
73 |     PyObject* result_matrix = PyArray_SimpleNew(2, dims, NPY_DOUBLE);
74 |     if (result_matrix == NULL) {
75 |         Py_DECREF(matrix_array);
76 |         return NULL;
77 |     }
78 | 
79 |     double* matrix_data = (double*)PyArray_DATA(matrix_array);
80 |     double* result_data = (double*)PyArray_DATA((PyArrayObject*)result_matrix);
81 | 
82 |     std::string metric(metric_str);
83 | 
84 |     for (npy_intp i = 0; i < num_rows; i++) {
85 |         const double* vec1 = matrix_data + i * vec_size;
86 |         result_data[i * 
num_rows + i] = 0.0; 87 | 88 | for (npy_intp j = i + 1; j < num_rows; j++) { 89 | const double* vec2 = matrix_data + j * vec_size; 90 | double dist = calculateDistance(vec1, vec2, vec_size, metric, n); 91 | 92 | result_data[i * num_rows + j] = dist; 93 | result_data[j * num_rows + i] = dist; 94 | } 95 | } 96 | 97 | Py_DECREF(matrix_array); 98 | 99 | return result_matrix; 100 | } 101 | 102 | static PyMethodDef DistanceMatrixMethods[] = { 103 | {"distancematrix", (PyCFunction)distanceMatrix, METH_VARARGS | METH_KEYWORDS, 104 | "Calculate distance matrix for a set of vectors"}, 105 | {NULL, NULL, 0, NULL} 106 | }; 107 | 108 | static struct PyModuleDef distancematrixmodule = { 109 | PyModuleDef_HEAD_INIT, 110 | "distancematrix", 111 | "Distance calculation module", 112 | -1, 113 | DistanceMatrixMethods 114 | }; 115 | 116 | PyMODINIT_FUNC PyInit_distancematrix(void) { 117 | import_array(); 118 | return PyModule_Create(&distancematrixmodule); 119 | } -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | test: 11 | runs-on: ubuntu-latest 12 | strategy: 13 | matrix: 14 | python-version: ["3.8", "3.9", "3.10"] 15 | fail-fast: false 16 | 17 | steps: 18 | - uses: actions/checkout@v3 19 | with: 20 | fetch-depth: 0 21 | 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v4 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | 27 | - name: Cache pip 28 | uses: actions/cache@v3 29 | with: 30 | path: ~/.cache/pip 31 | key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt', 'setup.py', 'pyproject.toml') }} 32 | restore-keys: | 33 | ${{ runner.os }}-pip- 34 | 35 | - name: Install system dependencies 36 | run: | 37 | sudo apt-get update 38 | sudo apt-get install -y g++ python3-dev libomp-dev build-essential 39 | # Print system information 40 | echo "System information:" 41 | uname -a 42 | g++ --version 43 | python3 --version 44 | 45 | - name: Install build dependencies 46 | run: | 47 | python -m pip install --upgrade pip setuptools wheel 48 | python -m pip install numpy pandas 49 | python -m pip install pytest pytest-cov 50 | # Print installed packages 51 | echo "Installed Python packages:" 52 | pip list 53 | 54 | - name: Set up compiler environment 55 | run: | 56 | # Get Python and NumPy include paths 57 | PYTHON_INCLUDE=$(python3 -c 'import sysconfig; print(sysconfig.get_path("include"))') 58 | NUMPY_INCLUDE=$(python3 -c 'import numpy; print(numpy.get_include())') 59 | 60 | # Set up environment variables 61 | echo "CFLAGS=-I${PYTHON_INCLUDE} -I${NUMPY_INCLUDE} -O3" >> $GITHUB_ENV 62 | echo "CXXFLAGS=-I${PYTHON_INCLUDE} -I${NUMPY_INCLUDE} -std=c++11 -fopenmp -O3 -Wall -fPIC" >> $GITHUB_ENV 63 | echo "LDFLAGS=-fopenmp" >> $GITHUB_ENV 64 | echo "NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION" >> $GITHUB_ENV 65 | echo "CC=gcc" >> $GITHUB_ENV 66 | echo "CXX=g++" >> $GITHUB_ENV 67 | 68 | # Print environment for debugging 69 | echo "Python include path: ${PYTHON_INCLUDE}" 70 | echo "NumPy include path: ${NUMPY_INCLUDE}" 71 | echo "Checking if include directories exist:" 72 | ls -la ${PYTHON_INCLUDE} || echo "Python include directory not found" 73 | ls -la ${NUMPY_INCLUDE} || echo "NumPy include directory not found" 74 | 75 | - name: Create package structure 76 | run: | 77 | mkdir -p utils 
pattern_causality/utils pattern_causality/cpp 78 | 79 | cat > utils/__init__.py << 'EOL' 80 | from .statespace import statespace 81 | from .patternhashing import patternhashing 82 | from .signaturespace import signaturespace 83 | from .distancematrix import distancematrix 84 | from .patternspace import patternspace 85 | from .pastNNs import pastNNs 86 | from .projectedNNs import projectedNNs 87 | from .predictionY import predictionY 88 | from .fillPCMatrix import fillPCMatrix 89 | from .natureOfCausality import natureOfCausality 90 | from .databank import databank 91 | from .fcp import fcp 92 | 93 | __all__ = [ 94 | 'statespace', 'patternhashing', 'signaturespace', 'distancematrix', 95 | 'patternspace', 'pastNNs', 'projectedNNs', 'predictionY', 96 | 'fillPCMatrix', 'natureOfCausality', 'databank', 'fcp' 97 | ] 98 | EOL 99 | 100 | touch pattern_causality/utils/__init__.py 101 | touch pattern_causality/cpp/__init__.py 102 | 103 | echo "Package structure created:" 104 | find . -type d 105 | 106 | - name: Install package 107 | run: | 108 | echo "Building package in verbose mode..." 109 | python -m pip install -v -e . 110 | 111 | - name: List directory structure and environment 112 | run: | 113 | echo "Current directory structure:" 114 | find . -type f -name "*.py" -o -name "*.cpp" 115 | echo "Environment variables:" 116 | env | grep -E "CFLAGS|CXXFLAGS|LDFLAGS|NPY|CC|CXX" 117 | echo "Python and package information:" 118 | python --version 119 | pip list 120 | echo "C++ source files:" 121 | ls -la pattern_causality/cpp/ 122 | 123 | - name: Run tests 124 | run: | 125 | echo "Running tests with coverage..." 126 | python -m pytest tests/ --cov=pattern_causality -v 127 | 128 | - name: Upload coverage reports 129 | if: success() 130 | uses: codecov/codecov-action@v3 131 | with: 132 | fail_ci_if_error: false 133 | verbose: true -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages, Extension 2 | import os 3 | import platform 4 | import sys 5 | import numpy as np 6 | import sysconfig 7 | 8 | def get_compiler_args(): 9 | """Get platform-specific compiler arguments.""" 10 | system = platform.system().lower() 11 | 12 | if system == "darwin": # macOS 13 | return { 14 | "extra_compile_args": [ 15 | "-O3", 16 | "-fPIC", 17 | "-std=c++11", 18 | "-stdlib=libc++", 19 | "-mmacosx-version-min=10.9", 20 | "-Wno-unused-function", 21 | "-Wno-unused-variable", 22 | "-Wno-deprecated-declarations", 23 | "-Wno-c++11-narrowing", 24 | "-v", 25 | ], 26 | "extra_link_args": [ 27 | "-stdlib=libc++", 28 | "-mmacosx-version-min=10.9", 29 | "-v", 30 | ] 31 | } 32 | elif system == "linux": 33 | return { 34 | "extra_compile_args": [ 35 | "-O3", 36 | "-fPIC", 37 | "-std=c++11", 38 | "-v", 39 | ], 40 | "extra_link_args": ["-v"] 41 | } 42 | elif system == "windows": 43 | return { 44 | "extra_compile_args": ["/O2", "/W3", "/EHsc", "/std:c++11", "/verbose"], 45 | "extra_link_args": ["/verbose"] 46 | } 47 | else: 48 | return { 49 | "extra_compile_args": ["-O3", "-fPIC", "-std=c++11", "-v"], 50 | "extra_link_args": ["-v"] 51 | } 52 | 53 | def get_include_dirs(): 54 | """Get include directories for compilation.""" 55 | cpp_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "pattern_causality", "cpp") 56 | include_dirs = [ 57 | np.get_include(), 58 | cpp_dir, 59 | sysconfig.get_path('include'), 60 | ] 61 | 62 | # Add platform-specific include directories 63 | if 
platform.system() == "Darwin":
64 |         mac_dirs = [
65 |             "/usr/local/include",
66 |             "/usr/include",
67 |             "/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include",
68 |         ]
69 |         include_dirs.extend(d for d in mac_dirs if os.path.exists(d))
70 | 
71 |     return include_dirs
72 | 
73 | def get_extensions():
74 |     """Get the list of C++ extensions to be built."""
75 |     cpp_dir = os.path.join("pattern_causality", "cpp")
76 |     include_dirs = get_include_dirs()
77 | 
78 |     compiler_args = get_compiler_args()
79 | 
80 |     extensions = []
81 |     cpp_files = [
82 |         "statespace",
83 |         "patternhashing",
84 |         "signaturespace",
85 |         "distancematrix",
86 |         "patternspace",
87 |         "pastNNs",
88 |         "projectedNNs",
89 |         "predictionY",
90 |         "fillPCMatrix",
91 |         "natureOfCausality",
92 |         "databank",
93 |         "fcp"
94 |     ]
95 | 
96 |     for cpp_file in cpp_files:
97 |         ext = Extension(
98 |             f"utils.{cpp_file}",
99 |             sources=[f"pattern_causality/cpp/{cpp_file}.cpp"],
100 |             language="c++",
101 |             include_dirs=include_dirs,
102 |             extra_compile_args=compiler_args["extra_compile_args"],
103 |             extra_link_args=compiler_args["extra_link_args"]
104 |         )
105 |         extensions.append(ext)
106 | 
107 |     return extensions
108 | 
109 | # Print build environment information
110 | print("\nBuild Environment:")
111 | print(f"Platform: {platform.system()} {platform.machine()}")
112 | print(f"Python: {sys.version}")
113 | print(f"NumPy: {np.__version__}")
114 | print(f"Compiler: {sysconfig.get_config_var('CC')}")
115 | 
116 | # Read README
117 | with open("README.md", encoding="utf-8") as f:
118 |     long_description = f.read()
119 | 
120 | setup(
121 |     name="pattern-causality",
122 |     version="1.0.3",
123 |     description="Pattern Causality Algorithm in Python",
124 |     long_description=long_description,
125 |     long_description_content_type="text/markdown",
126 |     author="Stavros Stavroglou, Athanasios Pantelous, Hui Wang",
127 |     author_email="huiw1128@gmail.com",
128 |     url="https://github.com/skstavroglou/pattern_causality_py",
129 |     packages=find_packages(),
130 |     package_dir={"": "."},
131 |     package_data={
132 |         'pattern_causality': [
133 |             'cpp/*.cpp',
134 |             'cpp/*.h',
135 |             'cpp/*.hpp',
136 |             'cpp/*.so',
137 |             'cpp/*.dylib',
138 |             'data/*.csv'
139 |         ],
140 |     },
141 |     ext_modules=get_extensions(),
142 |     python_requires=">=3.8",
143 |     install_requires=[
144 |         "numpy>=1.19.0",
145 |         "pandas>=1.0.0",
146 |     ],
147 |     include_package_data=True,
148 |     zip_safe=False,
149 |     classifiers=[
150 |         "Development Status :: 5 - Production/Stable",
151 |         "Intended Audience :: Science/Research",
152 |         "License :: OSI Approved :: BSD License",
153 |         "Operating System :: OS Independent",
154 |         "Programming Language :: Python :: 3",
155 |         "Programming Language :: Python :: 3.8",
156 |         "Programming Language :: Python :: 3.9",
157 |         "Programming Language :: Python :: 3.10",
158 |         "Programming Language :: Python :: 3.11",
159 |         "Programming Language :: C++",
160 |         "Topic :: Scientific/Engineering",
161 |     ],
162 | )
163 | 
--------------------------------------------------------------------------------
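
Since every extension listed in get_extensions() above is compiled under the utils namespace, a quick smoke test after building (for example with pip install -e .) is to import each module. A sketch, not part of the repository:

# Smoke test: every C++ extension from get_extensions() should import.
import importlib

modules = ["statespace", "patternhashing", "signaturespace", "distancematrix",
           "patternspace", "pastNNs", "projectedNNs", "predictionY",
           "fillPCMatrix", "natureOfCausality", "databank", "fcp"]
for name in modules:
    importlib.import_module(f"utils.{name}")
print("all C++ extensions import cleanly")
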
/pattern_causality/cpp/databank.cpp:
--------------------------------------------------------------------------------
1 | #define PY_SSIZE_T_CLEAN
2 | #include <Python.h>
3 | #include <numpy/arrayobject.h>
4 | #include <vector>
5 | #include <string>
6 | #include <limits>
7 | 
8 | static PyObject* databank(PyObject* self, PyObject* args) {
9 |     const char* type_name;
10 |     PyObject* dimensions_obj;
11 | 
12 |     // Parse arguments
13 |     if (!PyArg_ParseTuple(args, "sO", &type_name, &dimensions_obj)) {
14 |         return NULL;
15 |     }
16 | 
17 |     // Convert dimensions to vector
18 |     std::vector<npy_intp> dimensions;
19 |     if (PyList_Check(dimensions_obj) || PyTuple_Check(dimensions_obj)) {
20 |         Py_ssize_t size = PySequence_Size(dimensions_obj);
21 |         dimensions.reserve(size);
22 |         for (Py_ssize_t i = 0; i < size; i++) {
23 |             PyObject* item = PySequence_GetItem(dimensions_obj, i);
24 |             dimensions.push_back(PyLong_AsLong(item));
25 |             Py_DECREF(item);
26 |         }
27 |     } else {
28 |         PyErr_SetString(PyExc_TypeError, "dimensions must be a list or tuple");
29 |         return NULL;
30 |     }
31 | 
32 |     std::string type(type_name);
33 | 
34 |     if (type == "array") {
35 |         npy_intp* dims = dimensions.data();
36 |         PyObject* arr = PyArray_EMPTY(dimensions.size(), dims, NPY_DOUBLE, 0);
37 |         double* data = (double*)PyArray_DATA((PyArrayObject*)arr);
38 |         for (npy_intp i = 0; i < PyArray_SIZE((PyArrayObject*)arr); i++) {
39 |             data[i] = std::numeric_limits<double>::quiet_NaN();
40 |         }
41 |         return arr;
42 |     }
43 |     else if (type == "vector") {
44 |         npy_intp dims[1] = {dimensions[0]};
45 |         PyObject* arr = PyArray_EMPTY(1, dims, NPY_DOUBLE, 0);
46 |         double* data = (double*)PyArray_DATA((PyArrayObject*)arr);
47 |         for (npy_intp i = 0; i < dimensions[0]; i++) {
48 |             data[i] = std::numeric_limits<double>::quiet_NaN();
49 |         }
50 |         return arr;
51 |     }
52 |     else if (type == "matrix") {
53 |         npy_intp dims[2] = {dimensions[0], dimensions[1]};
54 |         PyObject* arr = PyArray_EMPTY(2, dims, NPY_DOUBLE, 0);
55 |         double* data = (double*)PyArray_DATA((PyArrayObject*)arr);
56 |         for (npy_intp i = 0; i < dimensions[0] * dimensions[1]; i++) {
57 |             data[i] = std::numeric_limits<double>::quiet_NaN();
58 |         }
59 |         return arr;
60 |     }
61 |     else if (type == "neighborhood memories") {
62 |         // Validate dimensions
63 |         npy_intp expected_cols = 1 + 4 * dimensions[2] + (dimensions[3] - 1) * dimensions[2] + dimensions[3] * dimensions[2];
64 |         if (dimensions[1] != expected_cols) {
65 |             PyErr_SetString(PyExc_ValueError, "The dimensions[1] is wrong!");
66 |             return NULL;
67 |         }
68 | 
69 |         // Create empty DataFrame equivalent (numpy array)
70 |         npy_intp dims[2] = {dimensions[0], dimensions[1]};
71 |         PyObject* arr = PyArray_EMPTY(2, dims, NPY_DOUBLE, 0);
72 |         if (!arr) return NULL;
73 | 
74 |         // Fill with NaN
75 |         double* data = (double*)PyArray_DATA((PyArrayObject*)arr);
76 |         for (npy_intp i = 0; i < dimensions[0] * dimensions[1]; i++) {
77 |             data[i] = std::numeric_limits<double>::quiet_NaN();
78 |         }
79 | 
80 |         // Create list for column names
81 |         PyObject* col_names = PyList_New(dimensions[1]);
82 |         if (!col_names) {
83 |             Py_DECREF(arr);
84 |             return NULL;
85 |         }
86 | 
87 |         // Add column names
88 |         int col_idx = 0;
89 | 
90 |         // "i" column
91 |         PyList_SET_ITEM(col_names, col_idx++, PyUnicode_FromString("i"));
92 | 
93 |         // nn-times, nn-dists, nn-weights, nn-patt
94 |         for (int j = 0; j < 4; j++) {
95 |             const char* prefix;
96 |             switch(j) {
97 |                 case 0: prefix = "nn-times"; break;
98 |                 case 1: prefix = "nn-dists"; break;
99 |                 case 2: prefix = "nn-weights"; break;
100 |                 case 3: prefix = "nn-patt"; break;
101 |             }
102 |             for (npy_intp k = 0; k < dimensions[2]; k++) {
103 |                 PyList_SET_ITEM(col_names, col_idx++, PyUnicode_FromString(prefix));
104 |             }
105 |         }
106 | 
107 |         // Signature component columns
108 |         for (npy_intp nn = 1; nn <= dimensions[2]; nn++) {
109 |             for (npy_intp comp = 1; comp < dimensions[3]; comp++) {
110 |                 char buf[100];
111 |                 snprintf(buf, sizeof(buf), "Sig-Comp.%ld of NN%ld", (long)comp, (long)nn);
112 |                 PyList_SET_ITEM(col_names, col_idx++, PyUnicode_FromString(buf));
113 |             }
114 |         }
115 | 
116 |         // Coordinate columns
117 |         for (npy_intp nn = 1; nn <= dimensions[2]; nn++) {
118 |             for (npy_intp coord = 1; coord <= dimensions[3]; coord++) {
119 |                 char buf[100];
120 |                 snprintf(buf, sizeof(buf), "Coord.%ld of NN%ld", (long)coord, (long)nn);
121 |                 PyList_SET_ITEM(col_names, col_idx++, PyUnicode_FromString(buf));
122 |             }
123 |         }
124 | 
125 |         // Import pandas
126 |         PyObject* pandas = PyImport_ImportModule("pandas");
127 |         if (!pandas) {
128 |             Py_DECREF(arr);
129 |             Py_DECREF(col_names);
130 |             return NULL;
131 |         }
132 | 
133 |         // Create DataFrame
134 |         PyObject* df_class = PyObject_GetAttrString(pandas, "DataFrame");
135 |         PyObject* df = PyObject_CallFunction(df_class, "OOO", arr, Py_None, col_names);  // DataFrame(data, index=None, columns=col_names); passing col_names as the second positional argument would set the index instead
136 | 
137 |         Py_DECREF(pandas);
138 |         Py_DECREF(df_class);
139 |         Py_DECREF(arr);
140 |         Py_DECREF(col_names);
141 | 
142 |         return df;
143 |     }
144 | 
145 |     Py_RETURN_NONE;
146 | }
147 | 
148 | static PyMethodDef DatabankMethods[] = {
149 |     {"databank", databank, METH_VARARGS, "Create data structures based on type and dimensions"},
150 |     {NULL, NULL, 0, NULL}
151 | };
152 | 
153 | static struct PyModuleDef databankmodule = {
154 |     PyModuleDef_HEAD_INIT,
155 |     "databank",
156 |     NULL,
157 |     -1,
158 |     DatabankMethods
159 | };
160 | 
161 | PyMODINIT_FUNC PyInit_databank(void) {
162 |     import_array();
163 |     return PyModule_Create(&databankmodule);
164 | }
165 | 
--------------------------------------------------------------------------------
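
The "neighborhood memories" branch above expects dims = [rows, cols, NN, E] with cols = 1 + 4*NN + (E - 1)*NN + E*NN: one id column, four blocks of NN nearest-neighbor columns, then signature components and coordinates. A sketch of that arithmetic, assuming a built utils.databank:

# Column-count arithmetic behind databank("neighborhood memories", dims).
from utils.databank import databank

rows, NN, E = 10, 4, 3
cols = 1 + 4 * NN + (E - 1) * NN + E * NN  # 37 columns for NN = 4, E = 3
nm = databank("neighborhood memories", [rows, cols, NN, E])
print(nm.shape)  # (10, 37): a pandas DataFrame pre-filled with NaN
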
/pattern_causality/cpp/signaturespace.cpp:
--------------------------------------------------------------------------------
1 | #define PY_SSIZE_T_CLEAN
2 | #include <Python.h>
3 | #include <numpy/arrayobject.h>
4 | #include <cmath>
5 | #include <limits>
6 | #include <vector>
7 | 
8 | // Optimized inline difference calculation
9 | static inline void calculate_differences(const double* input, double* output, npy_intp length) {
10 |     for (npy_intp i = 0; i < length - 1; i++) {
11 |         if (std::isnan(input[i]) || std::isnan(input[i + 1])) {
12 |             output[i] = std::numeric_limits<double>::quiet_NaN();
13 |         } else {
14 |             output[i] = input[i + 1] - input[i];
15 |         }
16 |     }
17 | }
18 | 
19 | static PyObject* signatureVectorDifference(PyObject* self, PyObject* args) {
20 |     PyObject* input_array;
21 |     if (!PyArg_ParseTuple(args, "O", &input_array)) {
22 |         return NULL;
23 |     }
24 | 
25 |     // Convert to numpy array
26 |     PyArrayObject* array = (PyArrayObject*)PyArray_FROM_OTF(
27 |         input_array,
28 |         NPY_DOUBLE,
29 |         NPY_ARRAY_IN_ARRAY
30 |     );
31 |     if (!array) {
32 |         return NULL;
33 |     }
34 | 
35 |     // Check dimensions
36 |     if (PyArray_NDIM(array) != 1) {
37 |         Py_DECREF(array);
38 |         PyErr_SetString(PyExc_ValueError, "Input must be a 1D array");
39 |         return NULL;
40 |     }
41 | 
42 |     const npy_intp length = PyArray_DIM(array, 0);
43 |     const npy_intp output_length = length - 1;
44 | 
45 |     // Create output array
46 |     npy_intp dims[1] = {output_length};
47 |     PyArrayObject* result_array = (PyArrayObject*)PyArray_SimpleNew(1, dims, NPY_DOUBLE);
48 |     if (!result_array) {
49 |         Py_DECREF(array);
50 |         return NULL;
51 |     }
52 | 
53 |     // Get data pointers
54 |     double* input_data = (double*)PyArray_DATA(array);
55 |     double* output_data = (double*)PyArray_DATA(result_array);
56 | 
57 |     // Calculate differences
58 |     calculate_differences(input_data, output_data, length);
59 | 
60 |     Py_DECREF(array);
61 |     return (PyObject*)result_array;
62 | }
63 | 
64 | static PyObject* signaturespace(PyObject* self, PyObject* args, PyObject* kwargs) {
65 |     PyObject* input_matrix;
66 |     int E;
67 |     int relative = 1;  // Default to relative (1 for true)
68 | 
69 |     static char* kwlist[] = {"input_matrix", "E", "relative", NULL};
70 |     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "Oi|p", kwlist,
71 |                                      &input_matrix, &E, &relative)) {
72 |         return NULL;
73 |     }
74 | 
75 |     // Validate parameters
76 |     if (E < 2) {
77 |         PyErr_SetString(PyExc_ValueError, "State space matrix must have at least 2 columns");
78 |         return NULL;
79 |     }
80 | 
81 |     // Convert input to numpy array
82 |     PyArrayObject* array = (PyArrayObject*)PyArray_FROM_OTF(
83 |         input_matrix,
84 |         NPY_DOUBLE,
85 |         NPY_ARRAY_IN_ARRAY
86 |     );
87 |     if (!array) {
88 |         PyErr_SetString(PyExc_ValueError, "Input must be a matrix");
89 |         return NULL;
90 |     }
91 | 
92 |     // Validate dimensions
93 |     if (PyArray_NDIM(array) != 2) {
94 |         Py_DECREF(array);
95 |         PyErr_SetString(PyExc_ValueError, "Input must be a matrix");
96 |         return NULL;
97 |     }
98 | 
99 |     const npy_intp rows = PyArray_DIM(array, 0);
100 |     const npy_intp cols = PyArray_DIM(array, 1);
101 |     const npy_intp output_cols = cols - 1;
102 | 
103 |     // Handle empty input
104 |     if (rows == 0) {
105 |         Py_DECREF(array);
106 |         npy_intp dims[2] = {0, output_cols};
107 |         return (PyObject*)PyArray_SimpleNew(2, dims, NPY_DOUBLE);
108 |     }
109 | 
110 |     // Create output array
111 |     npy_intp out_dims[2] = {rows, output_cols};
112 |     PyArrayObject* result_matrix = (PyArrayObject*)PyArray_SimpleNew(2, out_dims, NPY_DOUBLE);
113 |     if (!result_matrix) {
114 |         Py_DECREF(array);
115 |         return NULL;
116 |     }
117 | 
118 |     // Get data pointers
119 |     const double* input_data = (double*)PyArray_DATA(array);
120 |     double* output_data = (double*)PyArray_DATA(result_matrix);
121 | 
122 |     // Calculate differences for each row
123 |     for (npy_intp i = 0; i < rows; i++) {
124 |         const double* input_row = input_data + i * cols;
125 |         double* output_row = output_data + i * output_cols;
126 | 
127 |         for (npy_intp j = 0; j < output_cols; j++) {
128 |             if (std::isnan(input_row[j]) || std::isnan(input_row[j + 1])) {
129 |                 output_row[j] = std::numeric_limits<double>::quiet_NaN();
130 |             } else {
131 |                 if (relative) {
132 |                     // Relative change: (new - old) / old
133 |                     // Exactly match R's behavior: no special handling for zero values
134 |                     output_row[j] = (input_row[j + 1] - input_row[j]) / input_row[j];
135 |                 } else {
136 |                     // Absolute change: new - old
137 |                     output_row[j] = input_row[j + 1] - input_row[j];
138 |                 }
139 |             }
140 |         }
141 |     }
142 | 
143 |     Py_DECREF(array);
144 |     return (PyObject*)result_matrix;
145 | }
146 | 
147 | // Module method definitions
148 | static PyMethodDef SignatureSpaceMethods[] = {
149 |     {"signatureVectorDifference", signatureVectorDifference, METH_VARARGS,
150 |      "Calculate differences between successive elements using SIMD optimization"},
151 |     {"signaturespace", (PyCFunction)signaturespace, METH_VARARGS | METH_KEYWORDS,
152 |      "Calculate signature space matrix with parallel processing and SIMD optimization.\n"
153 |      "Args:\n"
154 |      "    input_matrix: Input 2D array\n"
155 |      "    E: Embedding dimension\n"
156 |      "    relative: If True, calculate relative differences (new-old)/old, otherwise absolute differences (new-old). Default is True."},
157 |     {NULL, NULL, 0, NULL}
158 | };
159 | 
160 | // Module definition
161 | static struct PyModuleDef signaturespacemodule = {
162 |     PyModuleDef_HEAD_INIT,
163 |     "signaturespace",
164 |     "Optimized signature space calculation module",
165 |     -1,
166 |     SignatureSpaceMethods
167 | };
168 | 
169 | // Module initialization
170 | PyMODINIT_FUNC PyInit_signaturespace(void) {
171 |     import_array();
172 |     return PyModule_Create(&signaturespacemodule);
173 | }
--------------------------------------------------------------------------------
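
Downstream, signaturespace is typically fed the output of statespace. A short usage sketch assuming both extensions are built (relative differencing is the default, matching the C code above):

# Usage sketch: embed a series, then take per-row signature differences.
import numpy as np
from utils.statespace import statespace
from utils.signaturespace import signaturespace

ts = np.sin(np.linspace(0, 10, 100))
ss = statespace(ts.tolist(), 3, 1)               # shape (98, 3)
sig = signaturespace(ss, 3)                      # (98, 2), (new - old) / old
sig_abs = signaturespace(ss, 3, relative=False)  # (98, 2), plain new - old
print(sig.shape, sig_abs.shape)
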
| &input_matrix, &E, &relative)) { 72 | return NULL; 73 | } 74 | 75 | // Validate parameters 76 | if (E < 2) { 77 | PyErr_SetString(PyExc_ValueError, "State space matrix must have at least 2 columns"); 78 | return NULL; 79 | } 80 | 81 | // Convert input to numpy array 82 | PyArrayObject* array = (PyArrayObject*)PyArray_FROM_OTF( 83 | input_matrix, 84 | NPY_DOUBLE, 85 | NPY_ARRAY_IN_ARRAY 86 | ); 87 | if (!array) { 88 | PyErr_SetString(PyExc_ValueError, "Input must be a matrix"); 89 | return NULL; 90 | } 91 | 92 | // Validate dimensions 93 | if (PyArray_NDIM(array) != 2) { 94 | Py_DECREF(array); 95 | PyErr_SetString(PyExc_ValueError, "Input must be a matrix"); 96 | return NULL; 97 | } 98 | 99 | const npy_intp rows = PyArray_DIM(array, 0); 100 | const npy_intp cols = PyArray_DIM(array, 1); 101 | const npy_intp output_cols = cols - 1; 102 | 103 | // Handle empty input 104 | if (rows == 0) { 105 | Py_DECREF(array); 106 | npy_intp dims[2] = {0, output_cols}; 107 | return (PyObject*)PyArray_SimpleNew(2, dims, NPY_DOUBLE); 108 | } 109 | 110 | // Create output array 111 | npy_intp out_dims[2] = {rows, output_cols}; 112 | PyArrayObject* result_matrix = (PyArrayObject*)PyArray_SimpleNew(2, out_dims, NPY_DOUBLE); 113 | if (!result_matrix) { 114 | Py_DECREF(array); 115 | return NULL; 116 | } 117 | 118 | // Get data pointers 119 | const double* input_data = (double*)PyArray_DATA(array); 120 | double* output_data = (double*)PyArray_DATA(result_matrix); 121 | 122 | // Calculate differences for each row 123 | for (npy_intp i = 0; i < rows; i++) { 124 | const double* input_row = input_data + i * cols; 125 | double* output_row = output_data + i * output_cols; 126 | 127 | for (npy_intp j = 0; j < output_cols; j++) { 128 | if (std::isnan(input_row[j]) || std::isnan(input_row[j + 1])) { 129 | output_row[j] = std::numeric_limits::quiet_NaN(); 130 | } else { 131 | if (relative) { 132 | // Relative change: (new - old) / old 133 | // Exactly match R's behavior: no special handling for zero values 134 | output_row[j] = (input_row[j + 1] - input_row[j]) / input_row[j]; 135 | } else { 136 | // Absolute change: new - old 137 | output_row[j] = input_row[j + 1] - input_row[j]; 138 | } 139 | } 140 | } 141 | } 142 | 143 | Py_DECREF(array); 144 | return (PyObject*)result_matrix; 145 | } 146 | 147 | // Module method definitions 148 | static PyMethodDef SignatureSpaceMethods[] = { 149 | {"signatureVectorDifference", signatureVectorDifference, METH_VARARGS, 150 | "Calculate differences between successive elements using SIMD optimization"}, 151 | {"signaturespace", (PyCFunction)signaturespace, METH_VARARGS | METH_KEYWORDS, 152 | "Calculate signature space matrix with parallel processing and SIMD optimization.\n" 153 | "Args:\n" 154 | " input_matrix: Input 2D array\n" 155 | " E: Embedding dimension\n" 156 | " relative: If True, calculate relative differences (new-old)/old, otherwise absolute differences (new-old). 
157 | {NULL, NULL, 0, NULL}
158 | };
159 |
160 | // Module definition
161 | static struct PyModuleDef signaturespacemodule = {
162 | PyModuleDef_HEAD_INIT,
163 | "signaturespace",
164 | "Optimized signature space calculation module",
165 | -1,
166 | SignatureSpaceMethods
167 | };
168 |
169 | // Module initialization
170 | PyMODINIT_FUNC PyInit_signaturespace(void) {
171 | import_array();
172 | return PyModule_Create(&signaturespacemodule);
173 | }
--------------------------------------------------------------------------------
/pattern_causality/cpp/statespace.cpp:
--------------------------------------------------------------------------------
1 | #define PY_SSIZE_T_CLEAN
2 | #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
3 | #include <Python.h>
4 | #include <numpy/arrayobject.h>
5 | #include <cmath>
6 | #include <vector>
7 | #include <cstring>
8 | #include <algorithm>
9 | #include <limits>
10 |
11 | // Thread-local storage for reusable buffers
12 | thread_local std::vector<double> ts_buffer;
13 |
14 | // Optimized conversion from Python object to double
15 | static inline double convert_to_double(PyObject* item, bool& success) {
16 | if (PyFloat_Check(item)) {
17 | success = true;
18 | return PyFloat_AS_DOUBLE(item);
19 | } else if (PyLong_Check(item)) {
20 | success = true;
21 | return (double)PyLong_AsLongLong(item);
22 | } else {
23 | PyObject* float_obj = PyNumber_Float(item);
24 | if (!float_obj) {
25 | success = false;
26 | return 0.0;
27 | }
28 | double result = PyFloat_AS_DOUBLE(float_obj);
29 | Py_DECREF(float_obj);
30 | success = true;
31 | return result;
32 | }
33 | }
34 |
35 | // Fast check for numpy array contiguity and type
36 | static inline bool check_array_valid(PyArrayObject* arr) {
37 | return (PyArray_ISCARRAY_RO(arr) &&
38 | (PyArray_TYPE(arr) == NPY_DOUBLE ||
39 | PyArray_TYPE(arr) == NPY_FLOAT ||
40 | PyArray_TYPE(arr) == NPY_INT64 ||
41 | PyArray_TYPE(arr) == NPY_INT32));
42 | }
43 |
44 | static PyObject* stateSpace(PyObject* self, PyObject* args) {
45 | PyObject* ts_obj;
46 | int E, tau;
47 |
48 | // Parse Python arguments
49 | if (!PyArg_ParseTuple(args, "Oii", &ts_obj, &E, &tau)) {
50 | return NULL;
51 | }
52 |
53 | // Quick parameter validation
54 | if (E < 2 || tau < 1) {
55 | PyErr_SetString(PyExc_ValueError, "E must be >= 2 and tau must be >= 1");
56 | return NULL;
57 | }
58 |
59 | // Get input type and length
60 | const bool is_list = PyList_Check(ts_obj);
61 | const bool is_array = PyArray_Check(ts_obj);
62 | if (!is_list && !is_array) {
63 | PyErr_SetString(PyExc_TypeError, "Input must be a list or numpy array");
64 | return NULL;
65 | }
66 |
67 | // Get length of input time series
68 | const Py_ssize_t ts_len = is_list ?
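// Illustrative layout note: the fill loop below writes
//   data[k * E + j] = ts[k + j * tau]
// so a series of length N yields N - (E - 1) * tau rows; e.g. N = 6,
// E = 3, tau = 1 gives the 4 rows [x0 x1 x2], [x1 x2 x3], [x2 x3 x4],
// [x3 x4 x5].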
PyList_Size(ts_obj) : PyArray_SIZE((PyArrayObject*)ts_obj);
69 |
70 | // Check minimum length requirement
71 | if (ts_len < (E - 1) * tau + 1) {
72 | PyErr_SetString(PyExc_ValueError, "Time series too short for given E and tau");
73 | return NULL;
74 | }
75 |
76 | // Calculate output dimensions
77 | const npy_intp rows = ts_len - (E - 1) * tau;
78 | const npy_intp cols = E;
79 | npy_intp dims[2] = {rows, cols};
80 |
81 | // Create output array with alignment
82 | PyArrayObject* result_array = (PyArrayObject*)PyArray_SimpleNew(2, dims, NPY_DOUBLE);
83 | if (!result_array) {
84 | PyErr_SetString(PyExc_MemoryError, "Failed to create output array");
85 | return NULL;
86 | }
87 |
88 | // Resize thread-local buffer if needed
89 | if (static_cast<Py_ssize_t>(ts_buffer.size()) < ts_len) {
90 | ts_buffer.resize(static_cast<size_t>(ts_len));
91 | }
92 |
93 | // Get data pointers
94 | double* const data = (double*)PyArray_DATA(result_array);
95 | double* const ts_data = ts_buffer.data();
96 |
97 | // Convert input to double array using the most efficient method
98 | if (is_list) {
99 | #pragma omp parallel for schedule(static)
100 | for (Py_ssize_t i = 0; i < ts_len; i++) {
101 | PyObject* item = PyList_GET_ITEM(ts_obj, i);
102 | bool success = true;
103 | ts_data[i] = convert_to_double(item, success);
104 | if (!success) {
105 | PyErr_SetString(PyExc_TypeError, "All elements must be numeric");
106 | // Note: Cannot return NULL here due to OpenMP, error will be checked later
107 | }
108 | }
109 | if (PyErr_Occurred()) {
110 | Py_DECREF(result_array);
111 | return NULL;
112 | }
113 | } else {
114 | PyArrayObject* arr = (PyArrayObject*)ts_obj;
115 | if (!check_array_valid(arr)) {
116 | arr = (PyArrayObject*)PyArray_FROM_OTF(ts_obj, NPY_DOUBLE,
117 | NPY_ARRAY_IN_ARRAY | NPY_ARRAY_ALIGNED | NPY_ARRAY_FORCECAST);
118 | if (!arr) {
119 | Py_DECREF(result_array);
120 | return NULL;
121 | }
122 | memcpy(ts_data, PyArray_DATA(arr), ts_len * sizeof(double));
123 | Py_DECREF(arr);
124 | } else {
125 | memcpy(ts_data, PyArray_DATA(arr), ts_len * sizeof(double));
126 | }
127 | }
128 |
129 | // Fill state space matrix using optimized parallel processing
130 | const npy_intp block_size = std::max(1, 1024 / E); // Optimize cache usage
131 | #pragma omp parallel
132 | {
133 | #pragma omp for schedule(static) collapse(2)
134 | for (npy_intp i = 0; i < rows; i += block_size) {
135 | for (npy_intp j = 0; j < E; j++) {
136 | const npy_intp block_end = std::min(i + block_size, rows);
137 | #pragma omp simd
138 | for (npy_intp k = i; k < block_end; k++) {
139 | const npy_intp idx = k + j * tau;
140 | const double val = ts_data[idx];
141 | data[k * E + j] = std::isfinite(val) ?
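// Design note: block_size caps each block at roughly 1024 doubles so the
// rows being filled stay cache-resident across the inner simd loop; any
// non-finite input value is stored as NaN (continuation below).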
val : std::numeric_limits<double>::quiet_NaN();
142 | }
143 | }
144 | }
145 | }
146 |
147 | return (PyObject*)result_array;
148 | }
149 |
150 | // Method definition
151 | static PyMethodDef StateSpaceMethods[] = {
152 | {"statespace", stateSpace, METH_VARARGS,
153 | "Create state space matrix from time series using embedding parameters E and tau"},
154 | {NULL, NULL, 0, NULL}
155 | };
156 |
157 | // Module definition
158 | static struct PyModuleDef statespacemodule = {
159 | PyModuleDef_HEAD_INIT,
160 | "statespace",
161 | "Optimized state space embedding module",
162 | -1,
163 | StateSpaceMethods
164 | };
165 |
166 | // Module initialization
167 | PyMODINIT_FUNC PyInit_statespace(void) {
168 | import_array();
169 | return PyModule_Create(&statespacemodule);
170 | }
171 |
172 |
--------------------------------------------------------------------------------
/pattern_causality/cpp/pastNNs.cpp:
--------------------------------------------------------------------------------
1 | #define PY_SSIZE_T_CLEAN
2 | #include <Python.h>
3 | #include <numpy/arrayobject.h>
4 | #include <vector>
5 | #include <algorithm>
6 | #include <cmath>
7 | #include <utility>
8 |
9 | // Pre-allocated buffer size
10 | constexpr size_t INITIAL_BUFFER_SIZE = 1024;
11 |
12 | // Reusable buffer
13 | static std::vector<std::pair<double, int>> candidate_buffer;
14 | static std::vector<int> nn_indices_buffer;
15 | static std::vector<double> dists_buffer;
16 |
17 | static PyObject* pastNNs(PyObject* self, PyObject* args) {
18 | int ccspan, nnspan, i, h;
19 | PyObject *mx_obj, *dx_obj, *smx_obj, *psmx_obj;
20 |
21 | if (!PyArg_ParseTuple(args, "iiOOOOii", &ccspan, &nnspan,
22 | &mx_obj, &dx_obj, &smx_obj, &psmx_obj, &i, &h)) {
23 | return NULL;
24 | }
25 |
26 | // Convert inputs to numpy arrays
27 | PyArrayObject* mx_array = (PyArrayObject*)PyArray_FROM_OTF(mx_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
28 | PyArrayObject* dx_array = (PyArrayObject*)PyArray_FROM_OTF(dx_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
29 | PyArrayObject* smx_array = (PyArrayObject*)PyArray_FROM_OTF(smx_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
30 | PyArrayObject* psmx_array = (PyArrayObject*)PyArray_FROM_OTF(psmx_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
31 |
32 | if (!mx_array || !dx_array || !smx_array || !psmx_array) {
33 | Py_XDECREF(mx_array);
34 | Py_XDECREF(dx_array);
35 | Py_XDECREF(smx_array);
36 | Py_XDECREF(psmx_array);
37 | return NULL;
38 | }
39 |
40 | // Get array dimensions
41 | const npy_intp* mx_dims = PyArray_DIMS(mx_array);
42 | const npy_intp* smx_dims = PyArray_DIMS(smx_array);
43 | const npy_intp mx_stride_0 = PyArray_STRIDE(mx_array, 0) / sizeof(double);
44 | const npy_intp smx_stride_0 = PyArray_STRIDE(smx_array, 0) / sizeof(double);
45 | const npy_intp psmx_stride = PyArray_STRIDE(psmx_array, 0) / sizeof(double);
46 |
47 | // Get data pointers
48 | double* mx_data = (double*)PyArray_DATA(mx_array);
49 | double* dx_data = (double*)PyArray_DATA(dx_array);
50 | double* smx_data = (double*)PyArray_DATA(smx_array);
51 | double* psmx_data = (double*)PyArray_DATA(psmx_array);
52 |
53 | // Find valid indices
54 | std::vector<int> valid_indices;
55 | valid_indices.reserve(mx_dims[0]);
56 |
57 | for (npy_intp j = 0; j < i - ccspan; j++) {
58 | bool valid = true;
59 | // Check for NaN in state space
60 | for (npy_intp k = 0; k < mx_dims[1]; k++) {
61 | if (std::isnan(mx_data[j * mx_stride_0 + k])) {
62 | valid = false;
63 | break;
64 | }
65 | }
66 | // Check for NaN in distance matrix
67 | if (valid && std::isnan(dx_data[i * mx_dims[0] + j])) {
68 | valid = false;
69 | }
70 | if (valid) {
71 | valid_indices.push_back(j);
72 | }
73 | }
74 |
75 | // Sort indices by distance
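// Selection summary: candidates are restricted to times j < i - ccspan
// whose coordinates and distance to state i are NaN-free; they are then
// sorted by distance and truncated to the nnspan nearest below. The dict
// returned at the end maps "times"/"dists" to 1-D arrays and
// "signatures"/"patterns"/"coordinates" to one row per neighbor.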
76 | std::vector<std::pair<double, int>> distances;
77 | distances.reserve(valid_indices.size());
78 |
79 | for (int idx : valid_indices) {
80 | distances.push_back({dx_data[i * mx_dims[0] + idx], idx});
81 | }
82 |
83 | std::sort(distances.begin(), distances.end());
84 |
85 | // Take only nnspan nearest neighbors
86 | const size_t sort_size = std::min(static_cast<size_t>(nnspan), distances.size());
87 |
88 | // Create output arrays
89 | npy_intp out_dims[] = {static_cast<npy_intp>(sort_size)};
90 | PyObject* times = PyArray_SimpleNew(1, out_dims, NPY_LONG);
91 | PyObject* dists = PyArray_SimpleNew(1, out_dims, NPY_DOUBLE);
92 |
93 | npy_intp sig_dims[] = {static_cast<npy_intp>(sort_size), smx_dims[1]};
94 | PyObject* signatures = PyArray_SimpleNew(2, sig_dims, NPY_DOUBLE);
95 |
96 | npy_intp pat_dims[] = {static_cast<npy_intp>(sort_size), 1};
97 | PyObject* patterns = PyArray_SimpleNew(2, pat_dims, NPY_DOUBLE);
98 |
99 | npy_intp coord_dims[] = {static_cast<npy_intp>(sort_size), mx_dims[1]};
100 | PyObject* coordinates = PyArray_SimpleNew(2, coord_dims, NPY_DOUBLE);
101 |
102 | if (!times || !dists || !signatures || !patterns || !coordinates) {
103 | Py_XDECREF(times);
104 | Py_XDECREF(dists);
105 | Py_XDECREF(signatures);
106 | Py_XDECREF(patterns);
107 | Py_XDECREF(coordinates);
108 | Py_DECREF(mx_array);
109 | Py_DECREF(dx_array);
110 | Py_DECREF(smx_array);
111 | Py_DECREF(psmx_array);
112 | return NULL;
113 | }
114 |
115 | // Fill output arrays
116 | long* times_data = (long*)PyArray_DATA((PyArrayObject*)times);
117 | double* dists_data = (double*)PyArray_DATA((PyArrayObject*)dists);
118 | double* signatures_data = (double*)PyArray_DATA((PyArrayObject*)signatures);
119 | double* patterns_data = (double*)PyArray_DATA((PyArrayObject*)patterns);
120 | double* coordinates_data = (double*)PyArray_DATA((PyArrayObject*)coordinates);
121 |
122 | for (size_t j = 0; j < sort_size; j++) {
123 | const int idx = distances[j].second;
124 | times_data[j] = idx;
125 | dists_data[j] = distances[j].first;
126 |
127 | // Copy signatures
128 | for (npy_intp k = 0; k < smx_dims[1]; k++) {
129 | signatures_data[j * smx_dims[1] + k] = smx_data[idx * smx_stride_0 + k];
130 | }
131 |
132 | // Copy pattern
133 | patterns_data[j] = psmx_data[idx * psmx_stride];
134 |
135 | // Copy coordinates
136 | for (npy_intp k = 0; k < mx_dims[1]; k++) {
137 | coordinates_data[j * mx_dims[1] + k] = mx_data[idx * mx_stride_0 + k];
138 | }
139 | }
140 |
141 | // Create return dictionary
142 | PyObject* result = PyDict_New();
143 | if (!result) {
144 | Py_DECREF(times);
145 | Py_DECREF(dists);
146 | Py_DECREF(signatures);
147 | Py_DECREF(patterns);
148 | Py_DECREF(coordinates);
149 | Py_DECREF(mx_array);
150 | Py_DECREF(dx_array);
151 | Py_DECREF(smx_array);
152 | Py_DECREF(psmx_array);
153 | return NULL;
154 | }
155 |
156 | PyDict_SetItemString(result, "times", times);
157 | PyDict_SetItemString(result, "dists", dists);
158 | PyDict_SetItemString(result, "signatures", signatures);
159 | PyDict_SetItemString(result, "patterns", patterns);
160 | PyDict_SetItemString(result, "coordinates", coordinates);
161 |
162 | Py_DECREF(times);
163 | Py_DECREF(dists);
164 | Py_DECREF(signatures);
165 | Py_DECREF(patterns);
166 | Py_DECREF(coordinates);
167 | Py_DECREF(mx_array);
168 | Py_DECREF(dx_array);
169 | Py_DECREF(smx_array);
170 | Py_DECREF(psmx_array);
171 |
172 | return result;
173 | }
174 |
175 | static PyMethodDef PastNNsMethods[] = {
176 | {"pastNNs", (PyCFunction)pastNNs, METH_VARARGS,
177 | "Get information about past nearest neighbors"},
178 | {NULL, NULL, 0, NULL}
179 | };
180 |
181 | static struct PyModuleDef pastnnsmodule = {
182 | PyModuleDef_HEAD_INIT,
183 | "pastNNs",
184 | "Past nearest neighbors calculation module",
185 | -1,
186 | PastNNsMethods
187 | };
188 |
189 | PyMODINIT_FUNC PyInit_pastNNs(void) {
190 | import_array();
191 | return PyModule_Create(&pastnnsmodule);
192 | }
193 |
--------------------------------------------------------------------------------
/pattern_causality/cpp/natureOfCausality.cpp:
--------------------------------------------------------------------------------
1 | #define PY_SSIZE_T_CLEAN
2 | #include <Python.h>
3 | #include <numpy/arrayobject.h>
4 | #include <cmath>
5 | #include <limits>
6 |
7 | static PyObject* natureOfCausality(PyObject* self, PyObject* args) {
8 | PyObject *pc_obj, *dur_obj, *hashed_obj, *x_obj;
9 | PyObject* weighted_obj;
10 |
11 | if (!PyArg_ParseTuple(args, "OOOOO", &pc_obj, &dur_obj, &hashed_obj, &x_obj, &weighted_obj)) {
12 | return NULL;
13 | }
14 |
15 | // Convert inputs to numpy arrays
16 | PyArrayObject* pc_arr = (PyArrayObject*)PyArray_FROM_OTF(pc_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
17 | PyArrayObject* dur_arr = (PyArrayObject*)PyArray_FROM_OTF(dur_obj, NPY_LONG, NPY_ARRAY_IN_ARRAY);
18 | PyArrayObject* hashed_arr = (PyArrayObject*)PyArray_FROM_OTF(hashed_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
19 | PyArrayObject* x_arr = (PyArrayObject*)PyArray_FROM_OTF(x_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
20 |
21 | if (!pc_arr || !dur_arr || !hashed_arr || !x_arr) {
22 | Py_XDECREF(pc_arr);
23 | Py_XDECREF(dur_arr);
24 | Py_XDECREF(hashed_arr);
25 | Py_XDECREF(x_arr);
26 | return NULL;
27 | }
28 |
29 | const bool weighted = PyObject_IsTrue(weighted_obj);
30 |
31 | // Get array dimensions
32 | const npy_intp* pc_dims = PyArray_DIMS(pc_arr);
33 | const npy_intp pc_stride_row = PyArray_STRIDE(pc_arr, 0) / sizeof(double);
34 | const npy_intp pc_stride_col = PyArray_STRIDE(pc_arr, 1) / sizeof(double);
35 | const npy_intp x_size = PyArray_SIZE(x_arr);
36 |
37 | // Create output arrays
38 | npy_intp dims[] = {x_size};
39 | PyArrayObject* positive_causality = (PyArrayObject*)PyArray_SimpleNew(1, dims, NPY_DOUBLE);
40 | PyArrayObject* negative_causality = (PyArrayObject*)PyArray_SimpleNew(1, dims, NPY_DOUBLE);
41 | PyArrayObject* dark_causality = (PyArrayObject*)PyArray_SimpleNew(1, dims, NPY_DOUBLE);
42 | PyArrayObject* no_causality = (PyArrayObject*)PyArray_SimpleNew(1, dims, NPY_DOUBLE);
43 |
44 | if (!positive_causality || !negative_causality || !dark_causality || !no_causality) {
45 | Py_XDECREF(pc_arr);
46 | Py_XDECREF(dur_arr);
47 | Py_XDECREF(hashed_arr);
48 | Py_XDECREF(x_arr);
49 | Py_XDECREF(positive_causality);
50 | Py_XDECREF(negative_causality);
51 | Py_XDECREF(dark_causality);
52 | Py_XDECREF(no_causality);
53 | return NULL;
54 | }
55 |
56 | // Get data pointers for direct memory access
57 | double* pos_data = (double*)PyArray_DATA(positive_causality);
58 | double* neg_data = (double*)PyArray_DATA(negative_causality);
59 | double* dark_data = (double*)PyArray_DATA(dark_causality);
60 | double* no_data = (double*)PyArray_DATA(no_causality);
61 | double* pc_data = (double*)PyArray_DATA(pc_arr);
62 | long* dur_data = (long*)PyArray_DATA(dur_arr);
63 |
64 | // Initialize all arrays with NaN
65 | const double nan_value = std::numeric_limits<double>::quiet_NaN();
66 | for(npy_intp i = 0; i < x_size; i++) {
67 | pos_data[i] = neg_data[i] = dark_data[i] = no_data[i] = nan_value;
68 | }
69 |
70 | const npy_intp dur_size = PyArray_SIZE(dur_arr);
71 | const npy_intp hashed_size = PyArray_SIZE(hashed_arr);
72 | const npy_intp mean_pattern = hashed_size / 2;
73 | const double eps = std::numeric_limits<double>::epsilon();
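// Classification summary for the loop below, per time point i: cells of
// the pattern-pair matrix with |pc| > eps are tallied as
//   row == col == mean_pattern  -> dark      (centre diagonal cell)
//   row == col                  -> positive  (same pattern both sides)
//   row + col == hashed_size-1  -> negative  (anti-diagonal, opposite patterns)
//   anything else               -> dark
// With weighted=false each qualifying cell counts 1.0 instead of pc.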
74 |
75 | // Main computation loop
76 | for (npy_intp d = 0; d < dur_size; d++) {
77 | const long i = dur_data[d];
78 |
79 | bool found_valid = false;
80 | bool has_causality = false;
81 | double pos_val = 0.0;
82 | double neg_val = 0.0;
83 | double dark_val = 0.0;
84 | int valid_count = 0;
85 |
86 | // First pass: check if we have any valid values and count total non-NaN values
87 | for (npy_intp row = 0; row < pc_dims[0]; row++) {
88 | for (npy_intp col = 0; col < pc_dims[1]; col++) {
89 | const double pc_val = pc_data[row * pc_stride_row + col * pc_stride_col + i];
90 | if (!std::isnan(pc_val)) {
91 | found_valid = true;
92 | valid_count++;
93 | }
94 | }
95 | }
96 |
97 | // Only proceed with causality calculation if we found valid values
98 | if (found_valid) {
99 | // Second pass: calculate causalities
100 | for (npy_intp row = 0; row < pc_dims[0]; row++) {
101 | for (npy_intp col = 0; col < pc_dims[1]; col++) {
102 | const double pc_val = pc_data[row * pc_stride_row + col * pc_stride_col + i];
103 |
104 | if (!std::isnan(pc_val) && std::abs(pc_val) > eps) {
105 | has_causality = true;
106 |
107 | // Center diagonal element contributes to dark causality
108 | if (row == col && row == mean_pattern) {
109 | dark_val += weighted ? pc_val : 1.0;
110 | }
111 | // Other diagonal elements contribute to positive causality
112 | else if (row == col) {
113 | pos_val += weighted ? pc_val : 1.0;
114 | }
115 | // Anti-diagonal elements contribute to negative causality
116 | else if (row + col == hashed_size - 1) {
117 | neg_val += weighted ? pc_val : 1.0;
118 | }
119 | // All other elements contribute to dark causality
120 | else {
121 | dark_val += weighted ? pc_val : 1.0;
122 | }
123 | }
124 | }
125 | }
126 |
127 | // Set values only if we found valid data
128 | if (valid_count > 0) {
129 | no_data[i] = has_causality ?
0.0 : 1.0; 130 | pos_data[i] = pos_val; 131 | neg_data[i] = neg_val; 132 | dark_data[i] = dark_val; 133 | } 134 | } 135 | } 136 | 137 | // Create return dictionary 138 | PyObject* result = PyDict_New(); 139 | if (!result) { 140 | Py_XDECREF(pc_arr); 141 | Py_XDECREF(dur_arr); 142 | Py_XDECREF(hashed_arr); 143 | Py_XDECREF(x_arr); 144 | Py_XDECREF(positive_causality); 145 | Py_XDECREF(negative_causality); 146 | Py_XDECREF(dark_causality); 147 | Py_XDECREF(no_causality); 148 | return NULL; 149 | } 150 | 151 | PyDict_SetItemString(result, "noCausality", (PyObject*)no_causality); 152 | PyDict_SetItemString(result, "Positive", (PyObject*)positive_causality); 153 | PyDict_SetItemString(result, "Negative", (PyObject*)negative_causality); 154 | PyDict_SetItemString(result, "Dark", (PyObject*)dark_causality); 155 | 156 | Py_DECREF(pc_arr); 157 | Py_DECREF(dur_arr); 158 | Py_DECREF(hashed_arr); 159 | Py_DECREF(x_arr); 160 | Py_DECREF(positive_causality); 161 | Py_DECREF(negative_causality); 162 | Py_DECREF(dark_causality); 163 | Py_DECREF(no_causality); 164 | 165 | return result; 166 | } 167 | 168 | static PyMethodDef NatureOfCausalityMethods[] = { 169 | {"natureOfCausality", natureOfCausality, METH_VARARGS, 170 | "Calculate nature of causality from PC matrix"}, 171 | {NULL, NULL, 0, NULL} 172 | }; 173 | 174 | static struct PyModuleDef natureOfCausalitymodule = { 175 | PyModuleDef_HEAD_INIT, 176 | "natureOfCausality", 177 | NULL, 178 | -1, 179 | NatureOfCausalityMethods 180 | }; 181 | 182 | PyMODINIT_FUNC PyInit_natureOfCausality(void) { 183 | import_array(); 184 | return PyModule_Create(&natureOfCausalitymodule); 185 | } 186 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pattern_causality_py 2 | 3 | [![PyPI version](https://badge.fury.io/py/pattern-causality.svg)](https://badge.fury.io/py/pattern-causality) 4 | [![PyPI Downloads](https://static.pepy.tech/badge/pattern-causality)](https://pepy.tech/project/pattern-causality) 5 | [![Tests](https://github.com/skstavroglou/pattern_causality_py/actions/workflows/tests.yml/badge.svg)](https://github.com/skstavroglou/pattern_causality_py/actions/workflows/tests.yml) 6 | [![Lint](https://github.com/skstavroglou/pattern_causality_py/actions/workflows/lint.yml/badge.svg)](https://github.com/skstavroglou/pattern_causality_py/actions/workflows/lint.yml) 7 | [![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) 8 | [![Python](https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10%20%7C%203.11-blue)](https://www.python.org/) 9 | [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) 10 | 11 | ## Overview 12 | 13 | `pattern_causality` is a comprehensive Python library that implements the Pattern Causality algorithm for analyzing causal relationships in time series data. This package provides efficient tools for detecting and quantifying causality patterns between multiple time series, with a particular focus on nonlinear complex systems. 
14 | 15 | ## Key Features 16 | 17 | - **Efficient C++ Implementation**: Core algorithms implemented in C++ for maximum performance 18 | - **Comprehensive Analysis Tools**: 19 | - Basic pattern causality analysis 20 | - Multivariate time series analysis 21 | - Cross-validation capabilities 22 | - Parameter optimization 23 | - Effect metrics calculation 24 | - **Built-in Dataset**: Includes climate indices dataset for demonstration 25 | - **OpenMP Support**: Parallel processing for improved performance 26 | - **Extensive Testing**: Comprehensive test suite with high coverage 27 | 28 | ## System Requirements 29 | 30 | - Python 3.8 or later 31 | - C++ compiler with C++11 support 32 | - OpenMP support (for parallel processing) 33 | - NumPy 1.19.0 or later 34 | - Pandas 1.0.0 or later 35 | 36 | ## Changelog 37 | 38 | ### Version 1.0.3 (2024-02-15) 39 | - Fixed integer type conversion issue in natureOfCausality function for Windows compatibility 40 | - Improved type handling for array data in pattern causality calculations 41 | - Enhanced cross-platform compatibility for integer types 42 | 43 | ### Version 1.0.2 (2024-02-15) 44 | - Changed default behavior to use relative differences (relative=True by default) 45 | - Added relative parameter to signaturespace for choosing between relative and absolute differences 46 | - Enhanced documentation for the new parameter 47 | - Improved backward compatibility with absolute difference mode (relative=False) 48 | 49 | ### Version 1.0.1 (2024-02-14) 50 | - Fixed type conversion issue in natureOfCausality function 51 | - Improved compatibility with different system architectures by using np.int_ 52 | - Enhanced stability for array data type handling 53 | - Fixed Python 3.8 compatibility issue with numpy integer types 54 | 55 | ## Installation 56 | 57 | ### Via pip (Recommended) 58 | ```bash 59 | pip install pattern-causality 60 | ``` 61 | 62 | ### Via pip + git 63 | ```bash 64 | pip install git+https://github.com/skstavroglou/pattern_causality_py.git 65 | ``` 66 | 67 | ### From Source 68 | #### Prerequisites 69 | 70 | #### On Ubuntu/Debian: 71 | ```bash 72 | sudo apt-get update 73 | sudo apt-get install -y g++ python3-dev libomp-dev build-essential 74 | ``` 75 | 76 | #### On macOS: 77 | ```bash 78 | brew install libomp 79 | ``` 80 | 81 | ### Installing the Package 82 | 83 | ```bash 84 | # Install required Python packages 85 | python -m pip install numpy pandas 86 | 87 | # Install pattern-causality 88 | python -m pip install -e . 
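# Optional sanity check (assumes the build above succeeded): verify the
# extension modules import cleanly before running any analysis.
python -c "from pattern_causality import pattern_causality, load_data"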
89 | ``` 90 | 91 | ## Usage Examples 92 | 93 | ### Basic Usage 94 | 95 | ```python 96 | from pattern_causality import pattern_causality, load_data 97 | 98 | # Load the included climate indices dataset 99 | data = load_data() 100 | 101 | # Initialize pattern causality analyzer 102 | pc = pattern_causality(verbose=True) 103 | 104 | # Analyze causality between NAO and AAO indices 105 | result = pc.pc_lightweight( 106 | X=data["NAO"].values, 107 | Y=data["AAO"].values, 108 | E=3, # embedding dimension 109 | tau=1, # time delay 110 | metric="euclidean", 111 | h=1, # prediction horizon 112 | weighted=True, # use weighted calculations 113 | relative=True # use relative differences (default) 114 | ) 115 | 116 | print(result) 117 | ``` 118 | 119 | ### Multivariate Analysis 120 | 121 | ```python 122 | # Analyze causality patterns across multiple variables 123 | matrix_result = pc.pc_matrix( 124 | dataset=data.drop(columns=["Date"]), 125 | E=3, 126 | tau=1, 127 | metric="euclidean", 128 | h=1, 129 | weighted=True, 130 | relative=True # Using relative differences (default) 131 | ) 132 | 133 | print("Pattern Causality Matrix Results:") 134 | print(matrix_result) 135 | ``` 136 | 137 | ### Parameter Optimization 138 | 139 | ```python 140 | # Find optimal parameters 141 | optimal_params = pc.optimal_parameters_search( 142 | Emax=5, 143 | tau_max=3, 144 | metric="euclidean", 145 | h=1, 146 | dataset=data.drop(columns=["Date"]) 147 | ) 148 | 149 | print("Optimal Parameters:") 150 | print(optimal_params) 151 | ``` 152 | 153 | ### Cross Validation 154 | 155 | ```python 156 | # Perform cross-validation 157 | cv_results = pc.pc_cross_validation( 158 | X=data["NAO"].values, 159 | Y=data["AAO"].values, 160 | E=3, 161 | tau=1, 162 | metric="euclidean", 163 | h=1, 164 | weighted=True, 165 | numberset=[100, 200, 300] 166 | ) 167 | 168 | print("Cross-validation Results:") 169 | print(cv_results) 170 | ``` 171 | 172 | ## Development 173 | 174 | ### Setting Up Development Environment 175 | 176 | 1. Clone the repository: 177 | ```bash 178 | git clone https://github.com/skstavroglou/pattern_causality_py.git 179 | cd pattern_causality_py 180 | ``` 181 | 182 | 2. Create and activate a virtual environment: 183 | ```bash 184 | python -m venv venv 185 | source venv/bin/activate # On Unix/macOS 186 | # or 187 | .\venv\Scripts\activate # On Windows 188 | ``` 189 | 190 | 3. Install development dependencies: 191 | ```bash 192 | python -m pip install -e ".[dev]" 193 | ``` 194 | 195 | ### Running Tests 196 | 197 | ```bash 198 | # Run tests with coverage 199 | python -m pytest tests/ --cov=pattern_causality -v 200 | ``` 201 | 202 | ### Code Style 203 | 204 | The project uses: 205 | - Black for code formatting 206 | - isort for import sorting 207 | - flake8 for linting 208 | - mypy for type checking 209 | 210 | To check code style: 211 | ```bash 212 | black . 213 | isort . 214 | flake8 . 215 | mypy pattern_causality 216 | ``` 217 | 218 | ## Contributing 219 | 220 | Contributions are welcome! Please follow these steps: 221 | 222 | 1. Fork the repository 223 | 2. Create a feature branch (`git checkout -b feature/amazing-feature`) 224 | 3. Make your changes 225 | 4. Run the test suite 226 | 5. Commit your changes (`git commit -m 'Add amazing feature'`) 227 | 6. Push to the branch (`git push origin feature/amazing-feature`) 228 | 7. Open a Pull Request 229 | 230 | ## References 231 | 232 | - Stavroglou, S. K., Pantelous, A. A., Stanley, H. E., & Zuev, K. M. (2019). Hidden interactions in financial markets. 
_Proceedings of the National Academy of Sciences, 116(22)_, 10646-10651.
233 | - Stavroglou, S. K., Pantelous, A. A., Stanley, H. E., & Zuev, K. M. (2020). Unveiling causal interactions in complex systems. _Proceedings of the National Academy of Sciences, 117(14)_, 7599-7605.
234 | - Stavroglou, S. K., Ayyub, B. M., Kallinterakis, V., Pantelous, A. A., & Stanley, H. E. (2021). A novel causal risk‐based decision‐making methodology: The case of coronavirus. _Risk Analysis, 41(5)_, 814-830.
235 |
236 | ## License
237 |
238 | This project is licensed under the BSD 3-Clause License - see the [LICENSE](LICENSE) file for details.
--------------------------------------------------------------------------------
/pattern_causality/cpp/predictionY.cpp:
--------------------------------------------------------------------------------
1 | #define PY_SSIZE_T_CLEAN
2 | #include <Python.h>
3 | #include <numpy/arrayobject.h>
4 | #include <cmath>
5 | #include <vector>
6 | #include <array>
7 | #include <cstring>
8 |
9 | // Pre-compute factorials for common cases
10 | static constexpr size_t MAX_FACTORIAL_CACHE = 10;
11 | static const std::array<int, MAX_FACTORIAL_CACHE> factorial_cache = []() {
12 | std::array<int, MAX_FACTORIAL_CACHE> cache{};
13 | cache[0] = 1;
14 | for(size_t i = 1; i < MAX_FACTORIAL_CACHE; ++i) {
15 | cache[i] = cache[i-1] * i;
16 | }
17 | return cache;
18 | }();
19 |
20 | // Optimized factorial calculation with cache
21 | static inline int factorial(int n) {
22 | if (n < 0) return 1; // Handle error case
23 | if (n < MAX_FACTORIAL_CACHE) {
24 | return factorial_cache[n];
25 | }
26 | int result = factorial_cache[MAX_FACTORIAL_CACHE - 1];
27 | for(int i = MAX_FACTORIAL_CACHE; i <= n; ++i) {
28 | result *= i;
29 | }
30 | return result;
31 | }
32 |
33 | // Optimized hashing function with SIMD hints
34 | static inline int hashing(const std::vector<int>& vec) {
35 | int hash = 0;
36 | const size_t size = vec.size();
37 | #pragma omp simd reduction(+:hash)
38 | for (size_t i = 0; i < size; i++) {
39 | hash += vec[i] * factorial(i + 2);
40 | }
41 | return hash;
42 | }
43 |
44 | // Thread-local storage for reusable vectors
45 | thread_local std::vector<int> p_vec_buffer;
46 |
47 | static inline int pattern_vector_difference(const std::vector<double>& sVec) {
48 | // Quick check for NaN values
49 | for (size_t i = 0; i < sVec.size(); ++i) {
50 | if (std::isnan(sVec[i])) {
51 | return 0;
52 | }
53 | }
54 |
55 | // Reuse pre-allocated vector
56 | if (p_vec_buffer.capacity() < sVec.size()) {
57 | p_vec_buffer.reserve(sVec.size());
58 | }
59 | p_vec_buffer.clear();
60 |
61 | // Convert to pattern values
62 | for (size_t i = 0; i < sVec.size(); ++i) {
63 | p_vec_buffer.push_back(sVec[i] > 0 ? 3 : (sVec[i] < 0 ? 1 : 2));
64 | }
65 |
66 | return hashing(p_vec_buffer);
67 | }
68 |
69 | static PyObject* predictionY(PyObject* self, PyObject* args, PyObject* kwargs) {
70 | long E;
71 | PyObject* projNNy;
72 | PyObject* zeroTolerance_obj = Py_None;
73 |
74 | // Use char* instead of const char* for PyArg_ParseTupleAndKeywords compatibility
75 | static char* const_cast_kwlist[] = {
76 | const_cast<char*>("E"),
77 | const_cast<char*>("projNNy"),
78 | const_cast<char*>("zeroTolerance"),
79 | nullptr
80 | };
81 |
82 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "lO|O", const_cast_kwlist,
83 | &E, &projNNy, &zeroTolerance_obj)) {
84 | return NULL;
85 | }
86 |
87 | // Optimize default value calculation
88 | const double zeroTolerance = (zeroTolerance_obj == Py_None) ?
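// Worked default: with E = 3 the fallback tolerance is (3 + 1) / 2 = 2,
// i.e. a signature component is forced to 0 below whenever more than two
// of the projected neighbors carry an exact zero in that component.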
89 | (E + 1.0) / 2.0 :
90 | PyFloat_AsDouble(zeroTolerance_obj);
91 |
92 | if (PyErr_Occurred()) return NULL;
93 |
94 | // Get dictionary items with error checking
95 | PyObject* signatures = PyDict_GetItemString(projNNy, "signatures");
96 | PyObject* weights = PyDict_GetItemString(projNNy, "weights");
97 |
98 | if (!signatures || !weights) {
99 | PyErr_SetString(PyExc_KeyError, "projNNy must contain 'signatures' and 'weights' keys");
100 | return NULL;
101 | }
102 |
103 | // Convert to numpy arrays with error checking
104 | PyArrayObject* signatures_array = (PyArrayObject*)PyArray_FROM_OTF(signatures, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
105 | PyArrayObject* weights_array = (PyArrayObject*)PyArray_FROM_OTF(weights, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
106 |
107 | if (!signatures_array || !weights_array) {
108 | Py_XDECREF(signatures_array);
109 | Py_XDECREF(weights_array);
110 | PyErr_SetString(PyExc_TypeError, "Failed to convert signatures or weights to numpy array");
111 | return NULL;
112 | }
113 |
114 | // Pre-allocate vector with proper size
115 | std::vector<double> predictedSignatureY;
116 | predictedSignatureY.reserve(E >= 3 ? E - 1 : 1);
117 |
118 | double* sig_data = (double*)PyArray_DATA(signatures_array);
119 | double* weights_data = (double*)PyArray_DATA(weights_array);
120 | npy_intp* sig_dims = PyArray_DIMS(signatures_array);
121 |
122 | if (E >= 3) {
123 | predictedSignatureY.resize(E - 1, 0.0);
124 | const npy_intp rows = sig_dims[0];
125 | const npy_intp cols = sig_dims[1];
126 |
127 | // Optimize main calculation loop
128 | #pragma omp parallel for
129 | for(long part = 1; part <= E - 1; part++) {
130 | int zero_count = 0;
131 | double sum = 0.0;
132 |
133 | // Vectorized inner loop
134 | #pragma omp simd reduction(+:zero_count,sum)
135 | for(npy_intp i = 0; i < rows; i++) {
136 | const double sig_val = sig_data[i * cols + (part-1)];
137 | zero_count += (sig_val == 0.0);
138 | sum += sig_val * weights_data[i];
139 | }
140 |
141 | predictedSignatureY[part-1] = (zero_count > zeroTolerance) ? 0.0 : sum;
142 | }
143 | } else {
144 | predictedSignatureY.resize(1, 0.0);
145 | const npy_intp total_elements = PyArray_SIZE(signatures_array);
146 |
147 | int zero_count = 0;
148 | double sum = 0.0;
149 |
150 | // Vectorized calculation for E < 3 case
151 | #pragma omp simd reduction(+:zero_count,sum)
152 | for(npy_intp i = 0; i < total_elements; i++) {
153 | zero_count += (sig_data[i] == 0.0);
154 | sum += sig_data[i] * weights_data[i];
155 | }
156 |
157 | predictedSignatureY[0] = (zero_count > zeroTolerance) ?
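// Hashing example (see pattern_vector_difference above): a predicted
// signature (0.4, -0.1) maps to codes (3, 1) -- up=3, down=1, flat=2 --
// and hashes to 3*2! + 1*3! = 12.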
0.0 : sum;
158 | }
159 |
160 | // Calculate pattern value
161 | const int pattern_value = pattern_vector_difference(predictedSignatureY);
162 |
163 | // Create return objects
164 | npy_intp sig_dims_out[] = {static_cast<npy_intp>(predictedSignatureY.size())};
165 | PyObject* predictedSignatureY_array = PyArray_SimpleNew(1, sig_dims_out, NPY_DOUBLE);
166 | if (!predictedSignatureY_array) {
167 | Py_DECREF(signatures_array);
168 | Py_DECREF(weights_array);
169 | return NULL;
170 | }
171 |
172 | // Fast memory copy
173 | memcpy(PyArray_DATA((PyArrayObject*)predictedSignatureY_array),
174 | predictedSignatureY.data(),
175 | predictedSignatureY.size() * sizeof(double));
176 |
177 | PyObject* predictedPatternY = PyLong_FromLong(pattern_value);
178 | if (!predictedPatternY) {
179 | Py_DECREF(signatures_array);
180 | Py_DECREF(weights_array);
181 | Py_DECREF(predictedSignatureY_array);
182 | return NULL;
183 | }
184 |
185 | // Create return dictionary
186 | PyObject* return_dict = PyDict_New();
187 | if (!return_dict ||
188 | PyDict_SetItemString(return_dict, "predictedSignatureY", predictedSignatureY_array) < 0 ||
189 | PyDict_SetItemString(return_dict, "predictedPatternY", predictedPatternY) < 0) {
190 | Py_XDECREF(return_dict);
191 | Py_DECREF(signatures_array);
192 | Py_DECREF(weights_array);
193 | Py_DECREF(predictedPatternY);
194 | Py_DECREF(predictedSignatureY_array);
195 | return NULL;
196 | }
197 |
198 | // Cleanup
199 | Py_DECREF(signatures_array);
200 | Py_DECREF(weights_array);
201 | Py_DECREF(predictedPatternY);
202 | Py_DECREF(predictedSignatureY_array);
203 |
204 | return return_dict;
205 | }
206 |
207 | static PyMethodDef PredictionYMethods[] = {
208 | {"predictionY", (PyCFunction)predictionY, METH_VARARGS | METH_KEYWORDS,
209 | "Predict Y signature and pattern based on projected nearest neighbors"},
210 | {NULL, NULL, 0, NULL}
211 | };
212 |
213 | static struct PyModuleDef predictionymodule = {
214 | PyModuleDef_HEAD_INIT,
215 | "predictionY",
216 | "Prediction Y calculation module",
217 | -1,
218 | PredictionYMethods
219 | };
220 |
221 | PyMODINIT_FUNC PyInit_predictionY(void) {
222 | import_array();
223 | return PyModule_Create(&predictionymodule);
224 | }
225 |
--------------------------------------------------------------------------------
/pattern_causality/cpp/projectedNNs.cpp:
--------------------------------------------------------------------------------
1 | #define PY_SSIZE_T_CLEAN
2 | #include <Python.h>
3 | #include <numpy/arrayobject.h>
4 | #include <cmath>
5 | #include <vector>
6 | #include <algorithm>
7 | #include <cstring>
8 |
9 | // Thread-local storage for reusable vectors
10 | thread_local std::vector<double> weights_2_buffer;
11 | thread_local std::vector<double> exp_weights_buffer;
12 |
13 | // Optimized weights calculation with SIMD support
14 | static PyObject* weights_relative_to_distance(PyObject* dists_vec_obj) {
15 | PyArrayObject* dists_vec = (PyArrayObject*)PyArray_FROM_OTF(dists_vec_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
16 | if (!dists_vec) return NULL;
17 |
18 | const npy_intp n = PyArray_SIZE(dists_vec);
19 | const double* dists_data = (double*)PyArray_DATA(dists_vec);
20 |
21 | // Calculate sum using SIMD
22 | double w_total = 0.0;
23 | #pragma omp simd reduction(+:w_total)
24 | for(npy_intp i = 0; i < n; i++) {
25 | w_total += dists_data[i];
26 | }
27 |
28 | // Handle zero case
29 | w_total = (w_total == 0.0) ?
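// Weighting sketch for weights_relative_to_distance: with d_j the
// distance to projected neighbor j,
//   u_j = d_j / sum(d),   w_j = exp(-u_j) / sum_k exp(-u_k),
// i.e. a softmax over negated normalized distances, so nearer neighbors
// receive larger weights; the 0.0001 fallback below guards an all-zero
// distance sum.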
0.0001 : w_total;
30 | const double w_total_inv = 1.0 / w_total;
31 |
32 | // Reuse pre-allocated vectors
33 | if (weights_2_buffer.size() < n) {
34 | weights_2_buffer.resize(n);
35 | exp_weights_buffer.resize(n);
36 | }
37 |
38 | // Calculate weights_2 using SIMD
39 | #pragma omp simd
40 | for(npy_intp i = 0; i < n; i++) {
41 | weights_2_buffer[i] = dists_data[i] * w_total_inv;
42 | }
43 |
44 | // Calculate exponentials using SIMD
45 | double exp_sum = 0.0;
46 | #pragma omp simd reduction(+:exp_sum)
47 | for(npy_intp i = 0; i < n; i++) {
48 | exp_weights_buffer[i] = std::exp(-weights_2_buffer[i]);
49 | exp_sum += exp_weights_buffer[i];
50 | }
51 |
52 | // Prepare output array
53 | npy_intp dims[] = {n};
54 | PyObject* weights = PyArray_SimpleNew(1, dims, NPY_DOUBLE);
55 | if (!weights) {
56 | Py_DECREF(dists_vec);
57 | return NULL;
58 | }
59 |
60 | // Calculate final weights using SIMD
61 | const double exp_sum_inv = 1.0 / exp_sum;
62 | double* weights_data = (double*)PyArray_DATA((PyArrayObject*)weights);
63 | #pragma omp simd
64 | for(npy_intp i = 0; i < n; i++) {
65 | weights_data[i] = exp_weights_buffer[i] * exp_sum_inv;
66 | }
67 |
68 | Py_DECREF(dists_vec);
69 | return weights;
70 | }
71 |
72 | // Optimized projectedNNs function
73 | static PyObject* projectedNNs(PyObject* self, PyObject* args) {
74 | PyObject *my_obj, *dy_obj, *smy_obj, *psmy_obj, *times_x_obj;
75 | int i, h;
76 |
77 | if (!PyArg_ParseTuple(args, "OOOOOii", &my_obj, &dy_obj, &smy_obj,
78 | &psmy_obj, &times_x_obj, &i, &h)) {
79 | return NULL;
80 | }
81 |
82 | // Convert input arrays with error checking
83 | PyArrayObject* arrays[] = {
84 | (PyArrayObject*)PyArray_FROM_OTF(my_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY),
85 | (PyArrayObject*)PyArray_FROM_OTF(dy_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY),
86 | (PyArrayObject*)PyArray_FROM_OTF(smy_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY),
87 | (PyArrayObject*)PyArray_FROM_OTF(psmy_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY),
88 | (PyArrayObject*)PyArray_FROM_OTF(times_x_obj, NPY_LONG, NPY_ARRAY_IN_ARRAY)
89 | };
90 |
91 | // Check for conversion errors
92 | for (int j = 0; j < 5; j++) {
93 | if (!arrays[j]) {
94 | for (int k = 0; k < j; k++) {
95 | Py_DECREF(arrays[k]);
96 | }
97 | return NULL;
98 | }
99 | }
100 |
101 | // Get array dimensions once
102 | const npy_intp n_times = PyArray_SIZE(arrays[4]);
103 | const npy_intp dy_cols = PyArray_SHAPE(arrays[1])[1];
104 | const npy_intp sig_cols = PyArray_SHAPE(arrays[2])[1];
105 | const npy_intp pat_cols = PyArray_SHAPE(arrays[3])[1];
106 | const npy_intp coord_cols = PyArray_SHAPE(arrays[0])[1];
107 |
108 | // Pre-allocate all output arrays
109 | npy_intp dims[] = {n_times};
110 | PyObject* projected_times = PyArray_SimpleNew(1, dims, NPY_LONG);
111 | PyObject* distances = PyArray_SimpleNew(1, dims, NPY_DOUBLE);
112 |
113 | npy_intp sig_dims[] = {n_times, sig_cols};
114 | npy_intp pat_dims[] = {n_times, pat_cols};
115 | npy_intp coord_dims[] = {n_times, coord_cols};
116 |
117 | PyObject* signatures = PyArray_SimpleNew(2, sig_dims, NPY_DOUBLE);
118 | PyObject* patterns = PyArray_SimpleNew(2, pat_dims, NPY_DOUBLE);
119 | PyObject* coordinates = PyArray_SimpleNew(2, coord_dims, NPY_DOUBLE);
120 |
121 | // Check memory allocation
122 | if (!projected_times || !distances || !signatures || !patterns || !coordinates) {
123 | for (auto arr : arrays) Py_DECREF(arr);
124 | Py_XDECREF(projected_times);
125 | Py_XDECREF(distances);
126 | Py_XDECREF(signatures);
127 | Py_XDECREF(patterns);
128 | Py_XDECREF(coordinates);
129 | return NULL;
130 | }
131 |
132 | //
Get data pointers 133 | long* times_data = (long*)PyArray_DATA(arrays[4]); 134 | double* dy_data = (double*)PyArray_DATA(arrays[1]); 135 | double* smy_data = (double*)PyArray_DATA(arrays[2]); 136 | double* psmy_data = (double*)PyArray_DATA(arrays[3]); 137 | double* my_data = (double*)PyArray_DATA(arrays[0]); 138 | 139 | long* proj_times_data = (long*)PyArray_DATA((PyArrayObject*)projected_times); 140 | double* dist_data = (double*)PyArray_DATA((PyArrayObject*)distances); 141 | double* sig_data = (double*)PyArray_DATA((PyArrayObject*)signatures); 142 | double* pat_data = (double*)PyArray_DATA((PyArrayObject*)patterns); 143 | double* coord_data = (double*)PyArray_DATA((PyArrayObject*)coordinates); 144 | 145 | // Calculate projected times and distances using SIMD 146 | #pragma omp parallel for simd schedule(static) 147 | for(npy_intp j = 0; j < n_times; j++) { 148 | const long proj_time = times_data[j] + h; 149 | proj_times_data[j] = proj_time; 150 | dist_data[j] = dy_data[i * dy_cols + proj_time]; 151 | } 152 | 153 | // Calculate weights 154 | PyObject* weights = weights_relative_to_distance(distances); 155 | if (!weights) { 156 | for (auto arr : arrays) Py_DECREF(arr); 157 | Py_DECREF(projected_times); 158 | Py_DECREF(distances); 159 | Py_DECREF(signatures); 160 | Py_DECREF(patterns); 161 | Py_DECREF(coordinates); 162 | return NULL; 163 | } 164 | 165 | // Copy data using parallel processing where beneficial 166 | #pragma omp parallel for collapse(2) schedule(static) 167 | for(npy_intp j = 0; j < n_times; j++) { 168 | for(npy_intp k = 0; k < sig_cols; k++) { 169 | const long proj_time = proj_times_data[j]; 170 | sig_data[j * sig_cols + k] = smy_data[proj_time * sig_cols + k]; 171 | } 172 | } 173 | 174 | #pragma omp parallel for collapse(2) schedule(static) 175 | for(npy_intp j = 0; j < n_times; j++) { 176 | for(npy_intp k = 0; k < pat_cols; k++) { 177 | const long proj_time = proj_times_data[j]; 178 | pat_data[j * pat_cols + k] = psmy_data[proj_time * pat_cols + k]; 179 | } 180 | } 181 | 182 | #pragma omp parallel for collapse(2) schedule(static) 183 | for(npy_intp j = 0; j < n_times; j++) { 184 | for(npy_intp k = 0; k < coord_cols; k++) { 185 | const long proj_time = proj_times_data[j]; 186 | coord_data[j * coord_cols + k] = my_data[proj_time * coord_cols + k]; 187 | } 188 | } 189 | 190 | // Build return dictionary 191 | PyObject* return_dict = PyDict_New(); 192 | if (!return_dict) { 193 | for (auto arr : arrays) Py_DECREF(arr); 194 | Py_DECREF(projected_times); 195 | Py_DECREF(distances); 196 | Py_DECREF(weights); 197 | Py_DECREF(signatures); 198 | Py_DECREF(patterns); 199 | Py_DECREF(coordinates); 200 | return NULL; 201 | } 202 | 203 | // Set dictionary items 204 | const char* keys[] = {"i", "times_projected", "dists", "weights", 205 | "signatures", "patterns", "coordinates"}; 206 | PyObject* values[] = {PyLong_FromLong(i), projected_times, distances, 207 | weights, signatures, patterns, coordinates}; 208 | 209 | for (int j = 0; j < 7; j++) { 210 | if (PyDict_SetItemString(return_dict, keys[j], values[j]) < 0) { 211 | for (auto arr : arrays) Py_DECREF(arr); 212 | for (auto val : values) Py_DECREF(val); 213 | Py_DECREF(return_dict); 214 | return NULL; 215 | } 216 | Py_DECREF(values[j]); 217 | } 218 | 219 | // Cleanup input arrays 220 | for (auto arr : arrays) { 221 | Py_DECREF(arr); 222 | } 223 | 224 | return return_dict; 225 | } 226 | 227 | static PyMethodDef ProjectedNNsMethods[] = { 228 | {"projectedNNs", projectedNNs, METH_VARARGS, 229 | "Get information about projected nearest 
neighbors"}, 230 | {NULL, NULL, 0, NULL} 231 | }; 232 | 233 | static struct PyModuleDef projectednnsmodule = { 234 | PyModuleDef_HEAD_INIT, 235 | "projectedNNs", 236 | "Projected nearest neighbors calculation module", 237 | -1, 238 | ProjectedNNsMethods 239 | }; 240 | 241 | PyMODINIT_FUNC PyInit_projectedNNs(void) { 242 | import_array(); 243 | return PyModule_Create(&projectednnsmodule); 244 | } 245 | -------------------------------------------------------------------------------- /pattern_causality/cpp/fillPCMatrix.cpp: -------------------------------------------------------------------------------- 1 | #define PY_SSIZE_T_CLEAN 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | // Include SIMD headers based on architecture 8 | #ifdef __ARM_NEON 9 | #include 10 | #elif defined(__x86_64__) || defined(_M_X64) 11 | #include 12 | #endif 13 | 14 | // Optimized norm calculation using available SIMD instructions 15 | static double norm_vec(PyObject* x) { 16 | PyArrayObject* arr = (PyArrayObject*)PyArray_FROM_OTF(x, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY | NPY_ARRAY_ALIGNED); 17 | if (!arr) { 18 | return 0.0; 19 | } 20 | 21 | double sum = 0.0; 22 | double* data = (double*)PyArray_DATA(arr); 23 | npy_intp size = PyArray_SIZE(arr); 24 | 25 | #ifdef __ARM_NEON 26 | // ARM NEON implementation (processes 2 doubles at a time) 27 | float64x2_t sum_vec = vdupq_n_f64(0.0); 28 | npy_intp i; 29 | 30 | for(i = 0; i <= size - 2; i += 2) { 31 | float64x2_t v = vld1q_f64(data + i); 32 | sum_vec = vfmaq_f64(sum_vec, v, v); 33 | } 34 | 35 | sum = vgetq_lane_f64(sum_vec, 0) + vgetq_lane_f64(sum_vec, 1); 36 | 37 | for(; i < size; i++) { 38 | sum += data[i] * data[i]; 39 | } 40 | #elif defined(__AVX__) 41 | // x86 AVX implementation (processes 4 doubles at a time) 42 | __m256d sum_vec = _mm256_setzero_pd(); 43 | npy_intp i; 44 | 45 | for(i = 0; i <= size - 4; i += 4) { 46 | __m256d v = _mm256_load_pd(data + i); 47 | sum_vec = _mm256_add_pd(sum_vec, _mm256_mul_pd(v, v)); 48 | } 49 | 50 | // Horizontal sum 51 | __m128d sum128 = _mm_add_pd(_mm256_extractf128_pd(sum_vec, 0), 52 | _mm256_extractf128_pd(sum_vec, 1)); 53 | sum = _mm_cvtsd_f64(sum128) + _mm_cvtsd_f64(_mm_unpackhi_pd(sum128, sum128)); 54 | 55 | for(; i < size; i++) { 56 | sum += data[i] * data[i]; 57 | } 58 | #else 59 | // Fallback to scalar operations with OpenMP SIMD 60 | #pragma omp simd reduction(+:sum) 61 | for(npy_intp i = 0; i < size; i++) { 62 | sum += data[i] * data[i]; 63 | } 64 | #endif 65 | 66 | Py_DECREF(arr); 67 | return sqrt(sum); 68 | } 69 | 70 | static PyObject* fillPCMatrix(PyObject* self, PyObject* args, PyObject* kwargs) { 71 | PyObject *predictedPatternY_obj, *realPatternY_obj, *predictedSignatureY_obj; 72 | PyObject *realSignatureY_obj, *patternX_obj, *signatureX_obj; 73 | PyObject* weighted_obj; 74 | 75 | static const char* const kwlist[] = { 76 | "weighted", "predictedPatternY", "realPatternY", 77 | "predictedSignatureY", "realSignatureY", 78 | "patternX", "signatureX", NULL 79 | }; 80 | 81 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OOOOOOO", const_cast(kwlist), 82 | &weighted_obj, 83 | &predictedPatternY_obj, &realPatternY_obj, 84 | &predictedSignatureY_obj, &realSignatureY_obj, 85 | &patternX_obj, &signatureX_obj)) { 86 | return NULL; 87 | } 88 | 89 | const bool weighted = PyObject_IsTrue(weighted_obj); 90 | 91 | // Convert inputs to numpy arrays with optimization flags 92 | const int requirements = NPY_ARRAY_IN_ARRAY | NPY_ARRAY_ALIGNED; 93 | PyArrayObject* pred_pattern_arr = 
(PyArrayObject*)PyArray_FROM_OTF(predictedPatternY_obj, NPY_DOUBLE, requirements); 94 | PyArrayObject* real_pattern_arr = (PyArrayObject*)PyArray_FROM_OTF(realPatternY_obj, NPY_DOUBLE, requirements); 95 | PyArrayObject* pattern_x_arr = (PyArrayObject*)PyArray_FROM_OTF(patternX_obj, NPY_DOUBLE, requirements); 96 | 97 | if (!pred_pattern_arr || !real_pattern_arr || !pattern_x_arr) { 98 | Py_XDECREF(pred_pattern_arr); 99 | Py_XDECREF(real_pattern_arr); 100 | Py_XDECREF(pattern_x_arr); 101 | PyErr_SetString(PyExc_TypeError, "Could not convert input to numpy array"); 102 | return NULL; 103 | } 104 | 105 | // Get array data and sizes 106 | const double* const pred_pattern = (const double*)PyArray_DATA(pred_pattern_arr); 107 | const double* const real_pattern = (const double*)PyArray_DATA(real_pattern_arr); 108 | const double* const pattern_x = (const double*)PyArray_DATA(pattern_x_arr); 109 | 110 | const npy_intp size_pred = PyArray_SIZE(pred_pattern_arr); 111 | const npy_intp size_real = PyArray_SIZE(real_pattern_arr); 112 | const npy_intp size_x = PyArray_SIZE(pattern_x_arr); 113 | 114 | // Quick size checks first 115 | if (size_pred == 0 || size_x == 0) { 116 | Py_DECREF(pred_pattern_arr); 117 | Py_DECREF(real_pattern_arr); 118 | Py_DECREF(pattern_x_arr); 119 | PyErr_SetString(PyExc_ValueError, size_pred == 0 ? 120 | "The length of the predicted pattern of Y is ZERO" : 121 | "The length of the causal pattern of X is ZERO"); 122 | return NULL; 123 | } 124 | 125 | // Check for NaN values using available SIMD instructions 126 | bool has_nan = false; 127 | #ifdef __ARM_NEON 128 | npy_intp i; 129 | for(i = 0; i <= size_pred - 2 && !has_nan; i += 2) { 130 | float64x2_t v = vld1q_f64(pred_pattern + i); 131 | uint64x2_t cmp = vceqq_f64(v, v); 132 | if (vgetq_lane_u64(cmp, 0) == 0 || vgetq_lane_u64(cmp, 1) == 0) { 133 | has_nan = true; 134 | break; 135 | } 136 | } 137 | #elif defined(__AVX__) 138 | npy_intp i; 139 | for(i = 0; i <= size_pred - 4 && !has_nan; i += 4) { 140 | __m256d v = _mm256_load_pd(pred_pattern + i); 141 | if (_mm256_movemask_pd(_mm256_cmp_pd(v, v, _CMP_UNORD_Q))) { 142 | has_nan = true; 143 | break; 144 | } 145 | } 146 | #else 147 | npy_intp i = 0; 148 | #endif 149 | 150 | // Handle remaining elements and non-SIMD case 151 | for(; i < size_pred && !has_nan; i++) { 152 | if(std::isnan(pred_pattern[i])) { 153 | has_nan = true; 154 | break; 155 | } 156 | } 157 | 158 | if (!has_nan) { 159 | #ifdef __ARM_NEON 160 | for(i = 0; i <= size_real - 2 && !has_nan; i += 2) { 161 | float64x2_t v = vld1q_f64(real_pattern + i); 162 | uint64x2_t cmp = vceqq_f64(v, v); 163 | if (vgetq_lane_u64(cmp, 0) == 0 || vgetq_lane_u64(cmp, 1) == 0) { 164 | has_nan = true; 165 | break; 166 | } 167 | } 168 | #elif defined(__AVX__) 169 | for(i = 0; i <= size_real - 4 && !has_nan; i += 4) { 170 | __m256d v = _mm256_load_pd(real_pattern + i); 171 | if (_mm256_movemask_pd(_mm256_cmp_pd(v, v, _CMP_UNORD_Q))) { 172 | has_nan = true; 173 | break; 174 | } 175 | } 176 | #else 177 | i = 0; 178 | #endif 179 | 180 | for(; i < size_real && !has_nan; i++) { 181 | if(std::isnan(real_pattern[i])) { 182 | has_nan = true; 183 | break; 184 | } 185 | } 186 | } 187 | 188 | if (!has_nan) { 189 | #ifdef __ARM_NEON 190 | for(i = 0; i <= size_x - 2 && !has_nan; i += 2) { 191 | float64x2_t v = vld1q_f64(pattern_x + i); 192 | uint64x2_t cmp = vceqq_f64(v, v); 193 | if (vgetq_lane_u64(cmp, 0) == 0 || vgetq_lane_u64(cmp, 1) == 0) { 194 | has_nan = true; 195 | break; 196 | } 197 | } 198 | #elif defined(__AVX__) 199 | for(i = 0; i <= size_x - 
4 && !has_nan; i += 4) {
200 | __m256d v = _mm256_load_pd(pattern_x + i);
201 | if (_mm256_movemask_pd(_mm256_cmp_pd(v, v, _CMP_UNORD_Q))) {
202 | has_nan = true;
203 | break;
204 | }
205 | }
206 | #else
207 | i = 0;
208 | #endif
209 |
210 | for(; i < size_x && !has_nan; i++) {
211 | if(std::isnan(pattern_x[i])) {
212 | has_nan = true;
213 | break;
214 | }
215 | }
216 | }
217 |
218 | if (has_nan) {
219 | Py_DECREF(pred_pattern_arr);
220 | Py_DECREF(real_pattern_arr);
221 | Py_DECREF(pattern_x_arr);
222 | return Py_BuildValue("{s:O,s:O}", "real", Py_None, "predicted", Py_None);
223 | }
224 |
225 | // Check if patterns are equal using available SIMD
226 | bool patterns_equal = (size_pred == size_real);
227 | if (patterns_equal) {
228 | #ifdef __ARM_NEON
229 | for(i = 0; i <= size_pred - 2 && patterns_equal; i += 2) {
230 | float64x2_t v1 = vld1q_f64(pred_pattern + i);
231 | float64x2_t v2 = vld1q_f64(real_pattern + i);
232 | uint64x2_t cmp = vceqq_f64(v1, v2);
233 | if (vgetq_lane_u64(cmp, 0) == 0 || vgetq_lane_u64(cmp, 1) == 0) {
234 | patterns_equal = false;
235 | break;
236 | }
237 | }
238 | #elif defined(__AVX__)
239 | for(i = 0; i <= size_pred - 4 && patterns_equal; i += 4) {
240 | __m256d v1 = _mm256_load_pd(pred_pattern + i);
241 | __m256d v2 = _mm256_load_pd(real_pattern + i);
242 | if (_mm256_movemask_pd(_mm256_cmp_pd(v1, v2, _CMP_NEQ_OQ))) {
243 | patterns_equal = false;
244 | break;
245 | }
246 | }
247 | #else
248 | i = 0;
249 | #endif
250 |
251 | for(; i < size_pred && patterns_equal; i++) {
252 | if(pred_pattern[i] != real_pattern[i]) {
253 | patterns_equal = false;
254 | break;
255 | }
256 | }
257 | }
258 |
259 | double predictedCausalityStrength, realCausalityStrength;
260 |
261 | if(patterns_equal) {
262 | if(weighted) {
263 | // Pre-calculate norms
264 | const double pred_norm = norm_vec(predictedSignatureY_obj);
265 | const double real_norm = norm_vec(realSignatureY_obj);
266 | const double sig_x_norm = norm_vec(signatureX_obj);
267 |
268 | if(sig_x_norm > std::numeric_limits<double>::epsilon()) {
269 | const double pred_ratio = pred_norm / sig_x_norm;
270 | const double real_ratio = real_norm / sig_x_norm;
271 | predictedCausalityStrength = std::erf(pred_ratio);
272 | realCausalityStrength = std::erf(real_ratio);
273 | } else {
274 | predictedCausalityStrength = realCausalityStrength = 1.0;
275 | }
276 | } else {
277 | predictedCausalityStrength = realCausalityStrength = 1.0;
278 | }
279 | } else {
280 | predictedCausalityStrength = realCausalityStrength = 0.0;
281 | }
282 |
283 | // Clean up
284 | Py_DECREF(pred_pattern_arr);
285 | Py_DECREF(real_pattern_arr);
286 | Py_DECREF(pattern_x_arr);
287 |
288 | // Return results
289 | return Py_BuildValue("{s:d,s:d}",
290 | "real", realCausalityStrength,
291 | "predicted", predictedCausalityStrength);
292 | }
293 |
294 | static PyMethodDef FillPCMatrixMethods[] = {
295 | {"fillPCMatrix", (PyCFunction)fillPCMatrix, METH_VARARGS | METH_KEYWORDS,
296 | "Fill pattern causality matrix with causality strengths"},
297 | {NULL, NULL, 0, NULL}
298 | };
299 |
300 | static struct PyModuleDef fillpcmatrixmodule = {
301 | PyModuleDef_HEAD_INIT,
302 | "fillPCMatrix",
303 | "Fill pattern causality matrix module",
304 | -1,
305 | FillPCMatrixMethods
306 | };
307 |
308 | PyMODINIT_FUNC PyInit_fillPCMatrix(void) {
309 | import_array();
310 | return PyModule_Create(&fillpcmatrixmodule);
311 | }
--------------------------------------------------------------------------------
/pattern_causality/data/Climate_Indices.csv:
-------------------------------------------------------------------------------- 1 | Date,AO,AAO,NAO,PNA 2 | 1979-01-01,-2.2328,0.2088,-1.38,-0.69 3 | 1979-02-01,-0.6967,0.3563,-0.67,-1.82 4 | 1979-03-01,-0.8141,0.8992,0.78,0.38 5 | 1979-04-01,-1.1568,0.6776,-1.71,0.09 6 | 1979-05-01,-0.2501,0.7237,-1.03,1.35 7 | 1979-06-01,0.9332,1.7,1.6,-1.64 8 | 1979-07-01,0.0385,2.4121,0.83,0.99 9 | 1979-08-01,-0.6841,0.5455,0.96,0.7 10 | 1979-09-01,-0.0459,0.6295,1.01,1.28 11 | 1979-10-01,-1.2434,0.1598,-0.3,1.53 12 | 1979-11-01,0.4751,-0.4225,0.53,0.54 13 | 1979-12-01,1.2948,-0.9507,1.0,-0.38 14 | 1980-01-01,-2.0657,-0.447,-0.75,-0.28 15 | 1980-02-01,-0.9337,-0.9797,0.05,1.74 16 | 1980-03-01,-1.4333,-1.4244,-0.31,-0.35 17 | 1980-04-01,-0.4191,-2.0682,1.29,1.96 18 | 1980-05-01,-1.1548,-0.4787,-1.5,-0.28 19 | 1980-06-01,0.7215,0.2857,-0.37,-0.86 20 | 1980-07-01,-0.6222,-1.9439,-0.42,-0.42 21 | 1980-08-01,-0.1852,-0.9968,-2.24,-1.99 22 | 1980-09-01,0.3126,-1.7008,0.66,-0.05 23 | 1980-10-01,-0.5212,0.5774,-1.77,2.45 24 | 1980-11-01,-1.361,-2.0129,-0.37,1.35 25 | 1980-12-01,-0.0573,-0.3563,0.78,-0.27 26 | 1981-01-01,-0.1163,0.2305,0.37,2.42 27 | 1981-02-01,-0.3316,0.0393,0.92,0.38 28 | 1981-03-01,-1.6447,-0.9655,-1.19,1.6 29 | 1981-04-01,0.4304,-1.462,0.36,-1.02 30 | 1981-05-01,0.1796,-0.344,0.2,1.98 31 | 1981-06-01,-0.4379,0.3517,-0.45,-0.2 32 | 1981-07-01,0.5605,-0.9859,0.05,-0.64 33 | 1981-08-01,-0.2441,-2.1183,0.39,-1.5 34 | 1981-09-01,-1.0401,-1.5094,-1.45,0.15 35 | 1981-10-01,-1.1675,-0.2603,-1.35,-1.45 36 | 1981-11-01,-0.1877,0.6256,-0.38,1.26 37 | 1981-12-01,-1.2157,1.1164,-0.02,-0.12 38 | 1982-01-01,-0.8834,-0.5544,-0.89,-0.86 39 | 1982-02-01,0.9739,0.2772,1.15,-0.68 40 | 1982-03-01,1.0741,1.6035,1.15,-1.4 41 | 1982-04-01,1.4538,1.5314,0.1,-1.89 42 | 1982-05-01,-0.2087,0.1179,-0.53,-0.7 43 | 1982-06-01,-1.1801,0.9201,-1.63,1.93 44 | 1982-07-01,0.0048,-0.4147,1.15,0.97 45 | 1982-08-01,0.3622,0.7793,0.26,0.34 46 | 1982-09-01,0.5577,1.58,1.76,1.05 47 | 1982-10-01,-0.211,-0.7023,-0.74,-0.93 48 | 1982-11-01,0.6609,-0.8492,1.6,-0.48 49 | 1982-12-01,0.9672,-1.9337,1.78,0.75 50 | 1983-01-01,1.3591,-1.3404,1.59,1.18 51 | 1983-02-01,-1.8059,-1.0807,-0.53,1.3 52 | 1983-03-01,-0.5671,0.1663,0.95,2.38 53 | 1983-04-01,-0.7378,0.1494,-0.85,1.26 54 | 1983-05-01,-0.4409,-0.4372,-0.07,-0.19 55 | 1983-06-01,0.3125,-0.2628,0.99,1.87 56 | 1983-07-01,0.1305,1.1141,1.19,1.33 57 | 1983-08-01,1.0978,0.792,1.61,0.53 58 | 1983-09-01,0.1669,-0.696,-1.12,-1.59 59 | 1983-10-01,1.3689,1.1935,0.65,0.09 60 | 1983-11-01,-0.6879,0.7274,-0.98,1.84 61 | 1983-12-01,0.1862,0.4755,0.29,-0.31 62 | 1984-01-01,0.905,-1.0975,1.66,0.97 63 | 1984-02-01,-0.3027,-0.5437,0.72,0.77 64 | 1984-03-01,-2.386,0.2509,-0.37,1.41 65 | 1984-04-01,-0.2836,-0.2042,-0.28,1.7 66 | 1984-05-01,0.4792,-1.2374,0.54,0.32 67 | 1984-06-01,0.0073,0.4261,-0.42,-0.44 68 | 1984-07-01,0.0189,0.8896,-0.07,-2.34 69 | 1984-08-01,0.4657,-0.5484,1.15,-1.11 70 | 1984-09-01,-0.4128,0.327,0.17,0.03 71 | 1984-10-01,-0.2703,-0.0094,-0.07,-0.63 72 | 1984-11-01,-0.9659,-0.0241,-0.06,0.42 73 | 1984-12-01,0.446,-1.4756,0.0,-1.6 74 | 1985-01-01,-2.8057,-0.7948,-1.61,1.63 75 | 1985-02-01,-1.4398,0.2155,-0.49,-0.52 76 | 1985-03-01,0.5514,-0.1336,0.2,-0.92 77 | 1985-04-01,0.6524,0.0315,0.32,-1.06 78 | 1985-05-01,-0.4322,-0.0661,-0.49,-1.03 79 | 1985-06-01,-0.3466,-0.3307,-0.8,1.1 80 | 1985-07-01,-0.3896,1.9137,1.22,0.25 81 | 1985-08-01,-0.0014,0.5948,-0.48,-0.5 82 | 1985-09-01,0.1144,1.5073,-0.52,-0.71 83 | 1985-10-01,1.0351,0.4708,0.9,-1.51 84 | 1985-11-01,-1.2175,1.0847,-0.67,-1.9 85 | 
1985-12-01,-1.9476,1.2403,0.22,1.39 86 | 1986-01-01,-0.5676,0.1578,1.11,0.97 87 | 1986-02-01,-2.9041,-1.588,-1.0,0.53 88 | 1986-03-01,1.9308,-0.7696,1.71,0.83 89 | 1986-04-01,0.103,-0.0867,-0.59,0.09 90 | 1986-05-01,0.3669,-1.8466,0.85,-0.13 91 | 1986-06-01,0.5346,-0.6194,1.22,0.26 92 | 1986-07-01,-0.0083,0.0892,0.12,0.07 93 | 1986-08-01,-0.8263,-0.157,-1.09,-1.45 94 | 1986-09-01,-0.0234,0.8487,-1.12,-0.29 95 | 1986-10-01,1.4246,0.3057,1.55,0.92 96 | 1986-11-01,0.9257,-0.2225,2.29,-0.73 97 | 1986-12-01,0.0598,0.8863,0.99,1.37 98 | 1987-01-01,-1.1476,-0.9504,-1.15,1.0 99 | 1987-02-01,-1.4732,-0.7077,-0.73,0.65 100 | 1987-03-01,-1.7465,-0.1327,0.14,1.17 101 | 1987-04-01,0.387,-0.2856,2.0,1.83 102 | 1987-05-01,0.3252,0.0386,0.98,-1.26 103 | 1987-06-01,-0.7103,-0.7019,-1.82,0.42 104 | 1987-07-01,-0.4663,-1.5313,0.52,-0.01 105 | 1987-08-01,-0.8357,1.4852,-0.83,0.77 106 | 1987-09-01,0.2865,-0.7989,-1.22,-3.07 107 | 1987-10-01,-0.08,0.4555,0.14,0.53 108 | 1987-11-01,-0.5358,1.0604,0.18,1.26 109 | 1987-12-01,-0.5339,0.2723,0.32,0.8 110 | 1988-01-01,0.2647,-0.6117,1.02,0.53 111 | 1988-02-01,-1.0662,0.5508,0.76,1.25 112 | 1988-03-01,-0.1971,-0.219,-0.17,0.69 113 | 1988-04-01,-0.5607,-0.0768,-1.17,1.4 114 | 1988-05-01,-0.8461,-0.7486,0.63,0.6 115 | 1988-06-01,0.0605,-1.0549,0.88,1.13 116 | 1988-07-01,-0.1434,0.576,-0.35,2.16 117 | 1988-08-01,0.2546,-0.7449,0.04,-0.59 118 | 1988-09-01,1.0393,-0.6885,-0.99,-1.11 119 | 1988-10-01,0.0324,-2.314,-1.08,0.66 120 | 1988-11-01,-0.0347,0.401,-0.34,0.13 121 | 1988-12-01,1.6788,1.0745,0.61,0.63 122 | 1989-01-01,3.106,0.6184,1.17,-0.72 123 | 1989-02-01,3.2793,0.8489,2.0,-1.06 124 | 1989-03-01,1.5303,0.6321,1.85,-1.3 125 | 1989-04-01,-0.2502,-0.5731,0.28,-0.54 126 | 1989-05-01,0.8888,2.6906,1.38,-0.14 127 | 1989-06-01,0.345,1.9948,-0.27,-0.63 128 | 1989-07-01,0.8656,1.4576,0.97,-0.18 129 | 1989-08-01,0.5509,-0.1319,0.01,-0.24 130 | 1989-09-01,0.7031,-0.1212,2.05,0.54 131 | 1989-10-01,0.9907,0.1358,-0.03,-1.13 132 | 1989-11-01,0.0338,0.572,0.16,-0.72 133 | 1989-12-01,-0.6437,-0.445,-1.15,0.87 134 | 1990-01-01,1.0007,-0.3521,1.04,-0.34 135 | 1990-02-01,3.4016,1.1507,1.41,-1.86 136 | 1990-03-01,2.99,0.4142,1.46,0.34 137 | 1990-04-01,1.8788,-1.8786,2.0,-0.39 138 | 1990-05-01,0.9428,-1.8034,-1.53,0.21 139 | 1990-06-01,0.3043,0.0931,-0.02,-1.41 140 | 1990-07-01,-0.2958,-1.2151,0.53,0.17 141 | 1990-08-01,-0.1802,0.4657,0.97,1.79 142 | 1990-09-01,-0.2104,1.4816,1.06,-0.5 143 | 1990-10-01,0.6603,0.139,0.23,-0.97 144 | 1990-11-01,0.5206,-0.359,-0.24,-1.73 145 | 1990-12-01,1.2767,-0.3117,0.22,-1.32 146 | 1991-01-01,0.7232,0.8689,0.86,0.66 147 | 1991-02-01,-0.876,-0.8517,1.04,1.07 148 | 1991-03-01,-0.5268,0.5223,-0.2,-0.94 149 | 1991-04-01,0.5302,-0.6394,0.29,0.64 150 | 1991-05-01,0.4865,-0.5386,0.08,-0.81 151 | 1991-06-01,-0.1154,-1.1546,-0.82,-0.97 152 | 1991-07-01,-0.188,-1.2202,-0.49,0.75 153 | 1991-08-01,0.7969,0.0355,1.23,-0.25 154 | 1991-09-01,-0.1122,-0.513,0.48,1.43 155 | 1991-10-01,-0.2519,-0.6232,-0.19,-2.28 156 | 1991-11-01,0.2847,-0.8042,0.48,0.24 157 | 1991-12-01,1.6132,-2.0675,0.46,0.47 158 | 1992-01-01,0.55,0.0726,-0.13,1.28 159 | 1992-02-01,1.1217,-1.6268,1.07,0.29 160 | 1992-03-01,0.9842,-1.0103,0.87,0.77 161 | 1992-04-01,-0.5205,-0.4393,1.86,-0.05 162 | 1992-05-01,1.3414,-2.032,2.63,1.27 163 | 1992-06-01,-0.302,-2.1933,0.2,1.17 164 | 1992-07-01,0.1911,-0.5662,0.16,0.85 165 | 1992-08-01,0.5353,-0.3495,0.85,-0.34 166 | 1992-09-01,-0.6403,0.435,-0.44,-1.31 167 | 1992-10-01,-0.3659,-0.3194,-1.76,0.21 168 | 1992-11-01,0.717,0.1218,1.19,0.85 169 | 
1992-12-01,1.6267,0.2436,0.47,-1.23 170 | 1993-01-01,3.4953,-2.0206,1.6,-0.65 171 | 1993-02-01,0.1845,0.437,0.5,0.55 172 | 1993-03-01,0.7643,-0.3776,0.67,1.27 173 | 1993-04-01,-0.4354,0.0872,0.97,1.54 174 | 1993-05-01,-1.6075,1.2599,-0.78,2.66 175 | 1993-06-01,-0.5195,1.2179,-0.59,0.52 176 | 1993-07-01,-0.5107,1.9571,-3.18,0.15 177 | 1993-08-01,-0.393,1.0829,0.12,-0.14 178 | 1993-09-01,-0.3606,1.061,-0.57,-1.12 179 | 1993-10-01,-0.565,0.7481,-0.71,-0.3 180 | 1993-11-01,1.0018,0.3237,2.56,-0.36 181 | 1993-12-01,-0.1041,1.0281,1.56,0.72 182 | 1994-01-01,-0.2879,0.7227,1.04,-0.12 183 | 1994-02-01,-0.8615,1.157,0.46,-0.73 184 | 1994-03-01,1.881,0.6933,1.26,0.54 185 | 1994-04-01,0.2247,-0.0525,1.14,-0.47 186 | 1994-05-01,-0.1154,-0.1527,-0.57,0.45 187 | 1994-06-01,1.6063,-1.6819,1.52,-1.38 188 | 1994-07-01,0.3507,-0.4922,1.31,0.38 189 | 1994-08-01,0.8275,1.9099,0.38,-1.34 190 | 1994-09-01,-0.0841,-0.947,-1.32,-2.38 191 | 1994-10-01,0.174,-0.5778,-0.97,-0.23 192 | 1994-11-01,1.7794,-0.7926,0.64,-1.67 193 | 1994-12-01,0.8938,0.9327,2.02,0.69 194 | 1995-01-01,-0.1538,1.4485,0.93,0.66 195 | 1995-02-01,1.4289,0.5329,1.14,0.73 196 | 1995-03-01,0.3932,-0.1544,1.25,0.33 197 | 1995-04-01,-0.9631,0.6488,-0.85,0.29 198 | 1995-05-01,-0.8912,1.3967,-1.49,0.01 199 | 1995-06-01,-0.1118,-0.802,0.13,0.74 200 | 1995-07-01,-0.2171,-3.0097,-0.22,-0.34 201 | 1995-08-01,0.5436,-0.6965,0.69,-0.3 202 | 1995-09-01,-0.549,1.1733,0.31,1.52 203 | 1995-10-01,0.075,-0.057,0.19,-0.05 204 | 1995-11-01,-0.7233,0.1429,-1.38,-0.75 205 | 1995-12-01,-2.1271,1.4697,-1.67,0.92 206 | 1996-01-01,-1.2004,0.3321,-0.12,-0.02 207 | 1996-02-01,0.1632,-0.525,-0.07,-0.3 208 | 1996-03-01,-1.4832,0.5435,-0.24,-0.47 209 | 1996-04-01,-1.5251,0.115,-0.17,0.77 210 | 1996-05-01,-0.2264,0.9832,-1.06,0.32 211 | 1996-06-01,0.4967,-0.252,0.56,-1.21 212 | 1996-07-01,0.7146,0.0209,0.67,0.64 213 | 1996-08-01,0.1247,-1.5019,1.02,-0.9 214 | 1996-09-01,-1.14,-1.3144,-0.86,-0.28 215 | 1996-10-01,0.1825,0.9657,-0.33,-0.76 216 | 1996-11-01,0.1364,-1.6669,-0.56,-0.45 217 | 1996-12-01,-1.7208,-0.0231,-1.41,-1.23 218 | 1997-01-01,-0.4568,0.3689,-0.49,0.63 219 | 1997-02-01,1.8887,-0.2442,1.7,0.56 220 | 1997-03-01,1.0908,0.701,1.46,-1.3 221 | 1997-04-01,0.3236,-0.4576,-1.02,0.55 222 | 1997-05-01,-0.9611,1.0281,-0.28,0.78 223 | 1997-06-01,-0.815,-0.4576,-1.47,-0.34 224 | 1997-07-01,-0.4306,0.7797,0.34,0.56 225 | 1997-08-01,0.1206,0.7684,0.83,-0.52 226 | 1997-09-01,0.1945,0.1222,0.61,0.31 227 | 1997-10-01,-0.6997,-0.5947,-1.7,-0.26 228 | 1997-11-01,-0.6611,-1.9046,-0.9,0.91 229 | 1997-12-01,-0.0711,-0.8355,-0.96,1.16 230 | 1998-01-01,-2.0806,0.4125,0.39,0.74 231 | 1998-02-01,-0.1832,0.3896,-0.11,0.89 232 | 1998-03-01,-0.2544,0.7359,0.87,1.01 233 | 1998-04-01,-0.0379,1.9273,-0.68,1.12 234 | 1998-05-01,0.4286,-0.0381,-1.32,-2.22 235 | 1998-06-01,-0.7107,1.031,-2.72,-0.02 236 | 1998-07-01,-0.2117,1.45,-0.48,2.24 237 | 1998-08-01,0.6503,0.9041,-0.02,-0.57 238 | 1998-09-01,-1.0499,-0.1224,-2.0,0.55 239 | 1998-10-01,0.2943,0.3995,-0.29,0.55 240 | 1998-11-01,-1.4494,0.8172,-0.28,0.74 241 | 1998-12-01,1.3534,1.4352,0.87,-0.09 242 | 1999-01-01,0.1103,0.9991,0.77,0.16 243 | 1999-02-01,0.4821,0.4559,0.29,-0.12 244 | 1999-03-01,-1.4916,0.1804,0.23,0.69 245 | 1999-04-01,0.2844,0.9494,-0.95,0.3 246 | 1999-05-01,0.2259,1.639,0.92,-0.15 247 | 1999-06-01,0.707,-1.3249,1.12,0.3 248 | 1999-07-01,-0.002,0.3156,-0.9,-0.54 249 | 1999-08-01,-0.6721,0.0419,0.39,1.97 250 | 1999-09-01,0.0591,-0.0121,0.36,0.44 251 | 1999-10-01,-0.0058,1.6535,0.2,0.41 252 | 1999-11-01,0.6109,0.9006,0.65,0.48 253 
| 1999-12-01,1.0431,1.7838,1.61,0.21 254 | 2000-01-01,1.2702,1.2734,0.6,-0.82 255 | 2000-02-01,1.0758,0.6197,1.7,1.12 256 | 2000-03-01,-0.4514,0.1331,0.77,1.28 257 | 2000-04-01,-0.2785,0.2327,-0.03,-0.35 258 | 2000-05-01,0.9691,1.1271,1.58,-0.28 259 | 2000-06-01,0.5861,0.1172,-0.03,-1.22 260 | 2000-07-01,-0.6494,0.0586,-1.03,-2.28 261 | 2000-08-01,0.1439,-0.6735,-0.29,-0.48 262 | 2000-09-01,0.3949,-1.8529,-0.21,-1.4 263 | 2000-10-01,0.3168,0.347,0.92,0.25 264 | 2000-11-01,-1.5815,-1.5371,-0.92,0.74 265 | 2000-12-01,-2.3544,-1.2903,-0.58,1.23 266 | 2001-01-01,-0.9588,-0.4709,0.25,1.51 267 | 2001-02-01,-0.6224,-0.2649,0.45,-0.16 268 | 2001-03-01,-1.6865,-0.5548,-1.26,0.7 269 | 2001-04-01,0.906,0.515,0.0,-0.47 270 | 2001-05-01,0.452,-0.2622,-0.02,-0.11 271 | 2001-06-01,-0.0153,0.3861,-0.2,-0.94 272 | 2001-07-01,-0.031,-0.9283,-0.25,0.06 273 | 2001-08-01,0.5205,0.9103,-0.07,-0.11 274 | 2001-09-01,-0.7066,1.1614,-0.65,0.1 275 | 2001-10-01,0.7075,1.2771,-0.24,-0.22 276 | 2001-11-01,0.8186,0.9958,0.63,1.09 277 | 2001-12-01,-1.3224,1.4736,-0.83,0.56 278 | 2002-01-01,1.3813,0.7469,0.44,-0.04 279 | 2002-02-01,1.3035,1.3341,1.1,0.14 280 | 2002-03-01,0.902,-1.8235,0.69,-1.3 281 | 2002-04-01,0.7484,0.165,1.18,-2.22 282 | 2002-05-01,0.4014,-2.7985,-0.22,-0.73 283 | 2002-06-01,0.5727,-1.112,0.38,-0.05 284 | 2002-07-01,0.3276,-0.5909,0.62,0.88 285 | 2002-08-01,-0.2285,-0.0994,0.38,0.64 286 | 2002-09-01,-0.0427,-0.8645,-0.7,0.77 287 | 2002-10-01,-1.4885,-2.564,-2.28,-0.65 288 | 2002-11-01,-1.4251,-0.9235,-0.18,1.54 289 | 2002-12-01,-1.5921,1.3085,-0.94,1.59 290 | 2003-01-01,-0.4717,-0.9879,0.16,1.29 291 | 2003-02-01,0.1278,-0.3569,0.62,0.73 292 | 2003-03-01,0.933,-0.1877,0.32,-0.07 293 | 2003-04-01,-0.1781,0.2243,-0.18,0.14 294 | 2003-05-01,1.0167,0.3845,0.01,-2.2 295 | 2003-06-01,-0.1021,-0.7745,-0.07,-0.6 296 | 2003-07-01,0.0753,0.727,0.13,1.23 297 | 2003-08-01,-0.2804,0.678,-0.07,-0.28 298 | 2003-09-01,0.4666,-0.3231,0.01,0.76 299 | 2003-10-01,-0.6698,-0.0249,-1.26,0.97 300 | 2003-11-01,0.6424,-0.7117,0.86,-1.72 301 | 2003-12-01,0.2652,-1.3229,0.64,0.86 302 | 2004-01-01,-1.6858,0.8071,-0.29,0.41 303 | 2004-02-01,-1.5285,-1.1819,-0.14,1.06 304 | 2004-03-01,0.3181,0.4317,1.02,0.3 305 | 2004-04-01,-0.4094,0.151,1.15,0.51 306 | 2004-05-01,-0.0943,0.4596,0.19,-1.76 307 | 2004-06-01,-0.2359,1.1954,-0.89,-0.37 308 | 2004-07-01,-0.2005,1.4743,1.13,0.09 309 | 2004-08-01,-0.7202,-0.0712,-0.48,1.55 310 | 2004-09-01,0.855,0.2536,0.38,-0.08 311 | 2004-10-01,-0.5154,-0.0425,-1.1,-1.39 312 | 2004-11-01,0.6783,-0.2422,0.73,0.31 313 | 2004-12-01,1.2301,-0.9729,1.21,0.26 314 | 2005-01-01,0.3562,-0.1287,1.52,0.02 315 | 2005-02-01,-1.2706,1.2435,-0.06,0.14 316 | 2005-03-01,-1.3479,0.1583,-1.83,0.88 317 | 2005-04-01,-0.0462,0.3554,-0.3,1.28 318 | 2005-05-01,-0.7634,-0.2973,-1.25,1.74 319 | 2005-06-01,-0.3832,-1.4277,-0.05,0.29 320 | 2005-07-01,-0.0302,-0.252,-0.51,0.43 321 | 2005-08-01,0.0261,0.2282,0.37,0.72 322 | 2005-09-01,0.8024,0.2407,0.63,1.63 323 | 2005-10-01,0.0298,0.031,-0.98,0.9 324 | 2005-11-01,0.2277,-0.5515,-0.31,-0.76 325 | 2005-12-01,-2.1039,-1.9678,-0.44,1.38 326 | 2006-01-01,-0.1705,0.3389,1.2651,0.4337 327 | 2006-02-01,-0.1558,-0.2113,-0.5106,-0.1119 328 | 2006-03-01,-1.6038,0.5006,-1.2779,-0.2744 329 | 2006-04-01,0.1383,-0.1693,1.2353,0.4461 330 | 2006-05-01,0.1558,1.6954,-1.1444,-1.2712 331 | 2006-06-01,1.0708,0.438,0.8412,-1.0634 332 | 2006-07-01,0.1027,0.9255,0.9024,1.834 333 | 2006-08-01,-0.2652,-1.7271,-1.7264,-1.4073 334 | 2006-09-01,0.6065,-0.3241,-1.6219,0.424 335 | 
2006-10-01,-1.0291,0.8792,-2.2439,-0.838 336 | 2006-11-01,0.5213,0.1013,0.4368,-1.3886 337 | 2006-12-01,2.2817,0.6384,1.3366,1.8647 338 | 2007-01-01,2.0338,-0.0828,0.2227,0.7169 339 | 2007-02-01,-1.3069,0.0749,-0.47,-0.0874 340 | 2007-03-01,1.1821,-0.5701,1.4425,0.1808 341 | 2007-04-01,0.5443,-1.0352,0.1694,1.245 342 | 2007-05-01,0.8937,-0.6119,0.6638,-0.0571 343 | 2007-06-01,-0.555,-1.198,-1.3064,-0.3786 344 | 2007-07-01,-0.3965,-2.6307,-0.5794,2.2087 345 | 2007-08-01,-0.0337,-0.1079,-0.139,1.9775 346 | 2007-09-01,0.1789,0.0305,0.7213,1.8826 347 | 2007-10-01,0.3835,-0.4337,0.4465,0.5504 348 | 2007-11-01,-0.5187,-0.9838,0.5756,0.6926 349 | 2007-12-01,0.8211,1.9293,0.3436,0.1395 350 | 2008-01-01,0.819,1.2085,0.8901,-0.3179 351 | 2008-02-01,0.9381,1.1474,0.7346,0.5006 352 | 2008-03-01,0.5856,0.5875,0.0761,-0.3225 353 | 2008-04-01,-0.455,-0.8734,-1.0655,-0.9707 354 | 2008-05-01,-1.2047,-0.4898,-1.728,1.3274 355 | 2008-06-01,-0.0898,1.3484,-1.3895,-1.7529 356 | 2008-07-01,-0.4799,0.3202,-1.2741,-0.0988 357 | 2008-08-01,-0.0805,0.087,-1.1602,0.9209 358 | 2008-09-01,-0.3266,1.3856,1.0169,1.1173 359 | 2008-10-01,1.6758,1.2149,-0.0434,0.8563 360 | 2008-11-01,0.0922,0.9197,-0.3199,1.1104 361 | 2008-12-01,0.6478,1.1938,-0.2765,-1.4097 362 | 2009-01-01,0.7997,0.9626,-0.0075,0.606 363 | 2009-02-01,-0.6723,0.4558,0.0565,-0.9476 364 | 2009-03-01,0.1213,0.6046,0.5723,-1.0245 365 | 2009-04-01,0.9725,0.0292,-0.2044,0.2112 366 | 2009-05-01,1.1937,-0.7328,1.6827,-0.5809 367 | 2009-06-01,-1.3507,-0.47,-1.2074,0.3505 368 | 2009-07-01,-1.3559,-1.2342,-2.1529,1.2175 369 | 2009-08-01,-0.0537,-0.6856,-0.1938,0.6496 370 | 2009-09-01,0.8745,-0.0165,1.5089,1.2891 371 | 2009-10-01,-1.5399,0.0848,-1.0322,0.431 372 | 2009-11-01,0.4589,-1.9151,-0.0248,0.2057 373 | 2009-12-01,-3.4128,0.6068,-1.9257,0.3386 374 | 2010-01-01,-2.5868,-0.7569,-1.1086,1.2536 375 | 2010-02-01,-4.2657,-0.7752,-1.9845,0.5788 376 | 2010-03-01,-0.4321,0.1083,-0.8831,2.0197 377 | 2010-04-01,-0.2745,0.3774,-0.7199,1.5365 378 | 2010-05-01,-0.9186,1.021,-1.487,-0.9322 379 | 2010-06-01,-0.013,2.0707,-0.816,-0.2042 380 | 2010-07-01,0.4353,2.4237,-0.425,1.3955 381 | 2010-08-01,-0.1166,1.5101,-1.2227,1.1019 382 | 2010-09-01,-0.8646,0.4018,-0.7948,1.3359 383 | 2010-10-01,-0.467,1.3349,-0.9283,1.7967 384 | 2010-11-01,-0.3757,1.5163,-1.6158,-0.845 385 | 2010-12-01,-2.631,0.2054,-1.8472,-1.7774 386 | 2011-01-01,-1.6831,0.0524,-0.8768,1.2855 387 | 2011-02-01,1.5754,1.0738,0.7012,-1.5807 388 | 2011-03-01,1.4241,-0.2962,0.6129,0.6887 389 | 2011-04-01,2.2748,-0.87,2.4775,-1.5859 390 | 2011-05-01,-0.0351,1.2657,-0.0623,0.1896 391 | 2011-06-01,-0.8578,-0.099,-1.2795,0.2236 392 | 2011-07-01,-0.4716,-1.3842,-1.5122,-0.355 393 | 2011-08-01,-1.0626,-1.202,-1.3475,1.4217 394 | 2011-09-01,0.6647,-1.25,0.536,-0.4196 395 | 2011-10-01,0.7998,0.3882,0.3935,0.6273 396 | 2011-11-01,1.4592,-0.9075,1.3602,-0.7886 397 | 2011-12-01,2.2208,2.5735,2.5213,0.3635 398 | 2012-01-01,-0.2197,1.5833,1.1739,0.6035 399 | 2012-02-01,-0.0363,-0.2831,0.4201,0.7501 400 | 2012-03-01,1.0371,0.2748,1.2655,0.1075 401 | 2012-04-01,-0.0346,0.6656,0.471,0.1542 402 | 2012-05-01,0.1684,0.1528,-0.9068,-0.4228 403 | 2012-06-01,-0.6724,-0.1971,-2.5309,-0.5009 404 | 2012-07-01,0.1678,1.2593,-1.3215,-0.1625 405 | 2012-08-01,0.014,0.4888,-0.9832,-0.1709 406 | 2012-09-01,0.7722,0.5624,-0.5861,-0.4217 407 | 2012-10-01,-1.514,-0.4442,-2.062,-1.1764 408 | 2012-11-01,-0.1106,-1.7009,-0.5782,-1.095 409 | 2012-12-01,-1.7486,-0.7635,0.1706,-1.0097 410 | 2013-01-01,-0.6095,0.071,0.3453,0.5458 411 | 
2013-02-01,-1.0074,0.7156,-0.4531,0.4483 412 | 2013-03-01,-3.1854,1.3748,-1.6119,0.038 413 | 2013-04-01,0.3222,0.6106,0.687,-1.5574 414 | 2013-05-01,0.494,0.3596,0.5691,-0.3636 415 | 2013-06-01,0.5487,-0.2711,0.5208,-0.4348 416 | 2013-07-01,-0.0111,0.9453,0.6722,-0.3068 417 | 2013-08-01,0.1542,-1.5609,0.9702,-0.0293 418 | 2013-09-01,-0.4609,-1.6583,0.2406,0.5445 419 | 2013-10-01,0.2628,-0.4576,-1.2801,-0.3405 420 | 2013-11-01,2.029,0.1888,0.9008,-1.1833 421 | 2013-12-01,1.4749,0.0613,0.9457,-0.8582 422 | 2014-01-01,-0.9688,-0.6826,0.2903,0.9683 423 | 2014-02-01,0.0438,0.3222,1.3352,-0.9513 424 | 2014-03-01,1.2058,0.467,0.7983,-0.2085 425 | 2014-04-01,0.972,0.6137,0.3052,0.2465 426 | 2014-05-01,0.4642,-0.4452,-0.9225,-0.7633 427 | 2014-06-01,-0.5074,0.8412,-0.9701,-1.4583 428 | 2014-07-01,-0.4889,0.247,0.1754,0.9871 429 | 2014-08-01,-0.3715,-0.0589,-1.6815,1.3699 430 | 2014-09-01,0.1019,-1.1189,1.6163,0.9892 431 | 2014-10-01,-1.1344,-0.039,-1.2706,0.887 432 | 2014-11-01,-0.5303,-0.5192,0.678,0.6782 433 | 2014-12-01,0.4129,1.3222,1.8575,0.6689 434 | 2015-01-01,1.0916,0.6747,1.7887,0.6124 435 | 2015-02-01,1.0426,1.2165,1.3228,0.59 436 | 2015-03-01,1.8374,0.7733,1.4497,-0.2299 437 | 2015-04-01,1.2157,1.0286,0.7253,-0.1502 438 | 2015-05-01,0.7628,0.4156,0.1455,-0.1578 439 | 2015-06-01,0.427,0.7113,-0.0668,-0.1731 440 | 2015-07-01,-1.1079,1.678,-3.179,0.7612 441 | 2015-08-01,-0.689,1.0618,-0.7604,0.0789 442 | 2015-09-01,-0.1645,0.5424,-0.6485,-0.9181 443 | 2015-10-01,-0.2501,-0.1699,0.4372,1.775 444 | 2015-11-01,1.945,0.6954,1.7438,-0.1927 445 | 2015-12-01,1.4441,-0.0589,2.2436,0.7771 446 | 2016-01-01,-1.4487,1.3918,0.1165,2.0213 447 | 2016-02-01,-0.0235,1.0933,1.5803,1.4822 448 | 2016-03-01,0.2802,2.0383,0.7344,0.7278 449 | 2016-04-01,-1.0511,0.0969,0.3751,0.8657 450 | 2016-05-01,-0.0357,0.0124,-0.7736,-1.0611 451 | 2016-06-01,0.3129,2.5655,-0.4325,-0.7011 452 | 2016-07-01,0.0848,0.4066,-1.7603,1.0193 453 | 2016-08-01,0.4724,-0.7385,-1.6453,-0.8759 454 | 2016-09-01,0.781,2.3328,0.611,0.182 455 | 2016-10-01,-1.9173,-0.1772,0.4113,1.2393 456 | 2016-11-01,-0.6109,-1.5077,-0.1637,1.517 457 | 2016-12-01,1.7864,-0.7112,0.4783,-0.3534 458 | 2017-01-01,0.942,-0.9822,0.4782,0.28 459 | 2017-02-01,0.3399,-0.0149,1.0048,0.1848 460 | 2017-03-01,1.3654,0.1561,0.737,0.2682 461 | 2017-04-01,-0.0887,0.6192,1.7325,0.3966 462 | 2017-05-01,-0.7301,1.0528,-1.911,-0.3112 463 | 2017-06-01,0.4017,0.5464,0.045,1.0051 464 | 2017-07-01,0.6342,0.7284,1.2556,1.8642 465 | 2017-08-01,0.15,0.7641,-1.0977,0.2317 466 | 2017-09-01,-0.4924,1.2961,-0.6127,-0.3308 467 | 2017-10-01,0.6903,-0.5676,0.1859,-0.398 468 | 2017-11-01,-0.0776,0.7706,-0.0049,-2.0568 469 | 2017-12-01,-0.059,0.9839,0.8816,0.8877 470 | 2018-01-01,-0.2808,1.2752,1.4423,0.3995 471 | 2018-02-01,0.1127,1.0411,1.5778,-1.0256 472 | 2018-03-01,-0.9411,0.1405,-0.9269,-0.886 473 | 2018-04-01,0.5439,-1.1662,1.2411,-0.9091 474 | 2018-05-01,1.1796,-0.0766,2.1208,-1.3363 475 | 2018-06-01,0.3799,-0.0119,1.088,0.5115 476 | 2018-07-01,0.6118,0.3768,1.3893,-0.3674 477 | 2018-08-01,0.8361,-0.3428,1.967,1.2674 478 | 2018-09-01,0.5845,1.4584,1.6736,1.4366 479 | 2018-10-01,0.4128,0.5303,0.9345,0.21 480 | 2018-11-01,-1.1162,0.9907,-0.1113,0.2415 481 | 2018-12-01,0.1097,0.9299,0.6116,0.8604 482 | 2019-01-01,-0.7132,0.6767,0.592,0.8342 483 | 2019-02-01,1.1495,-0.4996,0.2914,-1.076 484 | 2019-03-01,2.1161,0.7446,1.2321,0.2502 485 | 2019-04-01,-0.2553,0.3358,0.466,-0.6116 486 | 2019-05-01,-1.2313,0.3346,-2.623,-0.2907 487 | 2019-06-01,-0.6013,1.465,-1.0886,0.1212 488 | 
2019-07-01,-0.8897,-0.3897,-1.4255,1.1116 489 | 2019-08-01,-0.7218,-1.0804,-1.1684,1.1786 490 | 2019-09-01,0.3062,0.563,-0.1641,2.0 491 | 2019-10-01,-0.0822,-0.9252,-1.4134,-1.0241 492 | 2019-11-01,-1.1934,-1.8398,0.2785,-0.0565 493 | 2019-12-01,0.4121,-1.3599,1.2016,0.1809 494 | 2020-01-01,2.419,-0.2312,1.3432,-0.2369 495 | 2020-02-01,3.4172,0.2746,1.257,0.1722 496 | 2020-03-01,2.6414,1.4264,1.0126,-2.171 497 | 2020-04-01,0.9281,-0.4752,-1.0224,-1.1756 498 | 2020-05-01,-0.0271,0.5766,-0.4098,0.2107 499 | 2020-06-01,-0.1218,1.0708,-0.1469,0.7038 500 | 2020-07-01,-0.4118,-0.5464,-1.2262,1.7276 501 | 2020-08-01,-0.3812,-0.7205,0.1217,1.8218 502 | 2020-09-01,0.6314,0.1943,0.985,0.7536 503 | 2020-10-01,-0.0717,1.2636,-0.6547,-1.1312 504 | 2020-11-01,2.0864,0.8132,2.5445,0.2351 505 | 2020-12-01,-1.736,1.4813,-0.3024,1.5844 506 | 2021-01-01,-2.4836,1.0446,-1.1087,0.1861 507 | 2021-02-01,-1.1907,1.3435,0.1361,-0.3074 508 | 2021-03-01,2.1092,0.0858,0.7299,-0.9688 509 | 2021-04-01,-0.2044,0.8274,-1.4251,-1.0486 510 | 2021-05-01,-0.1606,0.3138,-1.2386,-1.3469 511 | 2021-06-01,0.8446,1.1792,0.7655,0.666 512 | 2021-07-01,0.6302,-0.4595,0.026,0.5624 513 | 2021-08-01,-0.2093,-0.2268,-0.2825,0.9547 514 | 2021-09-01,-0.2516,1.3364,-0.2137,0.4433 515 | 2021-10-01,-0.1458,0.4532,-2.2899,1.134 516 | 2021-11-01,0.093,1.3211,-0.1846,0.7215 517 | 2021-12-01,0.1981,2.1548,0.2885,-2.5584 518 | 2022-01-01,0.8483,0.8252,1.0778,1.0093 519 | 2022-02-01,1.5444,0.6432,1.683,0.6577 520 | 2022-03-01,0.3052,0.5584,0.7677,0.1348 521 | 2022-04-01,-0.6026,0.5319,-0.3646,-0.735 522 | 2022-05-01,1.2235,0.0965,0.7062,-0.8263 523 | 2022-06-01,-0.0742,-0.8713,-0.118,-0.3128 524 | 2022-07-01,0.0249,0.4467,-0.0936,2.5404 525 | 2022-08-01,-0.17,0.7313,1.4699,0.7893 526 | 2022-09-01,-0.6552,1.4685,-1.6105,0.2093 527 | 2022-10-01,1.3457,0.3303,-0.7175,0.1662 528 | 2022-11-01,0.3389,1.7134,0.6922,-0.7341 529 | 2022-12-01,-2.7192,1.7004,-0.1456,-0.662 530 | 2023-01-01,-0.6743,2.3037,1.2503,0.2148 531 | 2023-02-01,1.6004,0.5536,0.9227,-0.64 532 | 2023-03-01,0.2803,-0.2582,-1.1088,-1.6341 533 | 2023-04-01,-0.9731,-0.9207,-0.6284,-0.4242 534 | 2023-05-01,1.1343,1.4518,0.3864,-0.8575 535 | 2023-06-01,-0.2862,-0.4379,-0.5784,0.687 536 | 2023-07-01,-0.1545,-0.8183,-2.1746,1.1454 537 | -------------------------------------------------------------------------------- /pattern_causality/pattern_causality.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """Pattern Causality Analysis Package. 4 | 5 | This module implements pattern causality analysis methods for time series data. 6 | It provides tools for analyzing causal relationships between variables using pattern-based approaches. 
7 |
8 | The package includes methods for:
9 | - Basic pattern causality analysis
10 | - Multivariate time series analysis
11 | - Cross-validation and parameter optimization
12 | - Effect metrics calculation and visualization
13 |
14 | Example:
15 | Basic usage example::
16 |
17 | >>> from pattern_causality import pattern_causality
18 | >>> pc = pattern_causality(verbose=True)
19 | >>> result = pc.pc_lightweight(X, Y, E=3, tau=1)
20 | """
21 |
22 | from __future__ import annotations
23 |
24 | # Standard library imports
25 | import time
26 | from typing import (
27 | Dict,
28 | List,
29 | Optional,
30 | Protocol,
31 | Sequence,
32 | Tuple,
33 | TypeVar,
34 | Union,
35 | )
36 | from dataclasses import dataclass
37 | from importlib.metadata import version, metadata
38 |
39 | # Third-party imports
40 | import numpy as np
41 | import pandas as pd
42 |
43 | # Local imports - the compiled C++ extension modules
44 | try:
45 | from utils.databank import databank
46 | from utils.distancematrix import distancematrix
47 | from utils.fcp import fcp
48 | from utils.fillPCMatrix import fillPCMatrix
49 | from utils.natureOfCausality import natureOfCausality
50 | from utils.pastNNs import pastNNs
51 | from utils.patternhashing import patternhashing
52 | from utils.patternspace import patternspace
53 | from utils.predictionY import predictionY
54 | from utils.projectedNNs import projectedNNs
55 | from utils.signaturespace import signaturespace
56 | from utils.statespace import statespace
57 | except ImportError as e:
58 | import warnings
59 | warnings.warn(f"Failed to import C++ modules: {str(e)}")
60 | # You might want to provide Python fallbacks here if available
61 |
62 | # Package metadata
63 | __version__ = version("pattern-causality")
64 | __author__ = metadata("pattern-causality").get("Author")
65 | __email__ = metadata("pattern-causality").get("Author-email")
66 | __license__ = metadata("pattern-causality").get("License")
67 | __copyright__ = f"Copyright (c) 2024 {__author__}"
68 | __all__ = ['pattern_causality']
69 |
70 | # Type aliases
71 | T = TypeVar('T')
72 | ArrayLike = Union[List[T], np.ndarray, pd.Series]
73 | DatasetType = Union[pd.DataFrame, np.ndarray, List[T]]
74 |
75 |
76 | @dataclass
77 | class PCMatrixResult:
78 | """Data class for storing pattern causality matrix results.
79 |
80 | Attributes:
81 | positive (np.ndarray): Matrix of positive causality values
82 | negative (np.ndarray): Matrix of negative causality values
83 | dark (np.ndarray): Matrix of dark causality values
84 | items (list): List of variable names corresponding to matrix indices
85 | """
86 | positive: np.ndarray
87 | negative: np.ndarray
88 | dark: np.ndarray
89 | items: list
90 |
91 |
92 | class pattern_causality:
93 | """Pattern Causality Analysis Class for Time Series Data.
94 |
95 | This class implements various pattern causality analysis methods for time series data.
96 | All methods return pandas DataFrames for consistency and ease of use.
97 |
98 | The class provides a comprehensive set of tools for analyzing causal relationships
99 | in time series data using pattern-based approaches.
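Example:
    A minimal sketch of the typical workflow (the series ``x`` and ``y``
    are illustrative placeholders for any two equal-length numeric
    series, not package data)::

        >>> import numpy as np
        >>> from pattern_causality import pattern_causality
        >>> rng = np.random.default_rng(0)
        >>> x = rng.standard_normal(200)
        >>> y = rng.standard_normal(200)
        >>> pc = pattern_causality(verbose=False)
        >>> result = pc.pc_lightweight(X=x, Y=y, E=3, tau=1)
        >>> list(result.columns)
        ['Total Causality', 'Positive Causality', 'Negative Causality', 'Dark Causality']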
100 | 101 | Attributes: 102 | verbose (bool): Whether to print detailed information during computation 103 | 104 | Methods: 105 | pc_lightweight: Basic pattern causality analysis for two time series 106 | pc_matrix: Calculate pattern causality matrix for multivariate time series 107 | pc_effect: Calculate effect metrics from pattern causality matrices 108 | pc_accuracy: Calculate pattern causality accuracy metrics 109 | pc_full_details: Detailed pattern causality analysis with time point information 110 | pc_cross_validation: Perform cross validation for pattern causality analysis 111 | optimal_parameters_search: Search for optimal E and tau parameters 112 | to_matrix: Convert flattened causality results to matrix format 113 | format_effects: Format effect results into matrices for visualization 114 | 115 | Note: 116 | All methods are designed to handle NaN values and invalid inputs gracefully. 117 | Error messages and warnings are provided when appropriate. 118 | """ 119 | 120 | def __init__(self, verbose: bool = False): 121 | """Initialize pattern_causality class 122 | 123 | Args: 124 | verbose: Whether to print detailed information during computation 125 | """ 126 | self.verbose = verbose 127 | 128 | @staticmethod 129 | def __version__(): 130 | """Return the current version of the package""" 131 | from importlib.metadata import version 132 | return version("pattern-causality") 133 | 134 | def __repr__(self) -> str: 135 | """Return string representation of the class""" 136 | return f"pattern_causality(verbose={self.verbose})" 137 | 138 | def __str__(self) -> str: 139 | """Return string description of the class""" 140 | return "Pattern Causality Analysis Class for Time Series Data" 141 | 142 | def _print_if_verbose(self, message: str, verbose: bool = None) -> None: 143 | """ 144 | Helper method to print messages when verbose is True 145 | 146 | Args: 147 | message: Message to print 148 | verbose: Override class-level verbose setting 149 | """ 150 | verbose = self.verbose if verbose is None else verbose 151 | if verbose: 152 | print(message) 153 | 154 | def _calculate_basic_stats(self, X: Union[List, np.ndarray], Y: Union[List, np.ndarray]) -> Dict: 155 | """Calculate basic statistics for time series""" 156 | X, Y = np.array(X), np.array(Y) 157 | stats = { 158 | "X_mean": np.mean(X), 159 | "X_std": np.std(X), 160 | "Y_mean": np.mean(Y), 161 | "Y_std": np.std(Y), 162 | "correlation": np.corrcoef(X, Y)[0, 1], 163 | "X_length": len(X), 164 | "missing_values": np.sum(np.isnan(X)) + np.sum(np.isnan(Y)) 165 | } 166 | return stats 167 | 168 | @staticmethod 169 | def _validate_input(X: Union[List, np.ndarray, pd.Series], 170 | Y: Union[List, np.ndarray, pd.Series]) -> tuple: 171 | """Validate and convert input time series to lists""" 172 | # Convert to numpy array first for type checking 173 | if isinstance(X, pd.Series): 174 | X = X.values 175 | elif isinstance(X, list): 176 | X = np.array(X) 177 | 178 | if isinstance(Y, pd.Series): 179 | Y = Y.values 180 | elif isinstance(Y, list): 181 | Y = np.array(Y) 182 | 183 | # Check if numeric 184 | if not np.issubdtype(X.dtype, np.number) or not np.issubdtype(Y.dtype, np.number): 185 | raise TypeError("All elements must be numeric") 186 | 187 | # Convert to list for processing 188 | return X.tolist(), Y.tolist() 189 | 190 | @staticmethod 191 | def _validate_dataset(dataset: Union[pd.DataFrame, np.ndarray, List]) -> pd.DataFrame: 192 | """Validate and convert dataset to DataFrame with numeric values""" 193 | if isinstance(dataset, np.ndarray): 194 | if 
not np.issubdtype(dataset.dtype, np.number):
195 | raise TypeError("All elements in array must be numeric")
196 | return pd.DataFrame(dataset)
197 | elif isinstance(dataset, list):
198 | arr = np.array(dataset).T
199 | if not np.issubdtype(arr.dtype, np.number):
200 | raise TypeError("All elements in list must be numeric")
201 | return pd.DataFrame(arr)
202 | elif isinstance(dataset, pd.DataFrame):
203 | if not all(dataset.dtypes.apply(lambda x: np.issubdtype(x, np.number))):
204 | raise TypeError("All columns in DataFrame must be numeric")
205 | return dataset
206 | else:
207 | raise TypeError("dataset must be a DataFrame, numpy array, or list")
208 |
209 | @staticmethod
210 | def _validate_pc_matrix_result(result: Dict) -> bool:
211 | """Validate that the input is a result from pc_matrix"""
212 | required_keys = {"positive", "negative", "dark", "items"}
213 |
214 | if not all(key in result for key in required_keys):
215 | return False
216 |
217 | matrices = [result["positive"], result["negative"], result["dark"]]
218 | if not all(isinstance(m, np.ndarray) for m in matrices):
219 | return False
220 |
221 | shapes = [m.shape for m in matrices]
222 | if not all(len(shape) == 2 and shape[0] == shape[1] for shape in shapes):
223 | return False
224 |
225 | if not isinstance(result["items"], list) or len(result["items"]) != matrices[0].shape[0]:
226 | return False
227 |
228 | return True
229 |
230 | def pc_lightweight(self,
231 | X: Union[List, np.ndarray, pd.Series],
232 | Y: Union[List, np.ndarray, pd.Series],
233 | E: int,
234 | tau: int,
235 | metric: str = "euclidean",
236 | h: int = 1,
237 | weighted: bool = False,
238 | relative: bool = True,
239 | verbose: bool = None) -> pd.DataFrame:
240 | """
241 | Pattern Causality Lightweight implementation
242 |
243 | Args:
244 | X: Input time series (causal variable)
245 | Y: Input time series (affected variable)
246 | E: Embedding dimension
247 | tau: Time delay
248 | metric: Distance metric to use
249 | h: Prediction horizon
250 | weighted: Whether to use weighted calculations
251 | relative: Whether to use relative differences (default: True; set False for absolute)
252 | verbose: Override class-level verbose setting
253 |
254 | Returns:
255 | DataFrame containing causality metrics
256 | """
257 | verbose = self.verbose if verbose is None else verbose
258 | start_time = time.time()
259 |
260 | X, Y = self._validate_input(X, Y)
261 |
262 | if verbose:
263 | stats = self._calculate_basic_stats(X, Y)
264 | self._print_if_verbose(f"\nInput Statistics:", verbose)
265 | self._print_if_verbose(f"X: mean={stats['X_mean']:.3f}, std={stats['X_std']:.3f}", verbose)
266 | self._print_if_verbose(f"Y: mean={stats['Y_mean']:.3f}, std={stats['Y_std']:.3f}", verbose)
267 | self._print_if_verbose(f"Correlation: {stats['correlation']:.3f}", verbose)
268 | self._print_if_verbose(f"Series length: {stats['X_length']}", verbose)
269 | if stats['missing_values'] > 0:
270 | self._print_if_verbose(f"Warning: {stats['missing_values']} missing values detected", verbose)
271 |
272 | # Initialize constants
273 | NNSPAN = E + 1
274 | CCSPAN = (E - 1) * tau
275 | hashedpatterns = patternhashing(E)
276 |
277 | if hashedpatterns is None or len(hashedpatterns) == 0:
278 | raise ValueError(f"Failed to generate hash patterns for E={E}")
279 |
280 | self._print_if_verbose(f"\nInitializing computation with E={E}, tau={tau}, h={h}", verbose)
281 |
282 | # Calculate shadow attractors
283 | self._print_if_verbose("Calculating state space and signatures...", verbose)
284 | Mx = statespace(X, E,
tau) 285 | My = statespace(Y, E, tau) 286 | SMx = signaturespace(Mx, E, relative=relative) 287 | SMy = signaturespace(My, E, relative=relative) 288 | PSMx = patternspace(SMx, E) 289 | PSMy = patternspace(SMy, E) 290 | Dx = distancematrix(Mx, metric=metric) 291 | Dy = distancematrix(My, metric=metric) 292 | 293 | # Check time series length 294 | FCP = fcp(E, tau, h, X) 295 | al_loop_dur = range(FCP - 1, len(X) - (E - 1) * tau - h + 1) 296 | total_steps = len(al_loop_dur) 297 | 298 | self._print_if_verbose(f"\nProcessing time series...", verbose) 299 | self._print_if_verbose(f"Total time points to analyze: {total_steps}\n", verbose) 300 | 301 | # Initialize causality matrix 302 | predictedPCMatrix = databank("array", [3 ** (E - 1), 3 ** (E - 1), len(Y)]) 303 | real_loop = None 304 | processed_points = 0 305 | valid_points = 0 306 | processable_points = 0 # Points that can be processed (no NaN, within bounds) 307 | 308 | # Main computation loop 309 | for i in al_loop_dur: 310 | processed_points += 1 311 | 312 | # Update progress every 10% 313 | progress_interval = max(1, total_steps // 10) 314 | if verbose and processed_points % progress_interval == 0: 315 | progress_percent = min(100, (processed_points/total_steps) * 100) # Ensure we don't exceed 100% 316 | self._print_if_verbose(f"Progress: {processed_points}/{total_steps} points processed ({progress_percent:.1f}%)", verbose) 317 | 318 | if i + h >= len(My): 319 | continue 320 | 321 | # Check if point can be processed (no NaN values) 322 | if not np.any(np.isnan(Mx[i, :])) and not np.any(np.isnan(My[i + h, :])): 323 | processable_points += 1 324 | NNx = pastNNs(CCSPAN, NNSPAN, Mx, Dx, SMx, PSMx, i, h) 325 | 326 | if NNx is not None and not np.any(np.isnan(NNx["dists"])): 327 | if not np.any(np.isnan(Dy[i, NNx["times"] + h])): 328 | valid_points += 1 329 | if real_loop is None: 330 | real_loop = i 331 | else: 332 | real_loop = np.append(real_loop, i) 333 | 334 | projNNy = projectedNNs(My, Dy, SMy, PSMy, NNx["times"], i, h) 335 | predicted_result = predictionY(E=E, projNNy=projNNy, zeroTolerance=E-1) 336 | 337 | # Get patterns and signatures 338 | predictedSignatureY = predicted_result["predictedSignatureY"] 339 | predictedPatternY = predicted_result["predictedPatternY"] 340 | signatureX = SMx[i, :] 341 | patternX = PSMx[i] 342 | realSignatureY = SMy[i + h, :] 343 | realPatternY = PSMy[i + h] 344 | 345 | # Calculate PC matrix values 346 | pc = fillPCMatrix( 347 | weighted=weighted, 348 | predictedPatternY=predictedPatternY, 349 | realPatternY=realPatternY, 350 | predictedSignatureY=predictedSignatureY, 351 | realSignatureY=realSignatureY, 352 | patternX=patternX, 353 | signatureX=signatureX 354 | ) 355 | 356 | # Find pattern indices 357 | tolerance = 1e-10 358 | hashedpatterns = np.array(hashedpatterns, dtype=np.float64) 359 | patternX_val = np.float64(patternX.item()) 360 | predictedPatternY_val = np.float64(predictedPatternY) 361 | 362 | patternX_matches = np.where(np.abs(hashedpatterns - patternX_val) < tolerance)[0] 363 | predictedPatternY_matches = np.where(np.abs(hashedpatterns - predictedPatternY_val) < tolerance)[0] 364 | 365 | if len(patternX_matches) > 0 and len(predictedPatternY_matches) > 0: 366 | patternX_idx = patternX_matches[0] 367 | predictedPatternY_idx = predictedPatternY_matches[0] 368 | predictedPCMatrix[patternX_idx, predictedPatternY_idx, i] = pc["predicted"] 369 | 370 | # Print final progress update 371 | if verbose and processed_points > 0: 372 | self._print_if_verbose(f"Progress: {total_steps}/{total_steps} points 
processed (100.0%)\n", verbose)
373 |
374 | # Calculate causality metrics
375 | self._print_if_verbose("Calculating final causality metrics...", verbose)
376 | # Convert real_loop to integer type compatible with C++ NPY_LONG
377 | if real_loop is not None:
378 | real_loop = np.asarray(real_loop, dtype=np.int32)
379 | causality = natureOfCausality(predictedPCMatrix, real_loop, hashedpatterns, X, weighted)
380 |
381 | # Calculate percentages
382 | totalCausPercent = 1 - np.nanmean(causality["noCausality"])
383 | mask = causality["noCausality"][real_loop] != 1
384 |
385 | if np.any(mask):
386 | valid_indices = real_loop[mask]
387 | valid_pos = causality["Positive"][valid_indices]
388 | valid_neg = causality["Negative"][valid_indices]
389 | valid_dark = causality["Dark"][valid_indices]
390 |
391 | valid_pos = valid_pos[~np.isnan(valid_pos)]
392 | valid_neg = valid_neg[~np.isnan(valid_neg)]
393 | valid_dark = valid_dark[~np.isnan(valid_dark)]
394 |
395 | posiCausPercent = np.mean(valid_pos) if len(valid_pos) > 0 else 0.0
396 | negaCausPercent = np.mean(valid_neg) if len(valid_neg) > 0 else 0.0
397 | darkCausPercent = np.mean(valid_dark) if len(valid_dark) > 0 else 0.0
398 |
399 | if weighted:
400 | total = posiCausPercent + negaCausPercent + darkCausPercent
401 | if total > 0:
402 | posiCausPercent /= total
403 | negaCausPercent /= total
404 | darkCausPercent /= total
405 | else:
406 | posiCausPercent = negaCausPercent = darkCausPercent = 0.0
407 |
408 | end_time = time.time()
409 | if verbose:
410 | self._print_if_verbose(f"\nComputation completed in {end_time - start_time:.2f} seconds", verbose)
411 | self._print_if_verbose("\nProcessing Summary:", verbose)
412 | self._print_if_verbose(f"Total points analyzed: {total_steps}", verbose)
413 | self._print_if_verbose(f"Points with valid data: {processable_points}", verbose)
414 | self._print_if_verbose(f"Successfully processed: {valid_points}/{processable_points} ({(valid_points/processable_points)*100:.1f}%)", verbose)
415 | self._print_if_verbose("\nResults:", verbose)
416 | self._print_if_verbose(f"Total Causality: {totalCausPercent:.3f}", verbose)
417 | self._print_if_verbose(f"Positive Causality: {posiCausPercent:.3f}", verbose)
418 | self._print_if_verbose(f"Negative Causality: {negaCausPercent:.3f}", verbose)
419 | self._print_if_verbose(f"Dark Causality: {darkCausPercent:.3f}", verbose)
420 |
421 | return pd.DataFrame({
422 | "Total Causality": [totalCausPercent],
423 | "Positive Causality": [posiCausPercent],
424 | "Negative Causality": [negaCausPercent],
425 | "Dark Causality": [darkCausPercent]
426 | })
427 |
428 | def pc_matrix(self,
429 | dataset: Union[pd.DataFrame, np.ndarray, List],
430 | E: int,
431 | tau: int,
432 | metric: str = "euclidean",
433 | h: int = 1,
434 | weighted: bool = False,
435 | relative: bool = True,
436 | verbose: bool = None) -> pd.DataFrame:
437 | """
438 | Calculate pattern causality matrix for multivariate time series
439 |
440 | Args:
441 | dataset: Input dataset
442 | E: Embedding dimension
443 | tau: Time delay
444 | metric: Distance metric to use
445 | h: Prediction horizon
446 | weighted: Whether to use weighted calculations
447 | relative: Whether to use relative differences (default: True; set False for absolute)
448 | verbose: Override class-level verbose setting
449 |
450 | Returns:
451 | pd.DataFrame: Flattened causality matrix where:
452 | - Each row represents a pair of variables (from_var, to_var)
453 | - Columns are ['from_var', 'to_var', 'positive', 'negative', 'dark']
454 | - NaN values indicate
self-causality (when from_var == to_var) 455 | """ 456 | verbose = self.verbose if verbose is None else verbose 457 | start_time = time.time() 458 | 459 | dataset = self._validate_dataset(dataset) 460 | n_cols = dataset.shape[1] 461 | 462 | if verbose: 463 | self._print_if_verbose(f"\nAnalyzing dataset with {n_cols} variables", verbose) 464 | self._print_if_verbose(f"Parameters: E={E}, tau={tau}, h={h}", verbose) 465 | 466 | # Basic dataset statistics 467 | self._print_if_verbose("\nDataset Statistics:", verbose) 468 | for i in range(n_cols): 469 | col = dataset.iloc[:, i] 470 | self._print_if_verbose(f"Variable {i}: mean={col.mean():.3f}, std={col.std():.3f}", verbose) 471 | 472 | # Get variable names 473 | items = dataset.columns.tolist() if dataset.columns is not None else [f"Var_{i}" for i in range(n_cols)] 474 | 475 | # Initialize results list 476 | results = [] 477 | 478 | total_pairs = n_cols * (n_cols - 1) 479 | processed_pairs = 0 480 | 481 | for i in range(n_cols): 482 | X = dataset.iloc[:, i].values.tolist() 483 | 484 | for j in range(n_cols): 485 | if i != j: 486 | processed_pairs += 1 487 | if verbose: 488 | self._print_if_verbose(f"\nAnalyzing pair ({items[i]}, {items[j]}) - Progress: {processed_pairs}/{total_pairs}", verbose) 489 | 490 | if fcp(E, tau, h, X): 491 | Y = dataset.iloc[:, j].values.tolist() 492 | if fcp(E, tau, h, Y): 493 | temp = self.pc_lightweight( 494 | X=X, 495 | Y=Y, 496 | E=E, 497 | tau=tau, 498 | metric=metric, 499 | h=h, 500 | weighted=weighted, 501 | relative=relative, 502 | verbose=False 503 | ) 504 | 505 | # Store results in flattened format 506 | results.append({ 507 | 'from_var': items[i], 508 | 'to_var': items[j], 509 | 'positive': temp["Positive Causality"].values[0], 510 | 'negative': temp["Negative Causality"].values[0], 511 | 'dark': temp["Dark Causality"].values[0] 512 | }) 513 | 514 | if verbose: 515 | self._print_if_verbose(f"Results for ({items[i]}, {items[j]}):", verbose) 516 | self._print_if_verbose(f" Positive: {results[-1]['positive']:.3f}", verbose) 517 | self._print_if_verbose(f" Negative: {results[-1]['negative']:.3f}", verbose) 518 | self._print_if_verbose(f" Dark: {results[-1]['dark']:.3f}", verbose) 519 | else: 520 | # Add NaN values for self-causality 521 | results.append({ 522 | 'from_var': items[i], 523 | 'to_var': items[j], 524 | 'positive': np.nan, 525 | 'negative': np.nan, 526 | 'dark': np.nan 527 | }) 528 | 529 | if verbose: 530 | end_time = time.time() 531 | self._print_if_verbose(f"\nComputation completed in {end_time - start_time:.2f} seconds", verbose) 532 | 533 | # Create DataFrame from results 534 | result_df = pd.DataFrame(results) 535 | 536 | # Optional: Sort by from_var and to_var for consistency 537 | result_df = result_df.sort_values(['from_var', 'to_var']).reset_index(drop=True) 538 | 539 | return result_df 540 | 541 | def to_matrix(self, flat_df: pd.DataFrame) -> Dict[str, pd.DataFrame]: 542 | """Convert flattened causality results to matrix format 543 | 544 | Args: 545 | flat_df: Flattened DataFrame from pc_matrix method 546 | 547 | Returns: 548 | Dictionary containing three matrices: 549 | - 'positive': Matrix of positive causality values 550 | - 'negative': Matrix of negative causality values 551 | - 'dark': Matrix of dark causality values 552 | """ 553 | # Get unique variable names 554 | variables = sorted(list(set(flat_df['from_var'].unique()) | set(flat_df['to_var'].unique()))) 555 | n = len(variables) 556 | 557 | # Initialize matrices 558 | matrices = { 559 | 'positive': pd.DataFrame(np.nan, 
index=variables, columns=variables), 560 | 'negative': pd.DataFrame(np.nan, index=variables, columns=variables), 561 | 'dark': pd.DataFrame(np.nan, index=variables, columns=variables) 562 | } 563 | 564 | # Fill matrices 565 | for _, row in flat_df.iterrows(): 566 | matrices['positive'].loc[row['from_var'], row['to_var']] = row['positive'] 567 | matrices['negative'].loc[row['from_var'], row['to_var']] = row['negative'] 568 | matrices['dark'].loc[row['from_var'], row['to_var']] = row['dark'] 569 | 570 | return matrices 571 | 572 | def format_effects(self, effect_df: pd.DataFrame) -> Dict[str, pd.DataFrame]: 573 | """Format effect results into matrices suitable for visualization 574 | 575 | This method transforms the effect results from pc_effect into a matrix format 576 | that is particularly suitable for visualization and analysis. The output matrices 577 | contain information about received and exerted effects, as well as their differences. 578 | 579 | Args: 580 | effect_df: DataFrame from pc_effect method containing causality effect metrics 581 | 582 | Returns: 583 | Dictionary containing three matrices: 584 | - 'positive': Matrix with columns [Received, Exerted, Difference] 585 | - 'negative': Matrix with columns [Received, Exerted, Difference] 586 | - 'dark': Matrix with columns [Received, Exerted, Difference] 587 | Each row represents a variable. 588 | 589 | Example: 590 | >>> pc = pattern_causality() 591 | >>> effects = pc.pc_effect(matrix_results) 592 | >>> effect_matrices = pc.format_effects(effects) 593 | >>> # Scatter plot for positive causality 594 | >>> plt.figure(figsize=(10, 6)) 595 | >>> plt.scatter(effect_matrices['positive']['Received'], 596 | ... effect_matrices['positive']['Exerted']) 597 | >>> plt.xlabel('Received Effects') 598 | >>> plt.ylabel('Exerted Effects') 599 | >>> plt.title('Positive Causality: Received vs Exerted Effects') 600 | >>> plt.show() 601 | 602 | Raises: 603 | KeyError: If required columns are missing from effect_df 604 | ValueError: If effect_df has invalid structure 605 | """ 606 | # Remove the 'Mean' row if it exists 607 | if 'Mean' in effect_df.index: 608 | effect_df = effect_df.drop('Mean') 609 | 610 | # Initialize the three matrices 611 | matrices = { 612 | 'positive': pd.DataFrame(index=effect_df.index), 613 | 'negative': pd.DataFrame(index=effect_df.index), 614 | 'dark': pd.DataFrame(index=effect_df.index) 615 | } 616 | 617 | # Fill the matrices 618 | for causality_type in ['positive', 'negative', 'dark']: 619 | type_cap = causality_type.capitalize() 620 | matrices[causality_type]['Received'] = effect_df[f'{type_cap}_Received'] 621 | matrices[causality_type]['Exerted'] = effect_df[f'{type_cap}_Exerted'] 622 | matrices[causality_type]['Difference'] = effect_df[f'{type_cap}_Difference'] 623 | 624 | return matrices 625 | 626 | def pc_effect(self, 627 | pcmatrix: pd.DataFrame, 628 | verbose: bool = None) -> pd.DataFrame: 629 | """ 630 | Calculate effect metrics from pattern causality matrices 631 | 632 | Args: 633 | pcmatrix: DataFrame from pc_matrix function (flattened format) 634 | verbose: Override class-level verbose setting 635 | 636 | Returns: 637 | pd.DataFrame: Effect metrics for each variable with columns: 638 | [Positive/Negative/Dark]_[Received/Exerted/Difference] 639 | """ 640 | verbose = self.verbose if verbose is None else verbose 641 | 642 | # Convert to matrix format first 643 | matrices = self.to_matrix(pcmatrix) 644 | 645 | if verbose: 646 | self._print_if_verbose("\nCalculating causality effects...", verbose) 647 | n_vars = 
len(matrices['positive']) 648 | self._print_if_verbose(f"Number of variables: {n_vars}", verbose) 649 | 650 | # Initialize results dictionary 651 | results = {} 652 | variables = matrices['positive'].index 653 | 654 | # Calculate metrics for each causality type 655 | for causality_type in ['positive', 'negative', 'dark']: 656 | matrix = matrices[causality_type].values 657 | 658 | # Calculate metrics 659 | received = np.nansum(matrix, axis=0) * 100 # Sum along rows (received effects) 660 | exerted = np.nansum(matrix, axis=1) * 100 # Sum along columns (exerted effects) 661 | diff = received - exerted 662 | 663 | # Store results 664 | results[f'{causality_type.capitalize()}_Received'] = received 665 | results[f'{causality_type.capitalize()}_Exerted'] = exerted 666 | results[f'{causality_type.capitalize()}_Difference'] = diff 667 | 668 | if verbose: 669 | self._print_if_verbose(f"\n{causality_type.capitalize()} Effects:", verbose) 670 | self._print_if_verbose(f"Mean Received: {np.nanmean(received):.2f}%", verbose) 671 | self._print_if_verbose(f"Mean Exerted: {np.nanmean(exerted):.2f}%", verbose) 672 | self._print_if_verbose(f"Mean Difference: {np.nanmean(diff):.2f}%", verbose) 673 | 674 | # Add detailed statistics for top effects 675 | if np.any(~np.isnan(received)): 676 | self._print_if_verbose("\nTop Variables by Effect:", verbose) 677 | self._print_if_verbose("Received Effects:", verbose) 678 | sorted_idx = np.argsort(received)[-3:] 679 | for idx in sorted_idx[::-1]: 680 | self._print_if_verbose(f" {variables[idx]}: {received[idx]:.2f}%", verbose) 681 | 682 | self._print_if_verbose("Exerted Effects:", verbose) 683 | sorted_idx = np.argsort(exerted)[-3:] 684 | for idx in sorted_idx[::-1]: 685 | self._print_if_verbose(f" {variables[idx]}: {exerted[idx]:.2f}%", verbose) 686 | 687 | # Create DataFrame with results 688 | result_df = pd.DataFrame(results, index=variables) 689 | 690 | # Add summary row 691 | summary = pd.DataFrame({ 692 | col: np.nanmean(result_df[col]) 693 | for col in result_df.columns 694 | }, index=['Mean']) 695 | 696 | result_df = pd.concat([result_df, summary]) 697 | 698 | return result_df 699 | 700 | def pc_accuracy(self, 701 | dataset: Union[pd.DataFrame, np.ndarray, List], 702 | E: int, 703 | tau: int, 704 | metric: str, 705 | h: int, 706 | weighted: bool, 707 | relative: bool = True) -> pd.DataFrame: 708 | """ 709 | Calculate pattern causality accuracy metrics for a dataset. 
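Example:
    An illustrative sketch only; ``data`` stands in for any all-numeric
    DataFrame (for instance the bundled climate indices with the Date
    column dropped) and ``pc`` for a pattern_causality instance::

        >>> acc = pc.pc_accuracy(data, E=3, tau=1, metric="euclidean",
        ...                      h=1, weighted=False)
        >>> float(acc["total"].values[0])  # mean causality over all pairs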
710 |
711 | Args:
712 | dataset: Input dataset (DataFrame, numpy array, or list)
713 | E: Embedding dimension
714 | tau: Time delay
715 | metric: Distance metric to use
716 | h: Prediction horizon
717 | weighted: Whether to use weighted calculations
718 | relative: Whether to use relative differences (default: True; set False for absolute)
719 |
720 | Returns:
721 | pd.DataFrame: Accuracy metrics with shape (1, 6)
722 | Columns: ['E', 'tau', 'total', 'positive', 'negative', 'dark']
723 | """
724 | # Convert input to numpy array properly
725 | dataset = self._validate_dataset(dataset)
726 | n_cols = dataset.shape[1]
727 |
728 | couplingsTotal = databank("matrix", [n_cols, n_cols])
729 | couplingsPosi = databank("matrix", [n_cols, n_cols])
730 | couplingsNega = databank("matrix", [n_cols, n_cols])
731 | couplingsDark = databank("matrix", [n_cols, n_cols])
732 |
733 | # Calculate causality for each pair of variables
734 | for i in range(n_cols):
735 | for j in range(n_cols):
736 | if i != j:
737 | X_list = dataset.iloc[:, i].values.tolist()
738 | Y_list = dataset.iloc[:, j].values.tolist()
739 |
740 | # Check if enough data points for causality calculation
741 | if fcp(E, tau, h, X_list) and fcp(E, tau, h, Y_list):
742 | # Calculate pattern causality
743 | results = self.pc_lightweight(
744 | X_list, Y_list, E, tau, metric, h, weighted, relative
745 | )
746 |
747 | # Store results
748 | couplingsTotal[i, j] = results["Total Causality"].values[0]
749 | couplingsPosi[i, j] = results["Positive Causality"].values[0]
750 | couplingsNega[i, j] = results["Negative Causality"].values[0]
751 | couplingsDark[i, j] = results["Dark Causality"].values[0]
752 |
753 | # Calculate mean metrics
754 | results = pd.DataFrame({
755 | 'E': [E],
756 | 'tau': [tau],
757 | 'total': [np.nanmean(couplingsTotal)],
758 | 'positive': [np.nanmean(couplingsPosi)],
759 | 'negative': [np.nanmean(couplingsNega)],
760 | 'dark': [np.nanmean(couplingsDark)]
761 | })
762 |
763 | return results
764 |
765 | def optimal_parameters_search(self,
766 | Emax: int,
767 | tau_max: int,
768 | metric: str = "euclidean",
769 | h: int = 1,
770 | weighted: bool = False,
771 | relative: bool = True,
772 | dataset: Union[pd.DataFrame, np.ndarray, List] = None,
773 | verbose: bool = None) -> pd.DataFrame:
774 | """
775 | Search for optimal parameters E and tau for pattern causality analysis.
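Example:
    A minimal sketch; ``data`` again stands in for any all-numeric
    DataFrame, and the small grid is chosen only to keep the
    illustration cheap (Emax must be greater than 2)::

        >>> search = pc.optimal_parameters_search(Emax=3, tau_max=2,
        ...                                       dataset=data)
        >>> search.sort_values("Total", ascending=False).head(1)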
776 |
777 | Args:
778 | Emax: Maximum embedding dimension (must be > 2)
779 | tau_max: Maximum time delay
780 | metric: Distance metric to use
781 | h: Prediction horizon
782 | weighted: Whether to use weighted calculations
783 | relative: Whether to use relative differences (default: True; set False for absolute)
784 | dataset: Input dataset (DataFrame, numpy array, or list)
785 | verbose: Override class-level verbose setting
786 |
787 | Returns:
788 | DataFrame containing accuracy results for different parameter combinations
789 | """
790 | verbose = self.verbose if verbose is None else verbose
791 | start_time = time.time()
792 |
793 | if dataset is None:
794 | raise ValueError("Dataset must be provided")
795 |
796 | if Emax < 3:
797 | raise ValueError("Emax must be greater than 2")
798 |
799 | # Validate dataset
800 | dataset = self._validate_dataset(dataset)
801 |
802 | if verbose:
803 | self._print_if_verbose(f"\nStarting parameter search:", verbose)
804 | self._print_if_verbose(f"Dataset shape: {dataset.shape}", verbose)
805 | self._print_if_verbose(f"E range: 2 to {Emax}", verbose)
806 | self._print_if_verbose(f"tau range: 1 to {tau_max}", verbose)
807 |
808 | E_array = range(2, Emax + 1)
809 | tau_array = range(1, tau_max + 1)
810 | total_combinations = len(E_array) * len(tau_array)
811 |
812 | if verbose:
813 | self._print_if_verbose(f"Total parameter combinations to test: {total_combinations}", verbose)
814 |
815 | # Initialize matrices using databank
816 | tests_total = databank("matrix", [len(E_array), len(tau_array)])
817 | tests_posi = databank("matrix", [len(E_array), len(tau_array)])
818 | tests_nega = databank("matrix", [len(E_array), len(tau_array)])
819 | tests_dark = databank("matrix", [len(E_array), len(tau_array)])
820 |
821 | combinations_tested = 0
822 | best_score = -np.inf
823 | best_params = None
824 |
825 | # Main calculation loop
826 | for i, E in enumerate(E_array):
827 | for j, tau in enumerate(tau_array):
828 | combinations_tested += 1
829 | if verbose:
830 | self._print_if_verbose(f"\nTesting combination {combinations_tested}/{total_combinations}", verbose)
831 | self._print_if_verbose(f"Parameters: E={E}, tau={tau}", verbose)
832 |
833 | temp = self.pc_accuracy(
834 | dataset=dataset,
835 | E=E,
836 | tau=tau,
837 | metric=metric,
838 | h=h,
839 | weighted=weighted,
840 | relative=relative
841 | )
842 |
843 | # Store results
844 | total_score = temp["total"].values[0]
845 | tests_total[i, j] = total_score
846 | tests_posi[i, j] = temp["positive"].values[0]
847 | tests_nega[i, j] = temp["negative"].values[0]
848 | tests_dark[i, j] = temp["dark"].values[0]
849 |
850 | # Track best parameters
851 | if total_score > best_score:
852 | best_score = total_score
853 | best_params = (E, tau)
854 |
855 | if verbose:
856 | self._print_if_verbose(f"Results:", verbose)
857 | self._print_if_verbose(f" Total: {total_score:.3f}", verbose)
858 | self._print_if_verbose(f" Positive: {temp['positive'].values[0]:.3f}", verbose)
859 | self._print_if_verbose(f" Negative: {temp['negative'].values[0]:.3f}", verbose)
860 | self._print_if_verbose(f" Dark: {temp['dark'].values[0]:.3f}", verbose)
861 |
862 | # Process results
863 | accuracy_summary = []
864 | for i, E in enumerate(E_array):
865 | for j, tau in enumerate(tau_array):
866 | row_data = {
867 | "E": E,
868 | "tau": tau,
869 | "Total": tests_total[i, j],
870 | "of which Positive": tests_posi[i, j],
871 | "of which Negative": tests_nega[i, j],
872 | "of which Dark": tests_dark[i, j],
873 | }
874 | accuracy_summary.append(row_data)
875 |
876 | # Create final DataFrame without custom index
877 | accuracy_df = pd.DataFrame(accuracy_summary)
878 |
879 | end_time = time.time()
880 | time_taken = end_time - start_time
881 |
882 | if verbose:
883 | self._print_if_verbose(f"\nParameter search completed in {time_taken:.2f} seconds", verbose)
884 | self._print_if_verbose(f"Best parameters found: E={best_params[0]}, tau={best_params[1]}", verbose)
885 | self._print_if_verbose(f"Best total score: {best_score:.3f}", verbose)
886 |
887 | # Additional statistics
888 | self._print_if_verbose("\nParameter Search Statistics:", verbose)
889 | self._print_if_verbose(f"Mean total score: {np.mean(tests_total):.3f}", verbose)
890 | self._print_if_verbose(f"Std total score: {np.std(tests_total):.3f}", verbose)
891 | self._print_if_verbose(f"Score range: [{np.min(tests_total):.3f}, {np.max(tests_total):.3f}]", verbose)
892 |
893 | return accuracy_df
894 |
895 | def pc_full_details(self,
896 | X: Union[List, np.ndarray, pd.Series],
897 | Y: Union[List, np.ndarray, pd.Series],
898 | E: int,
899 | tau: int,
900 | metric: str = "euclidean",
901 | h: int = 1,
902 | weighted: bool = False,
903 | relative: bool = True,
904 | verbose: bool = None) -> pd.DataFrame:
905 | """
906 | Pattern Causality Full Details implementation
907 |
908 | Args:
909 | X: Input time series (causal variable)
910 | Y: Input time series (affected variable)
911 | E: Embedding dimension
912 | tau: Time delay
913 | metric: Distance metric to use
914 | h: Prediction horizon
915 | weighted: Whether to use weighted calculations
916 | relative: Whether to use relative differences (default: True; set False for absolute)
917 | verbose: Override class-level verbose setting
918 |
919 | Returns:
920 | pd.DataFrame: Detailed causality metrics for each time point
921 | Columns: ['No Causality', 'Positive Causality',
922 | 'Negative Causality', 'Dark Causality']
923 | Each row represents a time point. For weighted=True,
924 | values are erf calculation results. For weighted=False,
925 | exactly one column will have value 1 and others 0.
926 | Points outside the valid range will be NaN.
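Example:
    Illustrative only; ``x`` and ``y`` stand in for any two
    equal-length numeric series::

        >>> details = pc.pc_full_details(x, y, E=3, tau=1)
        >>> details.iloc[-1]  # the appended final row holds per-type counts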
927 | """ 928 | verbose = self.verbose if verbose is None else verbose 929 | start_time = time.time() 930 | 931 | X, Y = self._validate_input(X, Y) 932 | 933 | if verbose: 934 | stats = self._calculate_basic_stats(X, Y) 935 | self._print_if_verbose(f"\nInput Statistics:", verbose) 936 | self._print_if_verbose(f"X: mean={stats['X_mean']:.3f}, std={stats['X_std']:.3f}", verbose) 937 | self._print_if_verbose(f"Y: mean={stats['Y_mean']:.3f}, std={stats['Y_std']:.3f}", verbose) 938 | self._print_if_verbose(f"Correlation: {stats['correlation']:.3f}", verbose) 939 | self._print_if_verbose(f"Series length: {stats['X_length']}", verbose) 940 | 941 | # Initialize constants 942 | NNSPAN = E + 1 943 | CCSPAN = (E - 1) * tau 944 | hashedpatterns = patternhashing(E) 945 | 946 | if hashedpatterns is None or len(hashedpatterns) == 0: 947 | raise ValueError(f"Failed to generate hash patterns for E={E}") 948 | 949 | if verbose: 950 | self._print_if_verbose(f"\nInitializing computation with E={E}, tau={tau}, h={h}", verbose) 951 | 952 | # Calculate shadow attractors 953 | Mx = statespace(X, E, tau) 954 | My = statespace(Y, E, tau) 955 | SMx = signaturespace(Mx, E, relative=relative) 956 | SMy = signaturespace(My, E, relative=relative) 957 | PSMx = patternspace(SMx, E) 958 | PSMy = patternspace(SMy, E) 959 | Dx = distancematrix(Mx, metric=metric) 960 | Dy = distancematrix(My, metric=metric) 961 | 962 | # Check time series length 963 | FCP = fcp(E, tau, h, X) 964 | al_loop_dur = range(FCP - 1, len(X) - (E - 1) * tau - h + 1) 965 | total_steps = len(al_loop_dur) 966 | 967 | if verbose: 968 | self._print_if_verbose(f"\nProcessing {total_steps} time points...", verbose) 969 | 970 | # Initialize causality matrix 971 | predictedPCMatrix = databank("array", [3 ** (E - 1), 3 ** (E - 1), len(Y)]) 972 | real_loop = None 973 | processed_points = 0 974 | valid_points = 0 975 | 976 | # Main computation loop 977 | for i in al_loop_dur: 978 | processed_points += 1 979 | if verbose and processed_points % max(1, total_steps // 10) == 0: 980 | self._print_if_verbose(f"Progress: {processed_points}/{total_steps} points processed ({(processed_points/total_steps)*100:.1f}%)", verbose) 981 | 982 | if i + h >= len(My): 983 | continue 984 | 985 | if not np.any(np.isnan(Mx[i, :])) and not np.any(np.isnan(My[i + h, :])): 986 | NNx = pastNNs(CCSPAN, NNSPAN, Mx, Dx, SMx, PSMx, i, h) 987 | 988 | if NNx is not None and not np.any(np.isnan(NNx["dists"])): 989 | if not np.any(np.isnan(Dy[i, NNx["times"] + h])): 990 | valid_points += 1 991 | if real_loop is None: 992 | real_loop = i 993 | else: 994 | real_loop = np.append(real_loop, i) 995 | 996 | projNNy = projectedNNs(My, Dy, SMy, PSMy, NNx["times"], i, h) 997 | predicted_result = predictionY(E=E, projNNy=projNNy, zeroTolerance=E-1) 998 | 999 | # Get patterns and signatures 1000 | predictedSignatureY = predicted_result["predictedSignatureY"] 1001 | predictedPatternY = predicted_result["predictedPatternY"] 1002 | signatureX = SMx[i, :] 1003 | patternX = PSMx[i] 1004 | realSignatureY = SMy[i + h, :] 1005 | realPatternY = PSMy[i + h] 1006 | 1007 | # Calculate PC matrix values 1008 | pc = fillPCMatrix( 1009 | weighted=weighted, 1010 | predictedPatternY=predictedPatternY, 1011 | realPatternY=realPatternY, 1012 | predictedSignatureY=predictedSignatureY, 1013 | realSignatureY=realSignatureY, 1014 | patternX=patternX, 1015 | signatureX=signatureX 1016 | ) 1017 | 1018 | # Find pattern indices 1019 | tolerance = 1e-10 1020 | hashedpatterns = np.array(hashedpatterns, dtype=np.float64) 1021 | 
1021 |                         patternX_val = np.float64(patternX.item())
1022 |                         predictedPatternY_val = np.float64(predictedPatternY)
1023 | 
1024 |                         patternX_matches = np.where(np.abs(hashedpatterns - patternX_val) < tolerance)[0]
1025 |                         predictedPatternY_matches = np.where(np.abs(hashedpatterns - predictedPatternY_val) < tolerance)[0]
1026 | 
1027 |                         if len(patternX_matches) > 0 and len(predictedPatternY_matches) > 0:
1028 |                             patternX_idx = patternX_matches[0]
1029 |                             predictedPatternY_idx = predictedPatternY_matches[0]
1030 |                             predictedPCMatrix[patternX_idx, predictedPatternY_idx, i] = pc["predicted"]
1031 | 
1032 |         # Print final progress update
1033 |         if verbose:
1034 |             self._print_if_verbose(f"Progress: {total_steps}/{total_steps} points processed (100.0%)\n", verbose)
1035 | 
1036 |         # Calculate causality metrics
1037 |         causality = natureOfCausality(predictedPCMatrix, real_loop, hashedpatterns, X, weighted)
1038 | 
1039 |         # Create DataFrame pre-filled with NaN values
1040 |         result_df = pd.DataFrame(
1041 |             np.full((len(X), 4), np.nan),
1042 |             columns=['No Causality', 'Positive Causality', 'Negative Causality', 'Dark Causality']
1043 |         )
1044 | 
1045 |         # Calculate valid range
1046 |         start_idx = FCP - 1
1047 |         end_idx = len(X) - (E - 1) * tau - h
1048 | 
1049 |         # Fill in causality values for the valid range
1050 |         if weighted:
1051 |             # For weighted=True, use the actual values from causality
1052 |             for i in range(len(X)):
1053 |                 if real_loop is not None and i in real_loop:
1054 |                     result_df.loc[i, 'No Causality'] = causality['noCausality'][i]
1055 |                     result_df.loc[i, 'Positive Causality'] = causality['Positive'][i]
1056 |                     result_df.loc[i, 'Negative Causality'] = causality['Negative'][i]
1057 |                     result_df.loc[i, 'Dark Causality'] = causality['Dark'][i]
1058 |         else:
1059 |             # For weighted=False, use binary values (0 or 1)
1060 |             for i in range(len(X)):
1061 |                 if real_loop is not None and i in real_loop:
1062 |                     if causality['noCausality'][i] == 1:
1063 |                         result_df.loc[i, 'No Causality'] = 1
1064 |                         result_df.loc[i, ['Positive Causality', 'Negative Causality', 'Dark Causality']] = 0
1065 |                     elif causality['Positive'][i] == 1:
1066 |                         result_df.loc[i, 'Positive Causality'] = 1
1067 |                         result_df.loc[i, ['No Causality', 'Negative Causality', 'Dark Causality']] = 0
1068 |                     elif causality['Negative'][i] == 1:
1069 |                         result_df.loc[i, 'Negative Causality'] = 1
1070 |                         result_df.loc[i, ['No Causality', 'Positive Causality', 'Dark Causality']] = 0
1071 |                     elif causality['Dark'][i] == 1:
1072 |                         result_df.loc[i, 'Dark Causality'] = 1
1073 |                         result_df.loc[i, ['No Causality', 'Positive Causality', 'Negative Causality']] = 0
1074 | 
1075 |         # Add summary row (counting only non-NaN values)
1076 |         summary = pd.DataFrame({
1077 |             'No Causality': [np.sum(~np.isnan(result_df['No Causality']) & (result_df['No Causality'] > 0))],
1078 |             'Positive Causality': [np.sum(~np.isnan(result_df['Positive Causality']) & (result_df['Positive Causality'] > 0))],
1079 |             'Negative Causality': [np.sum(~np.isnan(result_df['Negative Causality']) & (result_df['Negative Causality'] > 0))],
1080 |             'Dark Causality': [np.sum(~np.isnan(result_df['Dark Causality']) & (result_df['Dark Causality'] > 0))]
1081 |         })
1082 | 
1083 |         result_df = pd.concat([result_df, summary])
1084 | 
1085 |         if verbose:
1086 |             end_time = time.time()
1087 |             self._print_if_verbose(f"\nComputation completed in {end_time - start_time:.2f} seconds", verbose)
1088 |             self._print_if_verbose("\nCausality Summary:", verbose)
1089 |             self._print_if_verbose(f"Valid range: points {start_idx} to {end_idx}", verbose)
1090 |             self._print_if_verbose(f"No Causality Points: {int(summary['No Causality'].values[0])}", verbose)
1091 |             self._print_if_verbose(f"Positive Causality Points: {int(summary['Positive Causality'].values[0])}", verbose)
1092 |             self._print_if_verbose(f"Negative Causality Points: {int(summary['Negative Causality'].values[0])}", verbose)
1093 |             self._print_if_verbose(f"Dark Causality Points: {int(summary['Dark Causality'].values[0])}", verbose)
1094 | 
1095 |         return result_df
1096 | 
1097 |     def pc_cross_validation(self,
1098 |                             X: Union[List, np.ndarray, pd.Series],
1099 |                             Y: Union[List, np.ndarray, pd.Series],
1100 |                             E: int,
1101 |                             tau: int,
1102 |                             metric: str,
1103 |                             h: int,
1104 |                             weighted: bool,
1105 |                             numberset: Sequence[int],
1106 |                             relative: bool = True,
1107 |                             verbose: Union[bool, None] = None) -> pd.DataFrame:
1108 |         """
1109 |         Perform cross validation for pattern causality analysis
1110 | 
1111 |         Args:
1112 |             X: Input time series (causal variable)
1113 |             Y: Input time series (affected variable)
1114 |             E: Embedding dimension
1115 |             tau: Time delay
1116 |             metric: Distance metric to use
1117 |             h: Prediction horizon
1118 |             weighted: Whether to use weighted calculations
1119 |             numberset: Sequence of sample sizes to test
1120 |             relative: Whether to use relative differences (default: True; set False for absolute differences)
1121 |             verbose: Override class-level verbose setting
1122 | 
1123 |         Returns:
1124 |             DataFrame containing cross validation results
1125 |         """
1126 |         verbose = self.verbose if verbose is None else verbose
1127 |         start_time = time.time()
1128 | 
1129 |         if not isinstance(numberset, (list, tuple, np.ndarray)):
1130 |             raise TypeError("numberset must be a sequence of sample sizes (list, tuple, or ndarray).")
1131 | 
1132 |         X, Y = self._validate_input(X, Y)
1133 |         X = np.array(X)
1134 |         Y = np.array(Y)
1135 | 
1136 |         if max(numberset) > len(X):
1137 |             raise ValueError("The largest sample size exceeds the length of the dataset.")
1138 | 
1139 |         if verbose:
1140 |             stats = self._calculate_basic_stats(X, Y)
1141 |             self._print_if_verbose("\nCross Validation Setup:", verbose)
1142 |             self._print_if_verbose(f"Total data points: {len(X)}", verbose)
1143 |             self._print_if_verbose(f"Sample sizes to test: {numberset}", verbose)
1144 |             self._print_if_verbose(f"Parameters: E={E}, tau={tau}, h={h}", verbose)
1145 |             self._print_if_verbose("\nInput Statistics:", verbose)
1146 |             self._print_if_verbose(f"X: mean={stats['X_mean']:.3f}, std={stats['X_std']:.3f}", verbose)
1147 |             self._print_if_verbose(f"Y: mean={stats['Y_mean']:.3f}, std={stats['Y_std']:.3f}", verbose)
1148 |             self._print_if_verbose(f"Correlation: {stats['correlation']:.3f}", verbose)
1149 | 
1150 |         numbers = np.sort(numberset)
1151 |         positive = databank("vector", [len(numberset)])
1152 |         negative = databank("vector", [len(numberset)])
1153 |         dark = databank("vector", [len(numberset)])
1154 | 
1155 |         total_samples = len(numbers)
1156 | 
1157 |         for i, n in enumerate(numbers):
1158 |             if verbose:
1159 |                 self._print_if_verbose(f"\nProcessing sample size {n} ({i+1}/{total_samples})", verbose)
1160 | 
1161 |             sample_indices = np.sort(np.random.choice(len(X), size=n, replace=False))  # sorted to preserve temporal order for the embedding
1162 |             sample_x = X[sample_indices]
1163 |             sample_y = Y[sample_indices]
1164 | 
1165 |             results = self.pc_lightweight(
1166 |                 X=sample_x,
1167 |                 Y=sample_y,
1168 |                 E=E,
1169 |                 tau=tau,
1170 |                 metric=metric,
1171 |                 h=h,
1172 |                 weighted=weighted,
1173 |                 relative=relative,
1174 |                 verbose=False  # Suppress verbose output for individual calculations
1175 |             )
1176 | 
1177 |             positive[i] = results["Positive Causality"].values[0]
1178 |             negative[i] = results["Negative Causality"].values[0]
1179 |             dark[i] = results["Dark Causality"].values[0]
1180 | 
1181 |             if verbose:
1182 |                 self._print_if_verbose(f"Results for n={n}:", verbose)
1183 |                 self._print_if_verbose(f"  Positive: {positive[i]:.3f}", verbose)
1184 |                 self._print_if_verbose(f"  Negative: {negative[i]:.3f}", verbose)
1185 |                 self._print_if_verbose(f"  Dark: {dark[i]:.3f}", verbose)
1186 | 
1187 |         results_df = pd.DataFrame({
1188 |             "positive": positive,
1189 |             "negative": negative,
1190 |             "dark": dark
1191 |         }, index=numbers)
1192 | 
1193 |         if verbose:
1194 |             end_time = time.time()
1195 |             self._print_if_verbose(f"\nCross validation completed in {end_time - start_time:.2f} seconds", verbose)
1196 |             self._print_if_verbose("\nSummary Statistics:", verbose)
1197 |             self._print_if_verbose("Positive Causality:", verbose)
1198 |             self._print_if_verbose(f"  Mean: {np.mean(positive):.3f}", verbose)
1199 |             self._print_if_verbose(f"  Std: {np.std(positive):.3f}", verbose)
1200 |             self._print_if_verbose("Negative Causality:", verbose)
1201 |             self._print_if_verbose(f"  Mean: {np.mean(negative):.3f}", verbose)
1202 |             self._print_if_verbose(f"  Std: {np.std(negative):.3f}", verbose)
1203 |             self._print_if_verbose("Dark Causality:", verbose)
1204 |             self._print_if_verbose(f"  Mean: {np.mean(dark):.3f}", verbose)
1205 |             self._print_if_verbose(f"  Std: {np.std(dark):.3f}", verbose)
1206 | 
1207 |         return results_df
1208 | 
1209 |     @staticmethod
1210 |     def load_data(file_path: str, sep: str = ",", header: Union[int, None] = 0) -> pd.DataFrame:
1211 |         """Load data from a file into a pandas DataFrame.
1212 | 
1213 |         Args:
1214 |             file_path: Path to the data file
1215 |             sep: Separator used in the file (default: ",")
1216 |             header: Row number to use as column names (default: 0)
1217 |                 Use None if there is no header
1218 | 
1219 |         Returns:
1220 |             pd.DataFrame: Loaded data
1221 | 
1222 |         Raises:
1223 |             FileNotFoundError: If the file does not exist
1224 |             ValueError: If the file cannot be parsed
1225 |         """
1226 |         try:
1227 |             data = pd.read_csv(file_path, sep=sep, header=header)
1228 |             return data
1229 |         except FileNotFoundError:
1230 |             raise FileNotFoundError(f"Data file not found: {file_path}")
1231 |         except Exception as e:
1232 |             raise ValueError(f"Error loading data: {str(e)}") from e
1233 | 
1234 |     @staticmethod
1235 |     def get_supported_metrics() -> List[str]:
1236 |         """Return the list of supported distance metrics.
1237 | 
1238 |         Returns:
1239 |             List of supported metric names
1240 |         """
1241 |         return ["euclidean", "manhattan", "chebyshev", "minkowski"]
1242 | 
1243 |     def get_parameter_ranges(self) -> Dict[str, Tuple[int, int]]:
1244 |         """Return recommended parameter ranges.
1245 | 
1246 |         Returns:
1247 |             Dictionary with parameter names and their recommended ranges
1248 |         """
1249 |         return {
1250 |             "E": (2, 10),    # Embedding dimension
1251 |             "tau": (1, 5),   # Time delay
1252 |             "h": (1, 3)      # Prediction horizon
1253 |         }
1254 | 
1255 | 
--------------------------------------------------------------------------------
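
Usage sketch (not a file from the repository): a minimal example of calling pc_full_details on a synthetic pair of coupled series. The class name PatternCausality, its import path, and its constructor signature are assumptions made for illustration only; the actual exported name is defined earlier in pattern_causality.py and pattern_causality/__init__.py and may differ.

    # Hedged sketch: `PatternCausality` and its constructor are assumed names,
    # not confirmed by this listing; adjust to the package's actual export.
    import numpy as np
    from pattern_causality import PatternCausality  # hypothetical import

    rng = np.random.default_rng(0)
    X = np.cumsum(rng.standard_normal(500))                    # driver series
    Y = 0.8 * np.roll(X, 1) + 0.2 * rng.standard_normal(500)   # lagged response

    pc = PatternCausality(verbose=False)
    details = pc.pc_full_details(X, Y, E=3, tau=1, metric="euclidean",
                                 h=1, weighted=True, relative=True)
    # Rows hold per-time-point causality values (NaN outside the valid range);
    # the appended final row counts the non-NaN positive entries per column.
    print(details.tail(1))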
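
Usage sketch (illustrative, reusing the hypothetical pc, X, and Y objects from the sketch above): pc_cross_validation re-estimates the causality strengths on random subsamples of increasing size, so every entry of numberset must be at most len(X); here len(X) is 500.

    # Hedged sketch: builds on the assumed objects defined in the previous sketch.
    cv = pc.pc_cross_validation(X, Y, E=3, tau=1, metric="euclidean",
                                h=1, weighted=True,
                                numberset=[100, 200, 300, 400, 500])
    print(cv)  # index = sample size; columns = positive / negative / dark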
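
Usage sketch (illustrative): the helpers at the end of the class need no causality computation. The CSV path below is the bundled data file from the repository tree, resolved relative to the repository root; adjust it to your working directory.

    # Hedged sketch: static helpers plus the instance-level parameter ranges.
    df = PatternCausality.load_data("pattern_causality/data/Climate_Indices.csv")
    print(PatternCausality.get_supported_metrics())
    # -> ["euclidean", "manhattan", "chebyshev", "minkowski"]
    print(pc.get_parameter_ranges())
    # -> {"E": (2, 10), "tau": (1, 5), "h": (1, 3)}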