├── tests
│   ├── __init__.py
│   ├── test_basic.py
│   ├── test_utils.py
│   └── test_advanced.py
├── .flake8
├── requirements.txt
├── MANIFEST.in
├── pattern_causality
│   ├── cpp
│   │   ├── __init__.py
│   │   ├── fcp.cpp
│   │   ├── patternhashing.cpp
│   │   ├── patternspace.cpp
│   │   ├── distancematrix.cpp
│   │   ├── databank.cpp
│   │   ├── signaturespace.cpp
│   │   ├── statespace.cpp
│   │   ├── pastNNs.cpp
│   │   ├── natureOfCausality.cpp
│   │   ├── predictionY.cpp
│   │   ├── projectedNNs.cpp
│   │   └── fillPCMatrix.cpp
│   ├── __init__.py
│   ├── datasets.py
│   ├── data
│   │   └── Climate_Indices.csv
│   └── pattern_causality.py
├── LICENSE
├── pyproject.toml
├── meta.yaml
├── .github
│   └── workflows
│       ├── lint.yml
│       └── tests.yml
├── setup.py
└── README.md

/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | max-line-length = 88
3 | extend-ignore = E203
4 | exclude = .git,__pycache__,build,dist
5 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy>=1.19.0
2 | pandas>=1.0.0
3 | pytest>=6.0.0
4 | pytest-cov>=2.0.0
5 | black>=22.0.0
6 | isort>=5.0.0
7 | flake8>=4.0.0
8 | mypy>=0.900
9 | 
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.md
2 | include LICENSE
3 | include setup.py
4 | include requirements.txt
5 | include pyproject.toml
6 | 
7 | # C++ source files
8 | recursive-include pattern_causality/cpp *.cpp
9 | recursive-include pattern_causality/cpp *.h
10 | recursive-include pattern_causality/cpp *.hpp
11 | 
12 | # Data files
13 | recursive-include pattern_causality/data *.csv
14 | 
15 | # Python source files
16 | recursive-include pattern_causality *.py
17 | 
18 | # Include all header files
19 | recursive-include . *.h
20 | recursive-include .
*.hpp 21 | 22 | # Exclude unnecessary files 23 | global-exclude *.py[cod] 24 | global-exclude __pycache__ 25 | global-exclude *.so 26 | global-exclude *.dylib 27 | global-exclude .DS_Store 28 | global-exclude *.o 29 | global-exclude *.a 30 | global-exclude *.lib 31 | global-exclude *.dll 32 | global-exclude .git* 33 | global-exclude .vscode* 34 | global-exclude .idea* 35 | global-exclude *.egg-info -------------------------------------------------------------------------------- /pattern_causality/cpp/__init__.py: -------------------------------------------------------------------------------- 1 | """C++ implementations of pattern causality functions.""" 2 | 3 | try: 4 | from utils.databank import databank 5 | from utils.distancematrix import distancematrix 6 | from utils.fcp import fcp 7 | from utils.fillPCMatrix import fillPCMatrix 8 | from utils.natureOfCausality import natureOfCausality 9 | from utils.pastNNs import pastNNs 10 | from utils.patternhashing import patternhashing 11 | from utils.patternspace import patternspace 12 | from utils.predictionY import predictionY 13 | from utils.projectedNNs import projectedNNs 14 | from utils.signaturespace import signaturespace 15 | from utils.statespace import statespace 16 | except ImportError as e: 17 | import warnings 18 | warnings.warn(f"Failed to import C++ extensions: {str(e)}") 19 | 20 | __all__ = [ 21 | "databank", 22 | "distancematrix", 23 | "fcp", 24 | "fillPCMatrix", 25 | "natureOfCausality", 26 | "pastNNs", 27 | "patternhashing", 28 | "patternspace", 29 | "predictionY", 30 | "projectedNNs", 31 | "signaturespace", 32 | "statespace", 33 | ] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2024, Stavros Stavroglou, Athanasios Pantelous, Hui Wang 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
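
Note that the try/except guard in pattern_causality/cpp/__init__.py above only warns when the compiled extensions are missing, so the import never hard-fails. A caller can probe for the build explicitly; an illustrative sketch, not part of the repository:

# Probe whether the compiled utils.* extensions are importable.
try:
    from utils.fcp import fcp  # any one compiled module works as a probe
    HAVE_CPP_EXTENSIONS = True
except ImportError:
    HAVE_CPP_EXTENSIONS = False
print(f"C++ extensions available: {HAVE_CPP_EXTENSIONS}")
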
-------------------------------------------------------------------------------- /pattern_causality/__init__.py: -------------------------------------------------------------------------------- 1 | """Pattern Causality Analysis Package. 2 | 3 | This package provides tools for analyzing causal relationships in time series data. 4 | """ 5 | 6 | from importlib.metadata import version, metadata 7 | 8 | # Get package metadata 9 | __version__ = version("pattern-causality") 10 | __author__ = metadata("pattern-causality").get("Author") 11 | __email__ = metadata("pattern-causality").get("Author-email") 12 | __license__ = metadata("pattern-causality").get("License") 13 | __copyright__ = f"Copyright (c) 2024 {__author__}" 14 | 15 | # Import core classes 16 | from .pattern_causality import pattern_causality 17 | from .datasets import load_data, get_dataset_info 18 | 19 | # Import C++ extensions 20 | try: 21 | from utils.databank import databank 22 | from utils.distancematrix import distancematrix 23 | from utils.fcp import fcp 24 | from utils.fillPCMatrix import fillPCMatrix 25 | from utils.natureOfCausality import natureOfCausality 26 | from utils.pastNNs import pastNNs 27 | from utils.patternhashing import patternhashing 28 | from utils.patternspace import patternspace 29 | from utils.predictionY import predictionY 30 | from utils.projectedNNs import projectedNNs 31 | from utils.signaturespace import signaturespace 32 | from utils.statespace import statespace 33 | except ImportError as e: 34 | import warnings 35 | warnings.warn(f"Failed to import C++ extensions: {str(e)}") 36 | 37 | __all__ = [ 38 | "pattern_causality", 39 | "load_data", 40 | "get_dataset_info", 41 | "databank", 42 | "distancematrix", 43 | "fcp", 44 | "fillPCMatrix", 45 | "natureOfCausality", 46 | "pastNNs", 47 | "patternhashing", 48 | "patternspace", 49 | "predictionY", 50 | "projectedNNs", 51 | "signaturespace", 52 | "statespace", 53 | ] 54 | -------------------------------------------------------------------------------- /tests/test_basic.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | import pandas as pd 4 | from pattern_causality import pattern_causality, load_data 5 | 6 | 7 | class TestBasicFunctionality(unittest.TestCase): 8 | @classmethod 9 | def setUpClass(cls): 10 | """Load data once for all tests""" 11 | cls.data = load_data() 12 | cls.X = cls.data["NAO"].values 13 | cls.Y = cls.data["AAO"].values 14 | cls.pc = pattern_causality(verbose=False) 15 | 16 | def test_pc_lightweight_basic(self): 17 | """Test basic functionality of pc_lightweight""" 18 | result = self.pc.pc_lightweight(X=self.X, Y=self.Y, E=3, tau=1, h=1) 19 | self.assertIsInstance(result, pd.DataFrame) 20 | self.assertTrue("Total Causality" in result.columns) 21 | self.assertTrue("Positive Causality" in result.columns) 22 | self.assertTrue("Negative Causality" in result.columns) 23 | self.assertTrue("Dark Causality" in result.columns) 24 | 25 | def test_input_validation(self): 26 | """Test input validation""" 27 | # Test with non-numeric data 28 | with self.assertRaises(TypeError): 29 | self.pc.pc_lightweight(X=["invalid"], Y=self.Y, E=3, tau=1, h=1) 30 | 31 | # Test with invalid dimensions 32 | with self.assertRaises(ValueError): 33 | self.pc.pc_lightweight(X=[], Y=self.Y, E=3, tau=1, h=1) 34 | 35 | def test_weighted_vs_unweighted(self): 36 | """Test that weighted and unweighted calculations give different results""" 37 | weighted_result = self.pc.pc_lightweight( 38 | X=self.X, 
Y=self.Y, E=3, tau=1, h=1, weighted=True 39 | ) 40 | 41 | unweighted_result = self.pc.pc_lightweight( 42 | X=self.X, Y=self.Y, E=3, tau=1, h=1, weighted=False 43 | ) 44 | 45 | # Results should be different 46 | self.assertNotEqual( 47 | weighted_result["Positive Causality"].values[0], 48 | unweighted_result["Positive Causality"].values[0], 49 | ) 50 | 51 | 52 | if __name__ == "__main__": 53 | unittest.main() 54 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=45", 4 | "wheel", 5 | "numpy>=1.19.0", 6 | ] 7 | build-backend = "setuptools.build_meta" 8 | 9 | [project] 10 | name = "pattern-causality" 11 | version = "1.0.3" 12 | description = "Pattern Causality Algorithm in Python" 13 | readme = "README.md" 14 | requires-python = ">=3.8" 15 | license = {text = "BSD License"} 16 | authors = [ 17 | {name = "Stavros Stavroglou", email = "stavros.k.stavroglou@gmail.com"}, 18 | {name = "Athanasios Pantelous", email = "Athanasios.Pantelous@monash.edu"}, 19 | {name = "Hui Wang", email = "huiw1128@gmail.com"}, 20 | ] 21 | maintainers = [ 22 | {name = "Hui Wang", email = "huiw1128@gmail.com"}, 23 | ] 24 | dependencies = [ 25 | "numpy>=1.19.0", 26 | "pandas>=1.0.0", 27 | ] 28 | classifiers = [ 29 | "Programming Language :: Python :: 3", 30 | "Programming Language :: Python :: 3.8", 31 | "Programming Language :: Python :: 3.9", 32 | "Programming Language :: Python :: 3.10", 33 | "Programming Language :: Python :: 3.11", 34 | ] 35 | [project.urls] 36 | Homepage = "https://github.com/skstavroglou/pattern_causality_py" 37 | Repository = "https://github.com/skstavroglou/pattern_causality_py.git" 38 | Documentation = "https://github.com/skstavroglou/pattern_causality_py#readme" 39 | 40 | [tool.setuptools] 41 | packages = ["pattern_causality"] 42 | 43 | [tool.pytest.ini_options] 44 | testpaths = ["tests"] 45 | python_files = ["test_*.py"] 46 | addopts = "-v --cov=pattern_causality" 47 | 48 | [project.optional-dependencies] 49 | dev = [ 50 | "pytest>=6.0", 51 | "pytest-cov>=2.0", 52 | "black>=22.0", 53 | "isort>=5.0", 54 | "flake8>=4.0", 55 | ] 56 | 57 | [tool.black] 58 | line-length = 88 59 | target-version = ['py38'] 60 | extend-exclude = ''' 61 | # A regex preceded with ^/ will apply only to files and directories 62 | # in the root of the project. 
63 | ^/pattern_causality/pattern_causality.py 64 | ''' 65 | 66 | [tool.isort] 67 | profile = "black" 68 | multi_line_output = 3 69 | line_length = 88 70 | skip = ["pattern_causality/pattern_causality.py"] 71 | 72 | [tool.mypy] 73 | python_version = "3.8" 74 | ignore_missing_imports = true 75 | -------------------------------------------------------------------------------- /meta.yaml: -------------------------------------------------------------------------------- 1 | {% set name = "pattern-causality" %} 2 | {% set version = "1.0.3" %} 3 | 4 | package: 5 | name: {{ name|lower }} 6 | version: {{ version }} 7 | 8 | source: 9 | url: https://github.com/skstavroglou/pattern_causality_py/releases/download/v{{ version }}/pattern_causality-{{ version }}.tar.gz 10 | sha256: 99d42253d559992eb0137356ad52036789f00dd87ee7f5b1e4e6ebbad5e55141 11 | 12 | build: 13 | number: 0 14 | skip: true # [win] 15 | script: >- 16 | {% if target_platform == "osx-64" %} 17 | export SDKROOT=$(xcrun --sdk macosx --show-sdk-path) && export CC=${CC} && export CXX=${CXX} && export CFLAGS="${CFLAGS} -isysroot ${SDKROOT} -I${SDKROOT}/usr/include -mmacosx-version-min=10.9 -Xpreprocessor -fopenmp" && export CXXFLAGS="${CXXFLAGS} -stdlib=libc++ -isysroot ${SDKROOT} -I${SDKROOT}/usr/include -I${SDKROOT}/usr/include/c++/v1 -mmacosx-version-min=10.9 -Xpreprocessor -fopenmp" && export LDFLAGS="${LDFLAGS} -stdlib=libc++ -isysroot ${SDKROOT} -mmacosx-version-min=10.9 -lomp" && {{ PYTHON }} -m pip install . --no-deps -vv 18 | {% else %} 19 | {{ PYTHON }} -m pip install . --no-deps -vv 20 | {% endif %} 21 | 22 | requirements: 23 | build: 24 | - {{ compiler('c') }} 25 | - {{ compiler('cxx') }} 26 | - {{ stdlib('c') }} 27 | - make # [unix] 28 | - cmake # [win] 29 | - llvm-openmp >=14.0.6 # [osx] 30 | host: 31 | - python 32 | - pip 33 | - numpy 34 | - setuptools >=45 35 | - wheel 36 | - llvm-openmp >=14.0.6 # [osx] 37 | - libcxx # [osx] 38 | run: 39 | - python 40 | - {{ pin_compatible('numpy') }} 41 | - pandas >=1.0.0 42 | - llvm-openmp >=14.0.6 # [osx] 43 | - libcxx # [osx] 44 | 45 | test: 46 | requires: 47 | - pip 48 | - pytest >=6.0 49 | - pytest-cov >=2.0 50 | source_files: 51 | - tests 52 | - setup.py 53 | - pyproject.toml 54 | imports: 55 | - pattern_causality 56 | commands: 57 | - pip check 58 | - pytest tests -v --import-mode=append 59 | 60 | about: 61 | home: https://github.com/skstavroglou/pattern_causality_py 62 | license: BSD-3-Clause 63 | license_family: BSD 64 | license_file: LICENSE 65 | summary: Pattern Causality Algorithm in Python 66 | description: | 67 | A comprehensive Python library that implements the Pattern Causality algorithm 68 | for analyzing causal relationships in time series data. This package provides 69 | efficient tools for detecting and quantifying causality patterns between 70 | multiple time series, with a particular focus on nonlinear complex systems. 
71 |   doc_url: https://github.com/skstavroglou/pattern_causality_py#readme
72 |   dev_url: https://github.com/skstavroglou/pattern_causality_py
73 | 
74 | extra:
75 |   recipe-maintainers:
76 |     - wanghui5801
--------------------------------------------------------------------------------
/tests/test_utils.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import numpy as np
3 | from utils.statespace import statespace
4 | from utils.patternhashing import patternhashing
5 | from utils.signaturespace import signaturespace
6 | from utils.distancematrix import distancematrix
7 | from utils.patternspace import patternspace
8 | from utils.pastNNs import pastNNs
9 | from utils.projectedNNs import projectedNNs
10 | from utils.predictionY import predictionY
11 | from utils.fillPCMatrix import fillPCMatrix
12 | from utils.natureOfCausality import natureOfCausality
13 | from utils.databank import databank
14 | from utils.fcp import fcp
15 | 
16 | 
17 | class TestUtils(unittest.TestCase):
18 |     def setUp(self):
19 |         """Set up test data"""
20 |         self.time_series = np.sin(np.linspace(0, 10, 100))
21 |         self.E = 3
22 |         self.tau = 1
23 | 
24 |     def test_statespace(self):
25 |         """Test state space creation"""
26 |         result = statespace(self.time_series.tolist(), self.E, self.tau)
27 |         self.assertIsInstance(result, np.ndarray)
28 |         expected_shape = (len(self.time_series) - (self.E - 1) * self.tau, self.E)
29 |         self.assertEqual(result.shape, expected_shape)
30 | 
31 |     def test_patternhashing(self):
32 |         """Test pattern hashing"""
33 |         result = patternhashing(self.E)
34 |         self.assertIsInstance(result, np.ndarray)
35 |         self.assertEqual(len(result), 3 ** (self.E - 1))  # patternhashing enumerates 3^(E-1) patterns
36 | 
37 |     def test_distance_matrix(self):
38 |         """Test distance matrix calculation"""
39 |         state_space = statespace(self.time_series.tolist(), self.E, self.tau)
40 |         result = distancematrix(state_space, metric="euclidean")
41 |         self.assertIsInstance(result, np.ndarray)
42 |         self.assertEqual(result.shape, (len(state_space), len(state_space)))
43 | 
44 |     def test_fcp(self):
45 |         """Test first causality point calculation"""
46 |         result = fcp(self.E, self.tau, 1, self.time_series.tolist())
47 |         self.assertIsInstance(result, int)
48 |         self.assertGreater(result, 0)
49 | 
50 |     def test_databank(self):
51 |         """Test databank functionality"""
52 |         # Test vector creation
53 |         vector = databank("vector", [5])
54 |         self.assertIsInstance(vector, np.ndarray)
55 |         self.assertEqual(vector.shape, (5,))
56 | 
57 |         # Test matrix creation
58 |         matrix = databank("matrix", [3, 3])
59 |         self.assertIsInstance(matrix, np.ndarray)
60 |         self.assertEqual(matrix.shape, (3, 3))
61 | 
62 |         # Test array creation
63 |         array = databank("array", [2, 2, 2])
64 |         self.assertIsInstance(array, np.ndarray)
65 |         self.assertEqual(array.shape, (2, 2, 2))
66 | 
67 | 
68 | if __name__ == "__main__":
69 |     unittest.main()
70 | 
--------------------------------------------------------------------------------
/tests/test_advanced.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import numpy as np
3 | import pandas as pd
4 | from pattern_causality import pattern_causality, load_data
5 | 
6 | 
7 | class TestAdvancedFunctionality(unittest.TestCase):
8 |     @classmethod
9 |     def setUpClass(cls):
10 |         """Load data once for all tests"""
11 |         cls.data = load_data()
12 |         cls.X = cls.data["NAO"].values
13 |         cls.Y = cls.data["AAO"].values
14 |         cls.pc = pattern_causality(verbose=False)
15 | 
16 |     def test_pc_matrix(self):
17 | 
"""Test pc_matrix functionality""" 18 | results = self.pc.pc_matrix( 19 | dataset=self.data.drop(columns=["Date"]), 20 | E=3, 21 | tau=1, 22 | metric="euclidean", 23 | h=1, 24 | weighted=True, 25 | ) 26 | self.assertIsInstance(results, pd.DataFrame) 27 | self.assertTrue("from_var" in results.columns) 28 | self.assertTrue("to_var" in results.columns) 29 | self.assertTrue("positive" in results.columns) 30 | self.assertTrue("negative" in results.columns) 31 | self.assertTrue("dark" in results.columns) 32 | 33 | def test_cross_validation(self): 34 | """Test cross-validation functionality""" 35 | cv_results = self.pc.pc_cross_validation( 36 | X=self.X, 37 | Y=self.Y, 38 | E=3, 39 | tau=1, 40 | metric="euclidean", 41 | h=1, 42 | weighted=True, 43 | numberset=[100, 200, 300], 44 | ) 45 | self.assertIsInstance(cv_results, pd.DataFrame) 46 | self.assertEqual(len(cv_results), 3) 47 | self.assertTrue("positive" in cv_results.columns) 48 | self.assertTrue("negative" in cv_results.columns) 49 | self.assertTrue("dark" in cv_results.columns) 50 | 51 | def test_parameter_optimization(self): 52 | """Test parameter optimization""" 53 | result = self.pc.optimal_parameters_search( 54 | Emax=3, 55 | tau_max=2, 56 | metric="euclidean", 57 | h=1, 58 | weighted=False, 59 | dataset=self.data.drop(columns=["Date"]), 60 | ) 61 | self.assertIsInstance(result, pd.DataFrame) 62 | # Check if DataFrame contains necessary columns 63 | expected_columns = [ 64 | "E", 65 | "tau", 66 | "Total", 67 | "of which Positive", 68 | "of which Negative", 69 | "of which Dark", 70 | ] 71 | for col in expected_columns: 72 | self.assertIn(col, result.columns) 73 | # Check data types and ranges 74 | self.assertTrue(all(result["E"] >= 2)) 75 | self.assertTrue(all(result["tau"] >= 1)) 76 | 77 | 78 | if __name__ == "__main__": 79 | unittest.main() 80 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Lint 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | lint: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v3 10 | 11 | - name: Set up Python 12 | uses: actions/setup-python@v4 13 | with: 14 | python-version: "3.8" 15 | 16 | - name: Cache pip 17 | uses: actions/cache@v3 18 | with: 19 | path: ~/.cache/pip 20 | key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt', 'setup.py', 'pyproject.toml') }} 21 | restore-keys: | 22 | ${{ runner.os }}-pip- 23 | 24 | - name: Install system dependencies 25 | run: | 26 | sudo apt-get update 27 | sudo apt-get install -y g++ python3-dev libomp-dev build-essential 28 | 29 | - name: Install Python dependencies 30 | run: | 31 | python -m pip install --upgrade pip setuptools wheel 32 | python -m pip install numpy pandas 33 | python -m pip install black isort flake8 mypy typing-extensions 34 | python -m pip install build 35 | 36 | - name: Set up compiler environment 37 | run: | 38 | python_include=$(python3 -c 'import sysconfig; print(sysconfig.get_path("include"))') 39 | numpy_include=$(python3 -c 'import numpy; print(numpy.get_include())') 40 | echo "CFLAGS=-I${python_include} -I${numpy_include}" >> $GITHUB_ENV 41 | echo "CXXFLAGS=-I${python_include} -I${numpy_include} -fopenmp -std=c++11 -O3 -Wall -fPIC" >> $GITHUB_ENV 42 | echo "LDFLAGS=-fopenmp" >> $GITHUB_ENV 43 | echo "NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION" >> $GITHUB_ENV 44 | echo "CC=gcc" >> $GITHUB_ENV 45 | echo "CXX=g++" >> $GITHUB_ENV 46 | 47 | - name: Create directories 48 | 
run: | 49 | mkdir -p pattern_causality/utils 50 | mkdir -p pattern_causality/cpp 51 | mkdir -p utils 52 | touch pattern_causality/__init__.py 53 | touch pattern_causality/utils/__init__.py 54 | touch pattern_causality/cpp/__init__.py 55 | touch utils/__init__.py 56 | 57 | - name: Build package 58 | run: | 59 | python -m pip install -v -e . 60 | 61 | - name: Format with Black 62 | run: | 63 | black . --check --diff 64 | continue-on-error: true 65 | 66 | - name: Check imports with isort 67 | run: | 68 | isort . --check-only --diff 69 | continue-on-error: true 70 | 71 | - name: Lint with flake8 72 | run: | 73 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 74 | flake8 . --count --exit-zero --max-complexity=10 --statistics 75 | continue-on-error: true 76 | 77 | - name: Type check with mypy 78 | run: | 79 | mypy pattern_causality --ignore-missing-imports 80 | continue-on-error: true 81 | -------------------------------------------------------------------------------- /pattern_causality/datasets.py: -------------------------------------------------------------------------------- 1 | """Pattern Causality Datasets Module. 2 | 3 | This module provides access to built-in datasets for pattern causality analysis. 4 | The datasets included are: 5 | - Climate_Indices: A dataset containing climate oscillation indices for pattern causality analysis 6 | """ 7 | 8 | from typing import Dict 9 | import pandas as pd 10 | import os 11 | 12 | 13 | def load_data() -> pd.DataFrame: 14 | """Load the Climate Indices dataset included with the package. 15 | 16 | This function loads the built-in Climate_Indices.csv dataset, which contains 17 | climate oscillation indices data suitable for pattern causality analysis. 18 | 19 | Returns: 20 | pd.DataFrame: A DataFrame containing the climate indices data with the following columns: 21 | - Date: The date of the observation (YYYY-MM-DD) 22 | - AO: Arctic Oscillation index 23 | - AAO: Antarctic Oscillation index 24 | - NAO: North Atlantic Oscillation index 25 | - PNA: Pacific North American index 26 | 27 | Example: 28 | >>> from pattern_causality import load_data 29 | >>> data = load_data() 30 | >>> print(data.shape) 31 | (535, 5) 32 | """ 33 | data_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data", "Climate_Indices.csv") 34 | return pd.read_csv(data_path) 35 | 36 | 37 | def get_dataset_info() -> Dict[str, str]: 38 | """Get information about the built-in Climate Indices dataset. 39 | 40 | Returns: 41 | Dict[str, str]: A dictionary containing dataset information with keys: 42 | - description: General description of the dataset 43 | - source: Source of the data 44 | - citation: Citation information 45 | - variables: Description of the variables 46 | 47 | Example: 48 | >>> from pattern_causality import get_dataset_info 49 | >>> info = get_dataset_info() 50 | >>> print(info['description']) 51 | """ 52 | return { 53 | "description": "Climate Oscillation Indices dataset for pattern causality analysis", 54 | "source": "NOAA Climate Prediction Center", 55 | "citation": ( 56 | "Please cite the Pattern Causality package and the NOAA Climate " 57 | "Prediction Center when using this dataset." 
58 |         ),
59 |         "variables": (
60 |             "AO: Arctic Oscillation index - A climate pattern characterized by winds circulating "
61 |             "counterclockwise around the Arctic.\n"
62 |             "AAO: Antarctic Oscillation index - Also known as the Southern Annular Mode (SAM), "
63 |             "describing the north-south movement of the westerly wind belt around Antarctica.\n"
64 |             "NAO: North Atlantic Oscillation index - The atmospheric pressure difference between "
65 |             "the Azores and Iceland.\n"
66 |             "PNA: Pacific North American index - A climate pattern reflecting large-scale changes "
67 |             "in atmospheric wave patterns over North America."
68 |         )
69 |     }
70 | 
--------------------------------------------------------------------------------
/pattern_causality/cpp/fcp.cpp:
--------------------------------------------------------------------------------
1 | #define PY_SSIZE_T_CLEAN
2 | #include <Python.h>
3 | #include <numpy/arrayobject.h>
4 | #include <climits>
5 | 
6 | static PyObject* fcp(PyObject* self, PyObject* args) {
7 |     int E, tau, h;
8 |     PyObject* X;
9 | 
10 |     // Parse input arguments
11 |     if (!PyArg_ParseTuple(args, "iiiO", &E, &tau, &h, &X)) {
12 |         return NULL;
13 |     }
14 | 
15 |     // Validate input types
16 |     if (!PyList_Check(X) && !PyArray_Check(X)) {
17 |         PyErr_SetString(PyExc_TypeError, "X must be a list or numpy array");
18 |         return NULL;
19 |     }
20 | 
21 |     // Validate input values
22 |     if (E < 2) {
23 |         PyErr_SetString(PyExc_ValueError, "E must be >= 2");
24 |         return NULL;
25 |     }
26 |     if (tau < 1) {
27 |         PyErr_SetString(PyExc_ValueError, "tau must be >= 1");
28 |         return NULL;
29 |     }
30 |     if (h < 0) {
31 |         PyErr_SetString(PyExc_ValueError, "h must be >= 0");
32 |         return NULL;
33 |     }
34 | 
35 |     // Get length of input
36 |     Py_ssize_t X_len;
37 |     if (PyList_Check(X)) {
38 |         X_len = PyList_Size(X);
39 |     } else {
40 |         PyArrayObject* arr = (PyArrayObject*)X;
41 |         X_len = PyArray_SIZE(arr);
42 |     }
43 | 
44 |     if (X_len < 1) {
45 |         PyErr_SetString(PyExc_ValueError, "Input X cannot be empty");
46 |         return NULL;
47 |     }
48 | 
49 |     // Calculate constants with overflow checking
50 |     if (E > (INT_MAX - 1) || tau > INT_MAX / (E - 1)) {
51 |         PyErr_SetString(PyExc_OverflowError, "Parameters too large");
52 |         return NULL;
53 |     }
54 | 
55 |     int NNSPAN = E + 1;  // Former NN | Reserves a minimum number of nearest neighbors
56 |     int CCSPAN = (E - 1) * tau;  // This will remove the common coordinate NNs
57 |     int PredSPAN = h;
58 | 
59 |     // Check for integer overflow in final calculation
60 |     if (NNSPAN > INT_MAX - CCSPAN ||
61 |         NNSPAN + CCSPAN > INT_MAX - PredSPAN ||
62 |         NNSPAN + CCSPAN + PredSPAN > INT_MAX - 1) {
63 |         PyErr_SetString(PyExc_OverflowError, "Integer overflow in FCP calculation");
64 |         return NULL;
65 |     }
66 | 
67 |     int FCP = 1 + NNSPAN + CCSPAN + PredSPAN;
68 | 
69 |     // Validate sufficient data points
70 |     if (NNSPAN + CCSPAN + PredSPAN >= X_len - CCSPAN) {
71 |         PyErr_SetString(PyExc_ValueError,
72 |             "The First Point to consider for Causality does not have sufficient "
73 |             "Nearest Neighbors. Please Check parameters: "
74 |             "E, lag, p as well as the length of X and Y");
75 |         return NULL;
76 |     }
77 | 
78 |     return PyLong_FromLong((long)FCP);
79 | }
80 | 
81 | static PyMethodDef FcpMethods[] = {
82 |     {"fcp", fcp, METH_VARARGS, "Calculate first causality point"},
83 |     {NULL, NULL, 0, NULL}
84 | };
85 | 
86 | static struct PyModuleDef fcpmodule = {
87 |     PyModuleDef_HEAD_INIT,
88 |     "utils.fcp",  // Module name must match the built extension name "utils.fcp"
89 |     "First causality point calculation module",
90 |     -1,
91 |     FcpMethods
92 | };
93 | 
94 | PyMODINIT_FUNC PyInit_fcp(void) {
95 |     import_array();  // Initialize NumPy
96 | 
97 |     PyObject* m = PyModule_Create(&fcpmodule);
98 |     if (m == NULL) {
99 |         return NULL;
100 |     }
101 | 
102 |     return m;
103 | }
104 | 
--------------------------------------------------------------------------------
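
A minimal sketch of the first-causality-point arithmetic implemented above (FCP = 1 + (E + 1) + (E - 1) * tau + h), assuming the compiled extension is importable as utils.fcp; the values are illustrative:

# Mirror the FCP formula from fcp.cpp: 1 + NNSPAN + CCSPAN + PredSPAN.
from utils.fcp import fcp

E, tau, h = 3, 1, 1
series = [float(i) for i in range(100)]
expected = 1 + (E + 1) + (E - 1) * tau + h
assert fcp(E, tau, h, series) == expected == 8
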
/pattern_causality/cpp/patternhashing.cpp:
--------------------------------------------------------------------------------
1 | #define PY_SSIZE_T_CLEAN
2 | #include <Python.h>
3 | #include <numpy/arrayobject.h>
4 | #include <vector>
5 | #include <cmath>
6 | #include <algorithm>
7 | #include <limits>
8 | 
9 | // Helper function for factorial calculation
10 | static double factorial(int n) {
11 |     if (n <= 1) return 1.0;
12 |     double result = 1.0;
13 |     for (int i = 2; i <= n; ++i) {
14 |         result *= i;
15 |     }
16 |     return result;
17 | }
18 | 
19 | // Helper function to generate possible patterns
20 | static std::vector<std::vector<int>> possiblePatterns(int E) {
21 |     if (E <= 1) {
22 |         return std::vector<std::vector<int>>();
23 |     }
24 | 
25 |     // Calculate total number of combinations
26 |     const int numPatterns = pow(3, E-1);
27 |     std::vector<std::vector<int>> patterns(numPatterns);
28 | 
29 |     // Generate patterns using R's expand.grid logic
30 |     for (int i = 0; i < numPatterns; ++i) {
31 |         std::vector<int> pattern(E-1);
32 |         int temp = i;
33 | 
34 |         // Fill pattern from right to left (least significant to most significant)
35 |         for (int j = E-2; j >= 0; --j) {
36 |             pattern[j] = (temp % 3) + 1;  // Convert to 1, 2, 3
37 |             temp /= 3;
38 |         }
39 | 
40 |         patterns[i] = pattern;
41 |     }
42 | 
43 |     return patterns;
44 | }
45 | 
46 | // Helper function for hashing - must match R implementation exactly
47 | static double hashing(const std::vector<int>& vec) {
48 |     double hash = 0.0;
49 |     for (size_t i = 0; i < vec.size(); i++) {
50 |         hash += static_cast<double>(vec[i]) * factorial(i + 2);
51 |     }
52 |     return hash;
53 | }
54 | 
55 | // Main function: patternHashing
56 | static PyObject* patternHashing(PyObject* self, PyObject* args) {
57 |     int E;
58 |     if (!PyArg_ParseTuple(args, "i", &E)) {
59 |         return NULL;
60 |     }
61 | 
62 |     std::vector<std::vector<int>> patterns = possiblePatterns(E);
63 | 
64 |     // Handle E <= 1 case
65 |     if (patterns.empty()) {
66 |         npy_intp dims[] = {0};
67 |         return (PyObject*)PyArray_SimpleNew(1, dims, NPY_DOUBLE);
68 |     }
69 | 
70 |     // Calculate hash values
71 |     std::vector<double> hash_values;
72 |     hash_values.reserve(patterns.size());
73 | 
74 |     for (const auto& pattern : patterns) {
75 |         hash_values.push_back(hashing(pattern));
76 |     }
77 | 
78 |     // Create numpy array for results
79 |     npy_intp dims[] = {static_cast<npy_intp>(hash_values.size())};
80 |     PyArrayObject* result_array = (PyArrayObject*)PyArray_SimpleNew(1, dims, NPY_DOUBLE);
81 |     if (!result_array) {
82 |         return NULL;
83 |     }
84 | 
85 |     // Copy hash values to output array
86 |     double* data = (double*)PyArray_DATA(result_array);
87 |     std::copy(hash_values.begin(), hash_values.end(), data);
88 | 
89 |     return (PyObject*)result_array;
90 | }
91 | 
92 | static PyMethodDef PatternHashingMethods[] = {
93 |     {"patternhashing", patternHashing, METH_VARARGS, "Calculate pattern hashing"},
94 |     {NULL, NULL, 0, NULL}
95 | };
96 | 
97 | static struct PyModuleDef patternhashing_module = {
98 |     PyModuleDef_HEAD_INIT,
99 |     "patternhashing",
100 |     NULL,
101 |     -1,
102 |     PatternHashingMethods
103 | };
104 | 
105 | PyMODINIT_FUNC PyInit_patternhashing(void) {
106 |     import_array();
107 |     return PyModule_Create(&patternhashing_module);
108 | }
--------------------------------------------------------------------------------
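
The enumeration above walks the 3^(E-1) patterns over {1, 2, 3} with the last component varying fastest, and weights pattern component p_i (1-based) by (i + 1)!. A pure-Python sketch of the same scheme, standard library only, for illustration:

# Pure-Python mirror of possiblePatterns() and hashing() above.
from itertools import product
from math import factorial

def hash_pattern(pattern):
    # component i (0-based) of a pattern over {1, 2, 3} is weighted by (i + 2)!
    return float(sum(p * factorial(i + 2) for i, p in enumerate(pattern)))

E = 3
patterns = list(product((1, 2, 3), repeat=E - 1))  # last component fastest
assert len(patterns) == 3 ** (E - 1)               # 9 patterns for E = 3
assert [hash_pattern(p) for p in patterns[:3]] == [8.0, 14.0, 20.0]
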
/pattern_causality/cpp/patternspace.cpp:
--------------------------------------------------------------------------------
1 | #define PY_SSIZE_T_CLEAN
2 | #include <Python.h>
3 | #include <numpy/arrayobject.h>
4 | #include <vector>
5 | #include <cmath>
6 | #include <limits>
7 | #include <algorithm>
8 | 
9 | // Helper function for factorial calculation - must match R implementation exactly
10 | static double factorial(int n) {
11 |     if (n <= 1) return 1.0;
12 |     double result = 1.0;
13 |     for (int i = 2; i <= n; ++i) {
14 |         result *= i;
15 |     }
16 |     return result;
17 | }
18 | 
19 | // Helper function for hashing - must match R implementation exactly
20 | static double hashing(const std::vector<int>& vec) {
21 |     double hash = 0.0;
22 |     for (size_t i = 0; i < vec.size(); i++) {
23 |         hash += static_cast<double>(vec[i]) * factorial(i + 2);
24 |     }
25 |     return hash;
26 | }
27 | 
28 | // Pre-allocated vectors to avoid repeated allocation
29 | thread_local std::vector<double> result_buffer;
30 | thread_local std::vector<int> p_vec_buffer;
31 | 
32 | static double pattern_vector_difference(const std::vector<double>& sVec) {
33 |     // Quick check for NaN
34 |     for (const auto& val : sVec) {
35 |         if (std::isnan(val)) {
36 |             return std::numeric_limits<double>::quiet_NaN();
37 |         }
38 |     }
39 | 
40 |     // Reuse pre-allocated vector
41 |     if (p_vec_buffer.capacity() < sVec.size()) {
42 |         p_vec_buffer.reserve(sVec.size());
43 |     }
44 |     p_vec_buffer.clear();
45 | 
46 |     const double eps = std::numeric_limits<double>::epsilon();
47 | 
48 |     // Pattern calculation - must match R implementation exactly
49 |     for (const auto& val : sVec) {
50 |         if (std::abs(val) < eps) {
51 |             p_vec_buffer.push_back(2);  // zero
52 |         } else if (val > 0) {
53 |             p_vec_buffer.push_back(3);  // positive
54 |         } else {
55 |             p_vec_buffer.push_back(1);  // negative
56 |         }
57 |     }
58 | 
59 |     return hashing(p_vec_buffer);
60 | }
61 | 
62 | static PyObject* patternspace(PyObject* self, PyObject* args) {
63 |     PyObject* sm_obj;
64 |     int E;
65 | 
66 |     if (!PyArg_ParseTuple(args, "Oi", &sm_obj, &E)) {
67 |         return NULL;
68 |     }
69 | 
70 |     PyArrayObject* sm_array = (PyArrayObject*)PyArray_FROM_OTF(sm_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
71 |     if (!sm_array || PyArray_NDIM(sm_array) != 2) {
72 |         Py_XDECREF(sm_array);
73 |         PyErr_SetString(PyExc_ValueError, "Input must be a 2D array");
74 |         return NULL;
75 |     }
76 | 
77 |     npy_intp* dims = PyArray_DIMS(sm_array);
78 |     npy_intp num_rows = dims[0];
79 |     npy_intp num_cols = dims[1];
80 | 
81 |     npy_intp out_dims[2] = {num_rows, 1};
82 |     PyObject* result = PyArray_SimpleNew(2, out_dims, NPY_DOUBLE);
83 |     if (!result) {
84 |         Py_DECREF(sm_array);
85 |         return NULL;
86 |     }
87 | 
88 |     double* sm_data = (double*)PyArray_DATA(sm_array);
89 |     double* result_data = (double*)PyArray_DATA((PyArrayObject*)result);
90 | 
91 |     // Pre-allocate vector for row data
92 |     std::vector<double> row_buffer(num_cols);
93 | 
94 |     // Process each row sequentially to ensure consistent results
95 |     for (npy_intp i = 0; i < num_rows; i++) {
96 |         // Copy row data
97 |         std::copy(sm_data + i * num_cols, sm_data + (i + 1) * num_cols, row_buffer.begin());
98 |         result_data[i] = pattern_vector_difference(row_buffer);
99 |     }
100 | 
101 |     Py_DECREF(sm_array);
102 |     return result;
103 | }
104 | 
105 | static PyMethodDef PatternSpaceMethods[] = {
106 |     {"patternspace", patternspace, METH_VARARGS, "Calculate pattern space matrix from signature matrix"},
107 |     {NULL, NULL, 0, NULL}
108 | };
109 | 
110 | static struct PyModuleDef patternspacemodule = {
111 |     PyModuleDef_HEAD_INIT,
112 |     "patternspace",
113 |     "Pattern space calculation module",
114 |     -1,
115 |     PatternSpaceMethods
116 | };
117 | 
118 | PyMODINIT_FUNC PyInit_patternspace(void) {
119 |     import_array();
120 |     return PyModule_Create(&patternspacemodule);
121 | }
122 | 
--------------------------------------------------------------------------------
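
Each signature row thus collapses to a single hash: components with magnitude below machine epsilon count as flat (2), positive ones as up (3), negative ones as down (1), and any NaN poisons the whole row. A hypothetical Python mirror of pattern_vector_difference, for illustration:

# Hypothetical Python mirror of the per-row symbolization and hashing.
import numpy as np
from math import factorial

def row_to_hash(sig_row, eps=np.finfo(float).eps):
    if np.isnan(sig_row).any():
        return float("nan")
    symbols = [2 if abs(v) < eps else (3 if v > 0 else 1) for v in sig_row]
    return float(sum(s * factorial(i + 2) for i, s in enumerate(symbols)))

print(row_to_hash(np.array([0.5, -0.2])))  # up, down -> 3*2! + 1*3! = 12.0
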
/pattern_causality/cpp/distancematrix.cpp:
--------------------------------------------------------------------------------
1 | #define PY_SSIZE_T_CLEAN
2 | #include <Python.h>
3 | #include <numpy/arrayobject.h>
4 | #include <cmath>
5 | #include <string>
6 | 
7 | // Helper functions for different distance metrics
8 | static inline double euclideanDistance(const double* vec1, const double* vec2, size_t size) {
9 |     double sum = 0.0;
10 |     for (size_t i = 0; i < size; i++) {
11 |         double diff = vec1[i] - vec2[i];
12 |         sum += diff * diff;
13 |     }
14 |     return sqrt(sum);
15 | }
16 | 
17 | static inline double manhattanDistance(const double* vec1, const double* vec2, size_t size) {
18 |     double sum = 0.0;
19 |     for (size_t i = 0; i < size; i++) {
20 |         sum += fabs(vec1[i] - vec2[i]);
21 |     }
22 |     return sum;
23 | }
24 | 
25 | static inline double minkowskiDistance(const double* vec1, const double* vec2, size_t size, int n) {
26 |     double sum = 0.0;
27 |     for (size_t i = 0; i < size; i++) {
28 |         sum += pow(fabs(vec1[i] - vec2[i]), n);
29 |     }
30 |     return pow(sum, 1.0/n);
31 | }
32 | 
33 | static inline double calculateDistance(const double* vec1, const double* vec2, size_t size,
34 |                                        const std::string& metric, int n = 2) {
35 |     if (metric == "euclidean") {
36 |         return euclideanDistance(vec1, vec2, size);
37 |     } else if (metric == "manhattan") {
38 |         return manhattanDistance(vec1, vec2, size);
39 |     } else if (metric == "minkowski") {
40 |         return minkowskiDistance(vec1, vec2, size, n);
41 |     }
42 |     return euclideanDistance(vec1, vec2, size);
43 | }
44 | 
45 | static PyObject* distanceMatrix(PyObject* self, PyObject* args, PyObject* kwargs) {
46 |     PyObject* matrix_obj;
47 |     const char* metric_str = "euclidean";
48 |     int n = 2;
49 | 
50 |     static char* kwlist[] = {"matrix", "metric", "n", NULL};
51 | 
52 |     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|si", kwlist,
53 |                                      &matrix_obj, &metric_str, &n)) {
54 |         return NULL;
55 |     }
56 | 
57 |     PyArrayObject* matrix_array = (PyArrayObject*)PyArray_FROM_OTF(matrix_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
58 |     if (matrix_array == NULL) {
59 |         PyErr_SetString(PyExc_TypeError, "Could not convert input to numpy array");
60 |         return NULL;
61 |     }
62 | 
63 |     if (PyArray_NDIM(matrix_array) != 2) {
64 |         Py_DECREF(matrix_array);
65 |         PyErr_SetString(PyExc_ValueError, "Input must be a 2D array");
66 |         return NULL;
67 |     }
68 | 
69 |     npy_intp num_rows = PyArray_DIM(matrix_array, 0);
70 |     npy_intp vec_size = PyArray_DIM(matrix_array, 1);
71 | 
72 |     npy_intp dims[2] = {num_rows, num_rows};
73 |     PyObject* result_matrix = PyArray_SimpleNew(2, dims, NPY_DOUBLE);
74 |     if (result_matrix == NULL) {
75 |         Py_DECREF(matrix_array);
76 |         return NULL;
77 |     }
78 | 
79 |     double* matrix_data = (double*)PyArray_DATA(matrix_array);
80 |     double* result_data = (double*)PyArray_DATA((PyArrayObject*)result_matrix);
81 | 
82 |     std::string metric(metric_str);
83 | 
84 |     for (npy_intp i = 0; i < num_rows; i++) {
85 |         const double* vec1 = matrix_data + i * vec_size;
86 |         result_data[i * 
num_rows + i] = 0.0; 87 | 88 | for (npy_intp j = i + 1; j < num_rows; j++) { 89 | const double* vec2 = matrix_data + j * vec_size; 90 | double dist = calculateDistance(vec1, vec2, vec_size, metric, n); 91 | 92 | result_data[i * num_rows + j] = dist; 93 | result_data[j * num_rows + i] = dist; 94 | } 95 | } 96 | 97 | Py_DECREF(matrix_array); 98 | 99 | return result_matrix; 100 | } 101 | 102 | static PyMethodDef DistanceMatrixMethods[] = { 103 | {"distancematrix", (PyCFunction)distanceMatrix, METH_VARARGS | METH_KEYWORDS, 104 | "Calculate distance matrix for a set of vectors"}, 105 | {NULL, NULL, 0, NULL} 106 | }; 107 | 108 | static struct PyModuleDef distancematrixmodule = { 109 | PyModuleDef_HEAD_INIT, 110 | "distancematrix", 111 | "Distance calculation module", 112 | -1, 113 | DistanceMatrixMethods 114 | }; 115 | 116 | PyMODINIT_FUNC PyInit_distancematrix(void) { 117 | import_array(); 118 | return PyModule_Create(&distancematrixmodule); 119 | } -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | test: 11 | runs-on: ubuntu-latest 12 | strategy: 13 | matrix: 14 | python-version: ["3.8", "3.9", "3.10"] 15 | fail-fast: false 16 | 17 | steps: 18 | - uses: actions/checkout@v3 19 | with: 20 | fetch-depth: 0 21 | 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v4 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | 27 | - name: Cache pip 28 | uses: actions/cache@v3 29 | with: 30 | path: ~/.cache/pip 31 | key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt', 'setup.py', 'pyproject.toml') }} 32 | restore-keys: | 33 | ${{ runner.os }}-pip- 34 | 35 | - name: Install system dependencies 36 | run: | 37 | sudo apt-get update 38 | sudo apt-get install -y g++ python3-dev libomp-dev build-essential 39 | # Print system information 40 | echo "System information:" 41 | uname -a 42 | g++ --version 43 | python3 --version 44 | 45 | - name: Install build dependencies 46 | run: | 47 | python -m pip install --upgrade pip setuptools wheel 48 | python -m pip install numpy pandas 49 | python -m pip install pytest pytest-cov 50 | # Print installed packages 51 | echo "Installed Python packages:" 52 | pip list 53 | 54 | - name: Set up compiler environment 55 | run: | 56 | # Get Python and NumPy include paths 57 | PYTHON_INCLUDE=$(python3 -c 'import sysconfig; print(sysconfig.get_path("include"))') 58 | NUMPY_INCLUDE=$(python3 -c 'import numpy; print(numpy.get_include())') 59 | 60 | # Set up environment variables 61 | echo "CFLAGS=-I${PYTHON_INCLUDE} -I${NUMPY_INCLUDE} -O3" >> $GITHUB_ENV 62 | echo "CXXFLAGS=-I${PYTHON_INCLUDE} -I${NUMPY_INCLUDE} -std=c++11 -fopenmp -O3 -Wall -fPIC" >> $GITHUB_ENV 63 | echo "LDFLAGS=-fopenmp" >> $GITHUB_ENV 64 | echo "NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION" >> $GITHUB_ENV 65 | echo "CC=gcc" >> $GITHUB_ENV 66 | echo "CXX=g++" >> $GITHUB_ENV 67 | 68 | # Print environment for debugging 69 | echo "Python include path: ${PYTHON_INCLUDE}" 70 | echo "NumPy include path: ${NUMPY_INCLUDE}" 71 | echo "Checking if include directories exist:" 72 | ls -la ${PYTHON_INCLUDE} || echo "Python include directory not found" 73 | ls -la ${NUMPY_INCLUDE} || echo "NumPy include directory not found" 74 | 75 | - name: Create package structure 76 | run: | 77 | mkdir -p utils 
pattern_causality/utils pattern_causality/cpp 78 | 79 | cat > utils/__init__.py << 'EOL' 80 | from .statespace import statespace 81 | from .patternhashing import patternhashing 82 | from .signaturespace import signaturespace 83 | from .distancematrix import distancematrix 84 | from .patternspace import patternspace 85 | from .pastNNs import pastNNs 86 | from .projectedNNs import projectedNNs 87 | from .predictionY import predictionY 88 | from .fillPCMatrix import fillPCMatrix 89 | from .natureOfCausality import natureOfCausality 90 | from .databank import databank 91 | from .fcp import fcp 92 | 93 | __all__ = [ 94 | 'statespace', 'patternhashing', 'signaturespace', 'distancematrix', 95 | 'patternspace', 'pastNNs', 'projectedNNs', 'predictionY', 96 | 'fillPCMatrix', 'natureOfCausality', 'databank', 'fcp' 97 | ] 98 | EOL 99 | 100 | touch pattern_causality/utils/__init__.py 101 | touch pattern_causality/cpp/__init__.py 102 | 103 | echo "Package structure created:" 104 | find . -type d 105 | 106 | - name: Install package 107 | run: | 108 | echo "Building package in verbose mode..." 109 | python -m pip install -v -e . 110 | 111 | - name: List directory structure and environment 112 | run: | 113 | echo "Current directory structure:" 114 | find . -type f -name "*.py" -o -name "*.cpp" 115 | echo "Environment variables:" 116 | env | grep -E "CFLAGS|CXXFLAGS|LDFLAGS|NPY|CC|CXX" 117 | echo "Python and package information:" 118 | python --version 119 | pip list 120 | echo "C++ source files:" 121 | ls -la pattern_causality/cpp/ 122 | 123 | - name: Run tests 124 | run: | 125 | echo "Running tests with coverage..." 126 | python -m pytest tests/ --cov=pattern_causality -v 127 | 128 | - name: Upload coverage reports 129 | if: success() 130 | uses: codecov/codecov-action@v3 131 | with: 132 | fail_ci_if_error: false 133 | verbose: true -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages, Extension 2 | import os 3 | import platform 4 | import sys 5 | import numpy as np 6 | import sysconfig 7 | 8 | def get_compiler_args(): 9 | """Get platform-specific compiler arguments.""" 10 | system = platform.system().lower() 11 | 12 | if system == "darwin": # macOS 13 | return { 14 | "extra_compile_args": [ 15 | "-O3", 16 | "-fPIC", 17 | "-std=c++11", 18 | "-stdlib=libc++", 19 | "-mmacosx-version-min=10.9", 20 | "-Wno-unused-function", 21 | "-Wno-unused-variable", 22 | "-Wno-deprecated-declarations", 23 | "-Wno-c++11-narrowing", 24 | "-v", 25 | ], 26 | "extra_link_args": [ 27 | "-stdlib=libc++", 28 | "-mmacosx-version-min=10.9", 29 | "-v", 30 | ] 31 | } 32 | elif system == "linux": 33 | return { 34 | "extra_compile_args": [ 35 | "-O3", 36 | "-fPIC", 37 | "-std=c++11", 38 | "-v", 39 | ], 40 | "extra_link_args": ["-v"] 41 | } 42 | elif system == "windows": 43 | return { 44 | "extra_compile_args": ["/O2", "/W3", "/EHsc", "/std:c++11", "/verbose"], 45 | "extra_link_args": ["/verbose"] 46 | } 47 | else: 48 | return { 49 | "extra_compile_args": ["-O3", "-fPIC", "-std=c++11", "-v"], 50 | "extra_link_args": ["-v"] 51 | } 52 | 53 | def get_include_dirs(): 54 | """Get include directories for compilation.""" 55 | cpp_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "pattern_causality", "cpp") 56 | include_dirs = [ 57 | np.get_include(), 58 | cpp_dir, 59 | sysconfig.get_path('include'), 60 | ] 61 | 62 | # Add platform-specific include directories 63 | if 
platform.system() == "Darwin":
64 |         mac_dirs = [
65 |             "/usr/local/include",
66 |             "/usr/include",
67 |             "/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include",
68 |         ]
69 |         include_dirs.extend(d for d in mac_dirs if os.path.exists(d))
70 | 
71 |     return include_dirs
72 | 
73 | def get_extensions():
74 |     """Get the list of C++ extensions to be built."""
75 |     cpp_dir = os.path.join("pattern_causality", "cpp")
76 |     include_dirs = get_include_dirs()
77 | 
78 |     compiler_args = get_compiler_args()
79 | 
80 |     extensions = []
81 |     cpp_files = [
82 |         "statespace",
83 |         "patternhashing",
84 |         "signaturespace",
85 |         "distancematrix",
86 |         "patternspace",
87 |         "pastNNs",
88 |         "projectedNNs",
89 |         "predictionY",
90 |         "fillPCMatrix",
91 |         "natureOfCausality",
92 |         "databank",
93 |         "fcp"
94 |     ]
95 | 
96 |     for cpp_file in cpp_files:
97 |         ext = Extension(
98 |             f"utils.{cpp_file}",
99 |             sources=[f"pattern_causality/cpp/{cpp_file}.cpp"],
100 |             language="c++",
101 |             include_dirs=include_dirs,
102 |             extra_compile_args=compiler_args["extra_compile_args"],
103 |             extra_link_args=compiler_args["extra_link_args"]
104 |         )
105 |         extensions.append(ext)
106 | 
107 |     return extensions
108 | 
109 | # Print build environment information
110 | print("\nBuild Environment:")
111 | print(f"Platform: {platform.system()} {platform.machine()}")
112 | print(f"Python: {sys.version}")
113 | print(f"NumPy: {np.__version__}")
114 | print(f"Compiler: {sysconfig.get_config_var('CC')}")
115 | 
116 | # Read README
117 | with open("README.md", encoding="utf-8") as f:
118 |     long_description = f.read()
119 | 
120 | setup(
121 |     name="pattern-causality",
122 |     version="1.0.3",
123 |     description="Pattern Causality Algorithm in Python",
124 |     long_description=long_description,
125 |     long_description_content_type="text/markdown",
126 |     author="Stavros Stavroglou, Athanasios Pantelous, Hui Wang",
127 |     author_email="huiw1128@gmail.com",
128 |     url="https://github.com/skstavroglou/pattern_causality_py",
129 |     packages=find_packages(),
130 |     package_dir={"": "."},
131 |     package_data={
132 |         'pattern_causality': [
133 |             'cpp/*.cpp',
134 |             'cpp/*.h',
135 |             'cpp/*.hpp',
136 |             'cpp/*.so',
137 |             'cpp/*.dylib',
138 |             'data/*.csv'
139 |         ],
140 |     },
141 |     ext_modules=get_extensions(),
142 |     python_requires=">=3.8",
143 |     install_requires=[
144 |         "numpy>=1.19.0",
145 |         "pandas>=1.0.0",
146 |     ],
147 |     include_package_data=True,
148 |     zip_safe=False,
149 |     classifiers=[
150 |         "Development Status :: 5 - Production/Stable",
151 |         "Intended Audience :: Science/Research",
152 |         "License :: OSI Approved :: BSD License",
153 |         "Operating System :: OS Independent",
154 |         "Programming Language :: Python :: 3",
155 |         "Programming Language :: Python :: 3.8",
156 |         "Programming Language :: Python :: 3.9",
157 |         "Programming Language :: Python :: 3.10",
158 |         "Programming Language :: Python :: 3.11",
159 |         "Programming Language :: C++",
160 |         "Topic :: Scientific/Engineering",
161 |     ],
162 | )
163 | 
--------------------------------------------------------------------------------
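
Since every extension listed in get_extensions() above is compiled under the utils namespace, a quick smoke test after building (for example with pip install -e .) is to import each module. A sketch, not part of the repository:

# Smoke test: every C++ extension from get_extensions() should import.
import importlib

modules = ["statespace", "patternhashing", "signaturespace", "distancematrix",
           "patternspace", "pastNNs", "projectedNNs", "predictionY",
           "fillPCMatrix", "natureOfCausality", "databank", "fcp"]
for name in modules:
    importlib.import_module(f"utils.{name}")
print("all C++ extensions import cleanly")
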
/pattern_causality/cpp/databank.cpp:
--------------------------------------------------------------------------------
1 | #define PY_SSIZE_T_CLEAN
2 | #include <Python.h>
3 | #include <numpy/arrayobject.h>
4 | #include <vector>
5 | #include <string>
6 | #include <limits>
7 | 
8 | static PyObject* databank(PyObject* self, PyObject* args) {
9 |     const char* type_name;
10 |     PyObject* dimensions_obj;
11 | 
12 |     // Parse arguments
13 |     if (!PyArg_ParseTuple(args, "sO", &type_name, &dimensions_obj)) {
14 |         return NULL;
15 |     }
16 | 
17 |     // Convert dimensions to vector
18 |     std::vector<npy_intp> dimensions;
19 |     if (PyList_Check(dimensions_obj) || PyTuple_Check(dimensions_obj)) {
20 |         Py_ssize_t size = PySequence_Size(dimensions_obj);
21 |         dimensions.reserve(size);
22 |         for (Py_ssize_t i = 0; i < size; i++) {
23 |             PyObject* item = PySequence_GetItem(dimensions_obj, i);
24 |             dimensions.push_back(PyLong_AsLong(item));
25 |             Py_DECREF(item);
26 |         }
27 |     } else {
28 |         PyErr_SetString(PyExc_TypeError, "dimensions must be a list or tuple");
29 |         return NULL;
30 |     }
31 | 
32 |     std::string type(type_name);
33 | 
34 |     if (type == "array") {
35 |         npy_intp* dims = dimensions.data();
36 |         PyObject* arr = PyArray_EMPTY(dimensions.size(), dims, NPY_DOUBLE, 0);
37 |         double* data = (double*)PyArray_DATA((PyArrayObject*)arr);
38 |         for (npy_intp i = 0; i < PyArray_SIZE((PyArrayObject*)arr); i++) {
39 |             data[i] = std::numeric_limits<double>::quiet_NaN();
40 |         }
41 |         return arr;
42 |     }
43 |     else if (type == "vector") {
44 |         npy_intp dims[1] = {dimensions[0]};
45 |         PyObject* arr = PyArray_EMPTY(1, dims, NPY_DOUBLE, 0);
46 |         double* data = (double*)PyArray_DATA((PyArrayObject*)arr);
47 |         for (npy_intp i = 0; i < dimensions[0]; i++) {
48 |             data[i] = std::numeric_limits<double>::quiet_NaN();
49 |         }
50 |         return arr;
51 |     }
52 |     else if (type == "matrix") {
53 |         npy_intp dims[2] = {dimensions[0], dimensions[1]};
54 |         PyObject* arr = PyArray_EMPTY(2, dims, NPY_DOUBLE, 0);
55 |         double* data = (double*)PyArray_DATA((PyArrayObject*)arr);
56 |         for (npy_intp i = 0; i < dimensions[0] * dimensions[1]; i++) {
57 |             data[i] = std::numeric_limits<double>::quiet_NaN();
58 |         }
59 |         return arr;
60 |     }
61 |     else if (type == "neighborhood memories") {
62 |         // Validate dimensions
63 |         npy_intp expected_cols = 1 + 4 * dimensions[2] + (dimensions[3] - 1) * dimensions[2] + dimensions[3] * dimensions[2];
64 |         if (dimensions[1] != expected_cols) {
65 |             PyErr_SetString(PyExc_ValueError, "The dimensions[1] is wrong!");
66 |             return NULL;
67 |         }
68 | 
69 |         // Create empty DataFrame equivalent (numpy array)
70 |         npy_intp dims[2] = {dimensions[0], dimensions[1]};
71 |         PyObject* arr = PyArray_EMPTY(2, dims, NPY_DOUBLE, 0);
72 |         if (!arr) return NULL;
73 | 
74 |         // Fill with NaN
75 |         double* data = (double*)PyArray_DATA((PyArrayObject*)arr);
76 |         for (npy_intp i = 0; i < dimensions[0] * dimensions[1]; i++) {
77 |             data[i] = std::numeric_limits<double>::quiet_NaN();
78 |         }
79 | 
80 |         // Create list for column names
81 |         PyObject* col_names = PyList_New(dimensions[1]);
82 |         if (!col_names) {
83 |             Py_DECREF(arr);
84 |             return NULL;
85 |         }
86 | 
87 |         // Add column names
88 |         int col_idx = 0;
89 | 
90 |         // "i" column
91 |         PyList_SET_ITEM(col_names, col_idx++, PyUnicode_FromString("i"));
92 | 
93 |         // nn-times, nn-dists, nn-weights, nn-patt
94 |         for (int j = 0; j < 4; j++) {
95 |             const char* prefix;
96 |             switch(j) {
97 |                 case 0: prefix = "nn-times"; break;
98 |                 case 1: prefix = "nn-dists"; break;
99 |                 case 2: prefix = "nn-weights"; break;
100 |                 case 3: prefix = "nn-patt"; break;
101 |             }
102 |             for (npy_intp k = 0; k < dimensions[2]; k++) {
103 |                 PyList_SET_ITEM(col_names, col_idx++, PyUnicode_FromString(prefix));
104 |             }
105 |         }
106 | 
107 |         // Signature component columns
108 |         for (npy_intp nn = 1; nn <= dimensions[2]; nn++) {
109 |             for (npy_intp comp = 1; comp < dimensions[3]; comp++) {
110 |                 char buf[100];
111 |                 snprintf(buf, sizeof(buf), "Sig-Comp.%ld of NN%ld", (long)comp, (long)nn);
112 |                 PyList_SET_ITEM(col_names, col_idx++, PyUnicode_FromString(buf));
113 |             }
114 |         }
115 | 
116 |         // Coordinate columns
117 |         for (npy_intp nn = 1; nn <= dimensions[2]; nn++) {
118 |             for (npy_intp coord = 1; coord <= dimensions[3]; coord++) {
119 |                 char buf[100];
120 |                 snprintf(buf, sizeof(buf), "Coord.%ld of NN%ld", (long)coord, (long)nn);
121 |                 PyList_SET_ITEM(col_names, col_idx++, PyUnicode_FromString(buf));
122 |             }
123 |         }
124 | 
125 |         // Import pandas
126 |         PyObject* pandas = PyImport_ImportModule("pandas");
127 |         if (!pandas) {
128 |             Py_DECREF(arr);
129 |             Py_DECREF(col_names);
130 |             return NULL;
131 |         }
132 | 
133 |         // Create DataFrame
134 |         PyObject* df_class = PyObject_GetAttrString(pandas, "DataFrame");
135 |         PyObject* df = PyObject_CallFunction(df_class, "OOO", arr, Py_None, col_names);  // DataFrame(data, index=None, columns=col_names); passing col_names as the second positional argument would set the index instead
136 | 
137 |         Py_DECREF(pandas);
138 |         Py_DECREF(df_class);
139 |         Py_DECREF(arr);
140 |         Py_DECREF(col_names);
141 | 
142 |         return df;
143 |     }
144 | 
145 |     Py_RETURN_NONE;
146 | }
147 | 
148 | static PyMethodDef DatabankMethods[] = {
149 |     {"databank", databank, METH_VARARGS, "Create data structures based on type and dimensions"},
150 |     {NULL, NULL, 0, NULL}
151 | };
152 | 
153 | static struct PyModuleDef databankmodule = {
154 |     PyModuleDef_HEAD_INIT,
155 |     "databank",
156 |     NULL,
157 |     -1,
158 |     DatabankMethods
159 | };
160 | 
161 | PyMODINIT_FUNC PyInit_databank(void) {
162 |     import_array();
163 |     return PyModule_Create(&databankmodule);
164 | }
165 | 
--------------------------------------------------------------------------------
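
The "neighborhood memories" branch above expects dims = [rows, cols, NN, E] with cols = 1 + 4*NN + (E - 1)*NN + E*NN: one id column, four blocks of NN nearest-neighbor columns, then signature components and coordinates. A sketch of that arithmetic, assuming a built utils.databank:

# Column-count arithmetic behind databank("neighborhood memories", dims).
from utils.databank import databank

rows, NN, E = 10, 4, 3
cols = 1 + 4 * NN + (E - 1) * NN + E * NN  # 37 columns for NN = 4, E = 3
nm = databank("neighborhood memories", [rows, cols, NN, E])
print(nm.shape)  # (10, 37): a pandas DataFrame pre-filled with NaN
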
/pattern_causality/cpp/signaturespace.cpp:
--------------------------------------------------------------------------------
1 | #define PY_SSIZE_T_CLEAN
2 | #include <Python.h>
3 | #include <numpy/arrayobject.h>
4 | #include <cmath>
5 | #include <limits>
6 | #include <vector>
7 | 
8 | // Optimized inline difference calculation
9 | static inline void calculate_differences(const double* input, double* output, npy_intp length) {
10 |     for (npy_intp i = 0; i < length - 1; i++) {
11 |         if (std::isnan(input[i]) || std::isnan(input[i + 1])) {
12 |             output[i] = std::numeric_limits<double>::quiet_NaN();
13 |         } else {
14 |             output[i] = input[i + 1] - input[i];
15 |         }
16 |     }
17 | }
18 | 
19 | static PyObject* signatureVectorDifference(PyObject* self, PyObject* args) {
20 |     PyObject* input_array;
21 |     if (!PyArg_ParseTuple(args, "O", &input_array)) {
22 |         return NULL;
23 |     }
24 | 
25 |     // Convert to numpy array
26 |     PyArrayObject* array = (PyArrayObject*)PyArray_FROM_OTF(
27 |         input_array,
28 |         NPY_DOUBLE,
29 |         NPY_ARRAY_IN_ARRAY
30 |     );
31 |     if (!array) {
32 |         return NULL;
33 |     }
34 | 
35 |     // Check dimensions
36 |     if (PyArray_NDIM(array) != 1) {
37 |         Py_DECREF(array);
38 |         PyErr_SetString(PyExc_ValueError, "Input must be a 1D array");
39 |         return NULL;
40 |     }
41 | 
42 |     const npy_intp length = PyArray_DIM(array, 0);
43 |     const npy_intp output_length = length - 1;
44 | 
45 |     // Create output array
46 |     npy_intp dims[1] = {output_length};
47 |     PyArrayObject* result_array = (PyArrayObject*)PyArray_SimpleNew(1, dims, NPY_DOUBLE);
48 |     if (!result_array) {
49 |         Py_DECREF(array);
50 |         return NULL;
51 |     }
52 | 
53 |     // Get data pointers
54 |     double* input_data = (double*)PyArray_DATA(array);
55 |     double* output_data = (double*)PyArray_DATA(result_array);
56 | 
57 |     // Calculate differences
58 |     calculate_differences(input_data, output_data, length);
59 | 
60 |     Py_DECREF(array);
61 |     return (PyObject*)result_array;
62 | }
63 | 
64 | static PyObject* signaturespace(PyObject* self, PyObject* args, PyObject* kwargs) {
65 |     PyObject* input_matrix;
66 |     int E;
67 |     int relative = 1;  // Default to relative (1 for true)
68 | 
69 |     static char* kwlist[] = {"input_matrix", "E", "relative", NULL};
70 |     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "Oi|p", kwlist,
71 |                                      &input_matrix, &E, &relative)) {
72 |         return NULL;
73 |     }
74 | 
75 |     // Validate parameters
76 |     if (E < 2) {
77 |         PyErr_SetString(PyExc_ValueError, "State space matrix must have at least 2 columns");
78 |         return NULL;
79 |     }
80 | 
81 |     // Convert input to numpy array
82 |     PyArrayObject* array = (PyArrayObject*)PyArray_FROM_OTF(
83 |         input_matrix,
84 |         NPY_DOUBLE,
85 |         NPY_ARRAY_IN_ARRAY
86 |     );
87 |     if (!array) {
88 |         PyErr_SetString(PyExc_ValueError, "Input must be a matrix");
89 |         return NULL;
90 |     }
91 | 
92 |     // Validate dimensions
93 |     if (PyArray_NDIM(array) != 2) {
94 |         Py_DECREF(array);
95 |         PyErr_SetString(PyExc_ValueError, "Input must be a matrix");
96 |         return NULL;
97 |     }
98 | 
99 |     const npy_intp rows = PyArray_DIM(array, 0);
100 |     const npy_intp cols = PyArray_DIM(array, 1);
101 |     const npy_intp output_cols = cols - 1;
102 | 
103 |     // Handle empty input
104 |     if (rows == 0) {
105 |         Py_DECREF(array);
106 |         npy_intp dims[2] = {0, output_cols};
107 |         return (PyObject*)PyArray_SimpleNew(2, dims, NPY_DOUBLE);
108 |     }
109 | 
110 |     // Create output array
111 |     npy_intp out_dims[2] = {rows, output_cols};
112 |     PyArrayObject* result_matrix = (PyArrayObject*)PyArray_SimpleNew(2, out_dims, NPY_DOUBLE);
113 |     if (!result_matrix) {
114 |         Py_DECREF(array);
115 |         return NULL;
116 |     }
117 | 
118 |     // Get data pointers
119 |     const double* input_data = (double*)PyArray_DATA(array);
120 |     double* output_data = (double*)PyArray_DATA(result_matrix);
121 | 
122 |     // Calculate differences for each row
123 |     for (npy_intp i = 0; i < rows; i++) {
124 |         const double* input_row = input_data + i * cols;
125 |         double* output_row = output_data + i * output_cols;
126 | 
127 |         for (npy_intp j = 0; j < output_cols; j++) {
128 |             if (std::isnan(input_row[j]) || std::isnan(input_row[j + 1])) {
129 |                 output_row[j] = std::numeric_limits<double>::quiet_NaN();
130 |             } else {
131 |                 if (relative) {
132 |                     // Relative change: (new - old) / old
133 |                     // Exactly match R's behavior: no special handling for zero values
134 |                     output_row[j] = (input_row[j + 1] - input_row[j]) / input_row[j];
135 |                 } else {
136 |                     // Absolute change: new - old
137 |                     output_row[j] = input_row[j + 1] - input_row[j];
138 |                 }
139 |             }
140 |         }
141 |     }
142 | 
143 |     Py_DECREF(array);
144 |     return (PyObject*)result_matrix;
145 | }
146 | 
147 | // Module method definitions
148 | static PyMethodDef SignatureSpaceMethods[] = {
149 |     {"signatureVectorDifference", signatureVectorDifference, METH_VARARGS,
150 |      "Calculate differences between successive elements using SIMD optimization"},
151 |     {"signaturespace", (PyCFunction)signaturespace, METH_VARARGS | METH_KEYWORDS,
152 |      "Calculate signature space matrix with parallel processing and SIMD optimization.\n"
153 |      "Args:\n"
154 |      "    input_matrix: Input 2D array\n"
155 |      "    E: Embedding dimension\n"
156 |      "    relative: If True, calculate relative differences (new-old)/old, otherwise absolute differences (new-old). Default is True."},
157 |     {NULL, NULL, 0, NULL}
158 | };
159 | 
160 | // Module definition
161 | static struct PyModuleDef signaturespacemodule = {
162 |     PyModuleDef_HEAD_INIT,
163 |     "signaturespace",
164 |     "Optimized signature space calculation module",
165 |     -1,
166 |     SignatureSpaceMethods
167 | };
168 | 
169 | // Module initialization
170 | PyMODINIT_FUNC PyInit_signaturespace(void) {
171 |     import_array();
172 |     return PyModule_Create(&signaturespacemodule);
173 | }
--------------------------------------------------------------------------------
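
Downstream, signaturespace is typically fed the output of statespace. A short usage sketch assuming both extensions are built (relative differencing is the default, matching the C code above):

# Usage sketch: embed a series, then take per-row signature differences.
import numpy as np
from utils.statespace import statespace
from utils.signaturespace import signaturespace

ts = np.sin(np.linspace(0, 10, 100))
ss = statespace(ts.tolist(), 3, 1)               # shape (98, 3)
sig = signaturespace(ss, 3)                      # (98, 2), (new - old) / old
sig_abs = signaturespace(ss, 3, relative=False)  # (98, 2), plain new - old
print(sig.shape, sig_abs.shape)
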
| &input_matrix, &E, &relative)) { 72 | return NULL; 73 | } 74 | 75 | // Validate parameters 76 | if (E < 2) { 77 | PyErr_SetString(PyExc_ValueError, "State space matrix must have at least 2 columns"); 78 | return NULL; 79 | } 80 | 81 | // Convert input to numpy array 82 | PyArrayObject* array = (PyArrayObject*)PyArray_FROM_OTF( 83 | input_matrix, 84 | NPY_DOUBLE, 85 | NPY_ARRAY_IN_ARRAY 86 | ); 87 | if (!array) { 88 | PyErr_SetString(PyExc_ValueError, "Input must be a matrix"); 89 | return NULL; 90 | } 91 | 92 | // Validate dimensions 93 | if (PyArray_NDIM(array) != 2) { 94 | Py_DECREF(array); 95 | PyErr_SetString(PyExc_ValueError, "Input must be a matrix"); 96 | return NULL; 97 | } 98 | 99 | const npy_intp rows = PyArray_DIM(array, 0); 100 | const npy_intp cols = PyArray_DIM(array, 1); 101 | const npy_intp output_cols = cols - 1; 102 | 103 | // Handle empty input 104 | if (rows == 0) { 105 | Py_DECREF(array); 106 | npy_intp dims[2] = {0, output_cols}; 107 | return (PyObject*)PyArray_SimpleNew(2, dims, NPY_DOUBLE); 108 | } 109 | 110 | // Create output array 111 | npy_intp out_dims[2] = {rows, output_cols}; 112 | PyArrayObject* result_matrix = (PyArrayObject*)PyArray_SimpleNew(2, out_dims, NPY_DOUBLE); 113 | if (!result_matrix) { 114 | Py_DECREF(array); 115 | return NULL; 116 | } 117 | 118 | // Get data pointers 119 | const double* input_data = (double*)PyArray_DATA(array); 120 | double* output_data = (double*)PyArray_DATA(result_matrix); 121 | 122 | // Calculate differences for each row 123 | for (npy_intp i = 0; i < rows; i++) { 124 | const double* input_row = input_data + i * cols; 125 | double* output_row = output_data + i * output_cols; 126 | 127 | for (npy_intp j = 0; j < output_cols; j++) { 128 | if (std::isnan(input_row[j]) || std::isnan(input_row[j + 1])) { 129 | output_row[j] = std::numeric_limits::quiet_NaN(); 130 | } else { 131 | if (relative) { 132 | // Relative change: (new - old) / old 133 | // Exactly match R's behavior: no special handling for zero values 134 | output_row[j] = (input_row[j + 1] - input_row[j]) / input_row[j]; 135 | } else { 136 | // Absolute change: new - old 137 | output_row[j] = input_row[j + 1] - input_row[j]; 138 | } 139 | } 140 | } 141 | } 142 | 143 | Py_DECREF(array); 144 | return (PyObject*)result_matrix; 145 | } 146 | 147 | // Module method definitions 148 | static PyMethodDef SignatureSpaceMethods[] = { 149 | {"signatureVectorDifference", signatureVectorDifference, METH_VARARGS, 150 | "Calculate differences between successive elements using SIMD optimization"}, 151 | {"signaturespace", (PyCFunction)signaturespace, METH_VARARGS | METH_KEYWORDS, 152 | "Calculate signature space matrix with parallel processing and SIMD optimization.\n" 153 | "Args:\n" 154 | " input_matrix: Input 2D array\n" 155 | " E: Embedding dimension\n" 156 | " relative: If True, calculate relative differences (new-old)/old, otherwise absolute differences (new-old). 
157 | {NULL, NULL, 0, NULL}
158 | };
159 |
160 | // Module definition
161 | static struct PyModuleDef signaturespacemodule = {
162 | PyModuleDef_HEAD_INIT,
163 | "signaturespace",
164 | "Optimized signature space calculation module",
165 | -1,
166 | SignatureSpaceMethods
167 | };
168 |
169 | // Module initialization
170 | PyMODINIT_FUNC PyInit_signaturespace(void) {
171 | import_array();
172 | return PyModule_Create(&signaturespacemodule);
173 | }
--------------------------------------------------------------------------------
/pattern_causality/cpp/statespace.cpp:
--------------------------------------------------------------------------------
1 | #define PY_SSIZE_T_CLEAN
2 | #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
3 | #include <Python.h>
4 | #include <numpy/arrayobject.h>
5 | #include <cmath>
6 | #include <vector>
7 | #include <cstring>
8 | #include <algorithm>
9 | #include <limits>
10 |
11 | // Thread-local storage for reusable buffers
12 | thread_local std::vector<double> ts_buffer;
13 |
14 | // Optimized conversion from Python object to double
15 | static inline double convert_to_double(PyObject* item, bool& success) {
16 | if (PyFloat_Check(item)) {
17 | success = true;
18 | return PyFloat_AS_DOUBLE(item);
19 | } else if (PyLong_Check(item)) {
20 | success = true;
21 | return (double)PyLong_AsLongLong(item);
22 | } else {
23 | PyObject* float_obj = PyNumber_Float(item);
24 | if (!float_obj) {
25 | success = false;
26 | return 0.0;
27 | }
28 | double result = PyFloat_AS_DOUBLE(float_obj);
29 | Py_DECREF(float_obj);
30 | success = true;
31 | return result;
32 | }
33 | }
34 |
35 | // Fast check for numpy array contiguity and type
36 | static inline bool check_array_valid(PyArrayObject* arr) {
37 | return (PyArray_ISCARRAY_RO(arr) &&
38 | (PyArray_TYPE(arr) == NPY_DOUBLE ||
39 | PyArray_TYPE(arr) == NPY_FLOAT ||
40 | PyArray_TYPE(arr) == NPY_INT64 ||
41 | PyArray_TYPE(arr) == NPY_INT32));
42 | }
43 |
44 | static PyObject* stateSpace(PyObject* self, PyObject* args) {
45 | PyObject* ts_obj;
46 | int E, tau;
47 |
48 | // Parse Python arguments
49 | if (!PyArg_ParseTuple(args, "Oii", &ts_obj, &E, &tau)) {
50 | return NULL;
51 | }
52 |
53 | // Quick parameter validation
54 | if (E < 2 || tau < 1) {
55 | PyErr_SetString(PyExc_ValueError, "E must be >= 2 and tau must be >= 1");
56 | return NULL;
57 | }
58 |
59 | // Get input type and length
60 | const bool is_list = PyList_Check(ts_obj);
61 | const bool is_array = PyArray_Check(ts_obj);
62 | if (!is_list && !is_array) {
63 | PyErr_SetString(PyExc_TypeError, "Input must be a list or numpy array");
64 | return NULL;
65 | }
66 |
67 | // Get length of input time series
68 | const Py_ssize_t ts_len = is_list ?
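// Illustrative layout note: the fill loop below writes
//   data[k * E + j] = ts[k + j * tau]
// so a series of length N yields N - (E - 1) * tau rows; e.g. N = 6,
// E = 3, tau = 1 gives the 4 rows [x0 x1 x2], [x1 x2 x3], [x2 x3 x4],
// [x3 x4 x5].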
PyList_Size(ts_obj) : PyArray_SIZE((PyArrayObject*)ts_obj);
69 |
70 | // Check minimum length requirement
71 | if (ts_len < (E - 1) * tau + 1) {
72 | PyErr_SetString(PyExc_ValueError, "Time series too short for given E and tau");
73 | return NULL;
74 | }
75 |
76 | // Calculate output dimensions
77 | const npy_intp rows = ts_len - (E - 1) * tau;
78 | const npy_intp cols = E;
79 | npy_intp dims[2] = {rows, cols};
80 |
81 | // Create output array with alignment
82 | PyArrayObject* result_array = (PyArrayObject*)PyArray_SimpleNew(2, dims, NPY_DOUBLE);
83 | if (!result_array) {
84 | PyErr_SetString(PyExc_MemoryError, "Failed to create output array");
85 | return NULL;
86 | }
87 |
88 | // Resize thread-local buffer if needed
89 | if (static_cast<Py_ssize_t>(ts_buffer.size()) < ts_len) {
90 | ts_buffer.resize(static_cast<size_t>(ts_len));
91 | }
92 |
93 | // Get data pointers
94 | double* const data = (double*)PyArray_DATA(result_array);
95 | double* const ts_data = ts_buffer.data();
96 |
97 | // Convert input to double array using the most efficient method
98 | if (is_list) {
99 | #pragma omp parallel for schedule(static)
100 | for (Py_ssize_t i = 0; i < ts_len; i++) {
101 | PyObject* item = PyList_GET_ITEM(ts_obj, i);
102 | bool success = true;
103 | ts_data[i] = convert_to_double(item, success);
104 | if (!success) {
105 | PyErr_SetString(PyExc_TypeError, "All elements must be numeric");
106 | // Note: Cannot return NULL here due to OpenMP, error will be checked later
107 | }
108 | }
109 | if (PyErr_Occurred()) {
110 | Py_DECREF(result_array);
111 | return NULL;
112 | }
113 | } else {
114 | PyArrayObject* arr = (PyArrayObject*)ts_obj;
115 | if (!check_array_valid(arr)) {
116 | arr = (PyArrayObject*)PyArray_FROM_OTF(ts_obj, NPY_DOUBLE,
117 | NPY_ARRAY_IN_ARRAY | NPY_ARRAY_ALIGNED | NPY_ARRAY_FORCECAST);
118 | if (!arr) {
119 | Py_DECREF(result_array);
120 | return NULL;
121 | }
122 | memcpy(ts_data, PyArray_DATA(arr), ts_len * sizeof(double));
123 | Py_DECREF(arr);
124 | } else {
125 | memcpy(ts_data, PyArray_DATA(arr), ts_len * sizeof(double));
126 | }
127 | }
128 |
129 | // Fill state space matrix using optimized parallel processing
130 | const npy_intp block_size = std::max(1, 1024 / E); // Optimize cache usage
131 | #pragma omp parallel
132 | {
133 | #pragma omp for schedule(static) collapse(2)
134 | for (npy_intp i = 0; i < rows; i += block_size) {
135 | for (npy_intp j = 0; j < E; j++) {
136 | const npy_intp block_end = std::min(i + block_size, rows);
137 | #pragma omp simd
138 | for (npy_intp k = i; k < block_end; k++) {
139 | const npy_intp idx = k + j * tau;
140 | const double val = ts_data[idx];
141 | data[k * E + j] = std::isfinite(val) ?
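// Design note: block_size caps each block at roughly 1024 doubles so the
// rows being filled stay cache-resident across the inner simd loop; any
// non-finite input value is stored as NaN (continuation below).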
val : std::numeric_limits<double>::quiet_NaN();
142 | }
143 | }
144 | }
145 | }
146 |
147 | return (PyObject*)result_array;
148 | }
149 |
150 | // Method definition
151 | static PyMethodDef StateSpaceMethods[] = {
152 | {"statespace", stateSpace, METH_VARARGS,
153 | "Create state space matrix from time series using embedding parameters E and tau"},
154 | {NULL, NULL, 0, NULL}
155 | };
156 |
157 | // Module definition
158 | static struct PyModuleDef statespacemodule = {
159 | PyModuleDef_HEAD_INIT,
160 | "statespace",
161 | "Optimized state space embedding module",
162 | -1,
163 | StateSpaceMethods
164 | };
165 |
166 | // Module initialization
167 | PyMODINIT_FUNC PyInit_statespace(void) {
168 | import_array();
169 | return PyModule_Create(&statespacemodule);
170 | }
171 |
172 |
--------------------------------------------------------------------------------
/pattern_causality/cpp/pastNNs.cpp:
--------------------------------------------------------------------------------
1 | #define PY_SSIZE_T_CLEAN
2 | #include <Python.h>
3 | #include <numpy/arrayobject.h>
4 | #include <vector>
5 | #include <algorithm>
6 | #include <cmath>
7 | #include <utility>
8 |
9 | // Pre-allocated buffer size
10 | constexpr size_t INITIAL_BUFFER_SIZE = 1024;
11 |
12 | // Reusable buffer
13 | static std::vector<std::pair<double, int>> candidate_buffer;
14 | static std::vector<int> nn_indices_buffer;
15 | static std::vector<double> dists_buffer;
16 |
17 | static PyObject* pastNNs(PyObject* self, PyObject* args) {
18 | int ccspan, nnspan, i, h;
19 | PyObject *mx_obj, *dx_obj, *smx_obj, *psmx_obj;
20 |
21 | if (!PyArg_ParseTuple(args, "iiOOOOii", &ccspan, &nnspan,
22 | &mx_obj, &dx_obj, &smx_obj, &psmx_obj, &i, &h)) {
23 | return NULL;
24 | }
25 |
26 | // Convert inputs to numpy arrays
27 | PyArrayObject* mx_array = (PyArrayObject*)PyArray_FROM_OTF(mx_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
28 | PyArrayObject* dx_array = (PyArrayObject*)PyArray_FROM_OTF(dx_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
29 | PyArrayObject* smx_array = (PyArrayObject*)PyArray_FROM_OTF(smx_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
30 | PyArrayObject* psmx_array = (PyArrayObject*)PyArray_FROM_OTF(psmx_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
31 |
32 | if (!mx_array || !dx_array || !smx_array || !psmx_array) {
33 | Py_XDECREF(mx_array);
34 | Py_XDECREF(dx_array);
35 | Py_XDECREF(smx_array);
36 | Py_XDECREF(psmx_array);
37 | return NULL;
38 | }
39 |
40 | // Get array dimensions
41 | const npy_intp* mx_dims = PyArray_DIMS(mx_array);
42 | const npy_intp* smx_dims = PyArray_DIMS(smx_array);
43 | const npy_intp mx_stride_0 = PyArray_STRIDE(mx_array, 0) / sizeof(double);
44 | const npy_intp smx_stride_0 = PyArray_STRIDE(smx_array, 0) / sizeof(double);
45 | const npy_intp psmx_stride = PyArray_STRIDE(psmx_array, 0) / sizeof(double);
46 |
47 | // Get data pointers
48 | double* mx_data = (double*)PyArray_DATA(mx_array);
49 | double* dx_data = (double*)PyArray_DATA(dx_array);
50 | double* smx_data = (double*)PyArray_DATA(smx_array);
51 | double* psmx_data = (double*)PyArray_DATA(psmx_array);
52 |
53 | // Find valid indices
54 | std::vector<int> valid_indices;
55 | valid_indices.reserve(mx_dims[0]);
56 |
57 | for (npy_intp j = 0; j < i - ccspan; j++) {
58 | bool valid = true;
59 | // Check for NaN in state space
60 | for (npy_intp k = 0; k < mx_dims[1]; k++) {
61 | if (std::isnan(mx_data[j * mx_stride_0 + k])) {
62 | valid = false;
63 | break;
64 | }
65 | }
66 | // Check for NaN in distance matrix
67 | if (valid && std::isnan(dx_data[i * mx_dims[0] + j])) {
68 | valid = false;
69 | }
70 | if (valid) {
71 | valid_indices.push_back(j);
72 | }
73 | }
74 |
75 | // Sort indices by distance
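// Selection summary: candidates are restricted to times j < i - ccspan
// whose coordinates and distance to state i are NaN-free; they are then
// sorted by distance and truncated to the nnspan nearest below. The dict
// returned at the end maps "times"/"dists" to 1-D arrays and
// "signatures"/"patterns"/"coordinates" to one row per neighbor.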
76 | std::vector<std::pair<double, int>> distances;
77 | distances.reserve(valid_indices.size());
78 |
79 | for (int idx : valid_indices) {
80 | distances.push_back({dx_data[i * mx_dims[0] + idx], idx});
81 | }
82 |
83 | std::sort(distances.begin(), distances.end());
84 |
85 | // Take only nnspan nearest neighbors
86 | const size_t sort_size = std::min(static_cast<size_t>(nnspan), distances.size());
87 |
88 | // Create output arrays
89 | npy_intp out_dims[] = {static_cast<npy_intp>(sort_size)};
90 | PyObject* times = PyArray_SimpleNew(1, out_dims, NPY_LONG);
91 | PyObject* dists = PyArray_SimpleNew(1, out_dims, NPY_DOUBLE);
92 |
93 | npy_intp sig_dims[] = {static_cast<npy_intp>(sort_size), smx_dims[1]};
94 | PyObject* signatures = PyArray_SimpleNew(2, sig_dims, NPY_DOUBLE);
95 |
96 | npy_intp pat_dims[] = {static_cast<npy_intp>(sort_size), 1};
97 | PyObject* patterns = PyArray_SimpleNew(2, pat_dims, NPY_DOUBLE);
98 |
99 | npy_intp coord_dims[] = {static_cast<npy_intp>(sort_size), mx_dims[1]};
100 | PyObject* coordinates = PyArray_SimpleNew(2, coord_dims, NPY_DOUBLE);
101 |
102 | if (!times || !dists || !signatures || !patterns || !coordinates) {
103 | Py_XDECREF(times);
104 | Py_XDECREF(dists);
105 | Py_XDECREF(signatures);
106 | Py_XDECREF(patterns);
107 | Py_XDECREF(coordinates);
108 | Py_DECREF(mx_array);
109 | Py_DECREF(dx_array);
110 | Py_DECREF(smx_array);
111 | Py_DECREF(psmx_array);
112 | return NULL;
113 | }
114 |
115 | // Fill output arrays
116 | long* times_data = (long*)PyArray_DATA((PyArrayObject*)times);
117 | double* dists_data = (double*)PyArray_DATA((PyArrayObject*)dists);
118 | double* signatures_data = (double*)PyArray_DATA((PyArrayObject*)signatures);
119 | double* patterns_data = (double*)PyArray_DATA((PyArrayObject*)patterns);
120 | double* coordinates_data = (double*)PyArray_DATA((PyArrayObject*)coordinates);
121 |
122 | for (size_t j = 0; j < sort_size; j++) {
123 | const int idx = distances[j].second;
124 | times_data[j] = idx;
125 | dists_data[j] = distances[j].first;
126 |
127 | // Copy signatures
128 | for (npy_intp k = 0; k < smx_dims[1]; k++) {
129 | signatures_data[j * smx_dims[1] + k] = smx_data[idx * smx_stride_0 + k];
130 | }
131 |
132 | // Copy pattern
133 | patterns_data[j] = psmx_data[idx * psmx_stride];
134 |
135 | // Copy coordinates
136 | for (npy_intp k = 0; k < mx_dims[1]; k++) {
137 | coordinates_data[j * mx_dims[1] + k] = mx_data[idx * mx_stride_0 + k];
138 | }
139 | }
140 |
141 | // Create return dictionary
142 | PyObject* result = PyDict_New();
143 | if (!result) {
144 | Py_DECREF(times);
145 | Py_DECREF(dists);
146 | Py_DECREF(signatures);
147 | Py_DECREF(patterns);
148 | Py_DECREF(coordinates);
149 | Py_DECREF(mx_array);
150 | Py_DECREF(dx_array);
151 | Py_DECREF(smx_array);
152 | Py_DECREF(psmx_array);
153 | return NULL;
154 | }
155 |
156 | PyDict_SetItemString(result, "times", times);
157 | PyDict_SetItemString(result, "dists", dists);
158 | PyDict_SetItemString(result, "signatures", signatures);
159 | PyDict_SetItemString(result, "patterns", patterns);
160 | PyDict_SetItemString(result, "coordinates", coordinates);
161 |
162 | Py_DECREF(times);
163 | Py_DECREF(dists);
164 | Py_DECREF(signatures);
165 | Py_DECREF(patterns);
166 | Py_DECREF(coordinates);
167 | Py_DECREF(mx_array);
168 | Py_DECREF(dx_array);
169 | Py_DECREF(smx_array);
170 | Py_DECREF(psmx_array);
171 |
172 | return result;
173 | }
174 |
175 | static PyMethodDef PastNNsMethods[] = {
176 | {"pastNNs", (PyCFunction)pastNNs, METH_VARARGS,
177 | "Get information about past nearest neighbors"},
178 | {NULL, NULL, 0, NULL}
179 | };
180 |
181 | static struct PyModuleDef pastnnsmodule = {
182 | PyModuleDef_HEAD_INIT,
183 | "pastNNs",
184 | "Past nearest neighbors calculation module",
185 | -1,
186 | PastNNsMethods
187 | };
188 |
189 | PyMODINIT_FUNC PyInit_pastNNs(void) {
190 | import_array();
191 | return PyModule_Create(&pastnnsmodule);
192 | }
193 |
--------------------------------------------------------------------------------
/pattern_causality/cpp/natureOfCausality.cpp:
--------------------------------------------------------------------------------
1 | #define PY_SSIZE_T_CLEAN
2 | #include <Python.h>
3 | #include <numpy/arrayobject.h>
4 | #include <cmath>
5 | #include <limits>
6 |
7 | static PyObject* natureOfCausality(PyObject* self, PyObject* args) {
8 | PyObject *pc_obj, *dur_obj, *hashed_obj, *x_obj;
9 | PyObject* weighted_obj;
10 |
11 | if (!PyArg_ParseTuple(args, "OOOOO", &pc_obj, &dur_obj, &hashed_obj, &x_obj, &weighted_obj)) {
12 | return NULL;
13 | }
14 |
15 | // Convert inputs to numpy arrays
16 | PyArrayObject* pc_arr = (PyArrayObject*)PyArray_FROM_OTF(pc_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
17 | PyArrayObject* dur_arr = (PyArrayObject*)PyArray_FROM_OTF(dur_obj, NPY_LONG, NPY_ARRAY_IN_ARRAY);
18 | PyArrayObject* hashed_arr = (PyArrayObject*)PyArray_FROM_OTF(hashed_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
19 | PyArrayObject* x_arr = (PyArrayObject*)PyArray_FROM_OTF(x_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
20 |
21 | if (!pc_arr || !dur_arr || !hashed_arr || !x_arr) {
22 | Py_XDECREF(pc_arr);
23 | Py_XDECREF(dur_arr);
24 | Py_XDECREF(hashed_arr);
25 | Py_XDECREF(x_arr);
26 | return NULL;
27 | }
28 |
29 | const bool weighted = PyObject_IsTrue(weighted_obj);
30 |
31 | // Get array dimensions
32 | const npy_intp* pc_dims = PyArray_DIMS(pc_arr);
33 | const npy_intp pc_stride_row = PyArray_STRIDE(pc_arr, 0) / sizeof(double);
34 | const npy_intp pc_stride_col = PyArray_STRIDE(pc_arr, 1) / sizeof(double);
35 | const npy_intp x_size = PyArray_SIZE(x_arr);
36 |
37 | // Create output arrays
38 | npy_intp dims[] = {x_size};
39 | PyArrayObject* positive_causality = (PyArrayObject*)PyArray_SimpleNew(1, dims, NPY_DOUBLE);
40 | PyArrayObject* negative_causality = (PyArrayObject*)PyArray_SimpleNew(1, dims, NPY_DOUBLE);
41 | PyArrayObject* dark_causality = (PyArrayObject*)PyArray_SimpleNew(1, dims, NPY_DOUBLE);
42 | PyArrayObject* no_causality = (PyArrayObject*)PyArray_SimpleNew(1, dims, NPY_DOUBLE);
43 |
44 | if (!positive_causality || !negative_causality || !dark_causality || !no_causality) {
45 | Py_XDECREF(pc_arr);
46 | Py_XDECREF(dur_arr);
47 | Py_XDECREF(hashed_arr);
48 | Py_XDECREF(x_arr);
49 | Py_XDECREF(positive_causality);
50 | Py_XDECREF(negative_causality);
51 | Py_XDECREF(dark_causality);
52 | Py_XDECREF(no_causality);
53 | return NULL;
54 | }
55 |
56 | // Get data pointers for direct memory access
57 | double* pos_data = (double*)PyArray_DATA(positive_causality);
58 | double* neg_data = (double*)PyArray_DATA(negative_causality);
59 | double* dark_data = (double*)PyArray_DATA(dark_causality);
60 | double* no_data = (double*)PyArray_DATA(no_causality);
61 | double* pc_data = (double*)PyArray_DATA(pc_arr);
62 | long* dur_data = (long*)PyArray_DATA(dur_arr);
63 |
64 | // Initialize all arrays with NaN
65 | const double nan_value = std::numeric_limits<double>::quiet_NaN();
66 | for(npy_intp i = 0; i < x_size; i++) {
67 | pos_data[i] = neg_data[i] = dark_data[i] = no_data[i] = nan_value;
68 | }
69 |
70 | const npy_intp dur_size = PyArray_SIZE(dur_arr);
71 | const npy_intp hashed_size = PyArray_SIZE(hashed_arr);
72 | const npy_intp mean_pattern = hashed_size / 2;
73 | const double eps = std::numeric_limits<double>::epsilon();
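// Classification summary for the loop below, per time point i: cells of
// the pattern-pair matrix with |pc| > eps are tallied as
//   row == col == mean_pattern  -> dark      (centre diagonal cell)
//   row == col                  -> positive  (same pattern both sides)
//   row + col == hashed_size-1  -> negative  (anti-diagonal, opposite patterns)
//   anything else               -> dark
// With weighted=false each qualifying cell counts 1.0 instead of pc.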
74 |
75 | // Main computation loop
76 | for (npy_intp d = 0; d < dur_size; d++) {
77 | const long i = dur_data[d];
78 |
79 | bool found_valid = false;
80 | bool has_causality = false;
81 | double pos_val = 0.0;
82 | double neg_val = 0.0;
83 | double dark_val = 0.0;
84 | int valid_count = 0;
85 |
86 | // First pass: check if we have any valid values and count total non-NaN values
87 | for (npy_intp row = 0; row < pc_dims[0]; row++) {
88 | for (npy_intp col = 0; col < pc_dims[1]; col++) {
89 | const double pc_val = pc_data[row * pc_stride_row + col * pc_stride_col + i];
90 | if (!std::isnan(pc_val)) {
91 | found_valid = true;
92 | valid_count++;
93 | }
94 | }
95 | }
96 |
97 | // Only proceed with causality calculation if we found valid values
98 | if (found_valid) {
99 | // Second pass: calculate causalities
100 | for (npy_intp row = 0; row < pc_dims[0]; row++) {
101 | for (npy_intp col = 0; col < pc_dims[1]; col++) {
102 | const double pc_val = pc_data[row * pc_stride_row + col * pc_stride_col + i];
103 |
104 | if (!std::isnan(pc_val) && std::abs(pc_val) > eps) {
105 | has_causality = true;
106 |
107 | // Center diagonal element contributes to dark causality
108 | if (row == col && row == mean_pattern) {
109 | dark_val += weighted ? pc_val : 1.0;
110 | }
111 | // Other diagonal elements contribute to positive causality
112 | else if (row == col) {
113 | pos_val += weighted ? pc_val : 1.0;
114 | }
115 | // Anti-diagonal elements contribute to negative causality
116 | else if (row + col == hashed_size - 1) {
117 | neg_val += weighted ? pc_val : 1.0;
118 | }
119 | // All other elements contribute to dark causality
120 | else {
121 | dark_val += weighted ? pc_val : 1.0;
122 | }
123 | }
124 | }
125 | }
126 |
127 | // Set values only if we found valid data
128 | if (valid_count > 0) {
129 | no_data[i] = has_causality ?
0.0 : 1.0; 130 | pos_data[i] = pos_val; 131 | neg_data[i] = neg_val; 132 | dark_data[i] = dark_val; 133 | } 134 | } 135 | } 136 | 137 | // Create return dictionary 138 | PyObject* result = PyDict_New(); 139 | if (!result) { 140 | Py_XDECREF(pc_arr); 141 | Py_XDECREF(dur_arr); 142 | Py_XDECREF(hashed_arr); 143 | Py_XDECREF(x_arr); 144 | Py_XDECREF(positive_causality); 145 | Py_XDECREF(negative_causality); 146 | Py_XDECREF(dark_causality); 147 | Py_XDECREF(no_causality); 148 | return NULL; 149 | } 150 | 151 | PyDict_SetItemString(result, "noCausality", (PyObject*)no_causality); 152 | PyDict_SetItemString(result, "Positive", (PyObject*)positive_causality); 153 | PyDict_SetItemString(result, "Negative", (PyObject*)negative_causality); 154 | PyDict_SetItemString(result, "Dark", (PyObject*)dark_causality); 155 | 156 | Py_DECREF(pc_arr); 157 | Py_DECREF(dur_arr); 158 | Py_DECREF(hashed_arr); 159 | Py_DECREF(x_arr); 160 | Py_DECREF(positive_causality); 161 | Py_DECREF(negative_causality); 162 | Py_DECREF(dark_causality); 163 | Py_DECREF(no_causality); 164 | 165 | return result; 166 | } 167 | 168 | static PyMethodDef NatureOfCausalityMethods[] = { 169 | {"natureOfCausality", natureOfCausality, METH_VARARGS, 170 | "Calculate nature of causality from PC matrix"}, 171 | {NULL, NULL, 0, NULL} 172 | }; 173 | 174 | static struct PyModuleDef natureOfCausalitymodule = { 175 | PyModuleDef_HEAD_INIT, 176 | "natureOfCausality", 177 | NULL, 178 | -1, 179 | NatureOfCausalityMethods 180 | }; 181 | 182 | PyMODINIT_FUNC PyInit_natureOfCausality(void) { 183 | import_array(); 184 | return PyModule_Create(&natureOfCausalitymodule); 185 | } 186 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pattern_causality_py 2 | 3 | [![PyPI version](https://badge.fury.io/py/pattern-causality.svg)](https://badge.fury.io/py/pattern-causality) 4 | [![PyPI Downloads](https://static.pepy.tech/badge/pattern-causality)](https://pepy.tech/project/pattern-causality) 5 | [![Tests](https://github.com/skstavroglou/pattern_causality_py/actions/workflows/tests.yml/badge.svg)](https://github.com/skstavroglou/pattern_causality_py/actions/workflows/tests.yml) 6 | [![Lint](https://github.com/skstavroglou/pattern_causality_py/actions/workflows/lint.yml/badge.svg)](https://github.com/skstavroglou/pattern_causality_py/actions/workflows/lint.yml) 7 | [![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) 8 | [![Python](https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10%20%7C%203.11-blue)](https://www.python.org/) 9 | [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) 10 | 11 | ## Overview 12 | 13 | `pattern_causality` is a comprehensive Python library that implements the Pattern Causality algorithm for analyzing causal relationships in time series data. This package provides efficient tools for detecting and quantifying causality patterns between multiple time series, with a particular focus on nonlinear complex systems. 
14 | 15 | ## Key Features 16 | 17 | - **Efficient C++ Implementation**: Core algorithms implemented in C++ for maximum performance 18 | - **Comprehensive Analysis Tools**: 19 | - Basic pattern causality analysis 20 | - Multivariate time series analysis 21 | - Cross-validation capabilities 22 | - Parameter optimization 23 | - Effect metrics calculation 24 | - **Built-in Dataset**: Includes climate indices dataset for demonstration 25 | - **OpenMP Support**: Parallel processing for improved performance 26 | - **Extensive Testing**: Comprehensive test suite with high coverage 27 | 28 | ## System Requirements 29 | 30 | - Python 3.8 or later 31 | - C++ compiler with C++11 support 32 | - OpenMP support (for parallel processing) 33 | - NumPy 1.19.0 or later 34 | - Pandas 1.0.0 or later 35 | 36 | ## Changelog 37 | 38 | ### Version 1.0.3 (2024-02-15) 39 | - Fixed integer type conversion issue in natureOfCausality function for Windows compatibility 40 | - Improved type handling for array data in pattern causality calculations 41 | - Enhanced cross-platform compatibility for integer types 42 | 43 | ### Version 1.0.2 (2024-02-15) 44 | - Changed default behavior to use relative differences (relative=True by default) 45 | - Added relative parameter to signaturespace for choosing between relative and absolute differences 46 | - Enhanced documentation for the new parameter 47 | - Improved backward compatibility with absolute difference mode (relative=False) 48 | 49 | ### Version 1.0.1 (2024-02-14) 50 | - Fixed type conversion issue in natureOfCausality function 51 | - Improved compatibility with different system architectures by using np.int_ 52 | - Enhanced stability for array data type handling 53 | - Fixed Python 3.8 compatibility issue with numpy integer types 54 | 55 | ## Installation 56 | 57 | ### Via pip (Recommended) 58 | ```bash 59 | pip install pattern-causality 60 | ``` 61 | 62 | ### Via pip + git 63 | ```bash 64 | pip install git+https://github.com/skstavroglou/pattern_causality_py.git 65 | ``` 66 | 67 | ### From Source 68 | #### Prerequisites 69 | 70 | #### On Ubuntu/Debian: 71 | ```bash 72 | sudo apt-get update 73 | sudo apt-get install -y g++ python3-dev libomp-dev build-essential 74 | ``` 75 | 76 | #### On macOS: 77 | ```bash 78 | brew install libomp 79 | ``` 80 | 81 | ### Installing the Package 82 | 83 | ```bash 84 | # Install required Python packages 85 | python -m pip install numpy pandas 86 | 87 | # Install pattern-causality 88 | python -m pip install -e . 
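# Optional sanity check (assumes the build above succeeded): verify the
# extension modules import cleanly before running any analysis.
python -c "from pattern_causality import pattern_causality, load_data"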
89 | ``` 90 | 91 | ## Usage Examples 92 | 93 | ### Basic Usage 94 | 95 | ```python 96 | from pattern_causality import pattern_causality, load_data 97 | 98 | # Load the included climate indices dataset 99 | data = load_data() 100 | 101 | # Initialize pattern causality analyzer 102 | pc = pattern_causality(verbose=True) 103 | 104 | # Analyze causality between NAO and AAO indices 105 | result = pc.pc_lightweight( 106 | X=data["NAO"].values, 107 | Y=data["AAO"].values, 108 | E=3, # embedding dimension 109 | tau=1, # time delay 110 | metric="euclidean", 111 | h=1, # prediction horizon 112 | weighted=True, # use weighted calculations 113 | relative=True # use relative differences (default) 114 | ) 115 | 116 | print(result) 117 | ``` 118 | 119 | ### Multivariate Analysis 120 | 121 | ```python 122 | # Analyze causality patterns across multiple variables 123 | matrix_result = pc.pc_matrix( 124 | dataset=data.drop(columns=["Date"]), 125 | E=3, 126 | tau=1, 127 | metric="euclidean", 128 | h=1, 129 | weighted=True, 130 | relative=True # Using relative differences (default) 131 | ) 132 | 133 | print("Pattern Causality Matrix Results:") 134 | print(matrix_result) 135 | ``` 136 | 137 | ### Parameter Optimization 138 | 139 | ```python 140 | # Find optimal parameters 141 | optimal_params = pc.optimal_parameters_search( 142 | Emax=5, 143 | tau_max=3, 144 | metric="euclidean", 145 | h=1, 146 | dataset=data.drop(columns=["Date"]) 147 | ) 148 | 149 | print("Optimal Parameters:") 150 | print(optimal_params) 151 | ``` 152 | 153 | ### Cross Validation 154 | 155 | ```python 156 | # Perform cross-validation 157 | cv_results = pc.pc_cross_validation( 158 | X=data["NAO"].values, 159 | Y=data["AAO"].values, 160 | E=3, 161 | tau=1, 162 | metric="euclidean", 163 | h=1, 164 | weighted=True, 165 | numberset=[100, 200, 300] 166 | ) 167 | 168 | print("Cross-validation Results:") 169 | print(cv_results) 170 | ``` 171 | 172 | ## Development 173 | 174 | ### Setting Up Development Environment 175 | 176 | 1. Clone the repository: 177 | ```bash 178 | git clone https://github.com/skstavroglou/pattern_causality_py.git 179 | cd pattern_causality_py 180 | ``` 181 | 182 | 2. Create and activate a virtual environment: 183 | ```bash 184 | python -m venv venv 185 | source venv/bin/activate # On Unix/macOS 186 | # or 187 | .\venv\Scripts\activate # On Windows 188 | ``` 189 | 190 | 3. Install development dependencies: 191 | ```bash 192 | python -m pip install -e ".[dev]" 193 | ``` 194 | 195 | ### Running Tests 196 | 197 | ```bash 198 | # Run tests with coverage 199 | python -m pytest tests/ --cov=pattern_causality -v 200 | ``` 201 | 202 | ### Code Style 203 | 204 | The project uses: 205 | - Black for code formatting 206 | - isort for import sorting 207 | - flake8 for linting 208 | - mypy for type checking 209 | 210 | To check code style: 211 | ```bash 212 | black . 213 | isort . 214 | flake8 . 215 | mypy pattern_causality 216 | ``` 217 | 218 | ## Contributing 219 | 220 | Contributions are welcome! Please follow these steps: 221 | 222 | 1. Fork the repository 223 | 2. Create a feature branch (`git checkout -b feature/amazing-feature`) 224 | 3. Make your changes 225 | 4. Run the test suite 226 | 5. Commit your changes (`git commit -m 'Add amazing feature'`) 227 | 6. Push to the branch (`git push origin feature/amazing-feature`) 228 | 7. Open a Pull Request 229 | 230 | ## References 231 | 232 | - Stavroglou, S. K., Pantelous, A. A., Stanley, H. E., & Zuev, K. M. (2019). Hidden interactions in financial markets. 
_Proceedings of the National Academy of Sciences, 116(22)_, 10646-10651.
233 | - Stavroglou, S. K., Pantelous, A. A., Stanley, H. E., & Zuev, K. M. (2020). Unveiling causal interactions in complex systems. _Proceedings of the National Academy of Sciences, 117(14)_, 7599-7605.
234 | - Stavroglou, S. K., Ayyub, B. M., Kallinterakis, V., Pantelous, A. A., & Stanley, H. E. (2021). A novel causal risk‐based decision‐making methodology: The case of coronavirus. _Risk Analysis, 41(5)_, 814-830.
235 |
236 | ## License
237 |
238 | This project is licensed under the BSD 3-Clause License - see the [LICENSE](LICENSE) file for details.
--------------------------------------------------------------------------------
/pattern_causality/cpp/predictionY.cpp:
--------------------------------------------------------------------------------
1 | #define PY_SSIZE_T_CLEAN
2 | #include <Python.h>
3 | #include <numpy/arrayobject.h>
4 | #include <cmath>
5 | #include <vector>
6 | #include <array>
7 | #include <cstring>
8 |
9 | // Pre-compute factorials for common cases
10 | static constexpr size_t MAX_FACTORIAL_CACHE = 10;
11 | static const std::array<int, MAX_FACTORIAL_CACHE> factorial_cache = []() {
12 | std::array<int, MAX_FACTORIAL_CACHE> cache{};
13 | cache[0] = 1;
14 | for(size_t i = 1; i < MAX_FACTORIAL_CACHE; ++i) {
15 | cache[i] = cache[i-1] * i;
16 | }
17 | return cache;
18 | }();
19 |
20 | // Optimized factorial calculation with cache
21 | static inline int factorial(int n) {
22 | if (n < 0) return 1; // Handle error case
23 | if (n < MAX_FACTORIAL_CACHE) {
24 | return factorial_cache[n];
25 | }
26 | int result = factorial_cache[MAX_FACTORIAL_CACHE - 1];
27 | for(int i = MAX_FACTORIAL_CACHE; i <= n; ++i) {
28 | result *= i;
29 | }
30 | return result;
31 | }
32 |
33 | // Optimized hashing function with SIMD hints
34 | static inline int hashing(const std::vector<int>& vec) {
35 | int hash = 0;
36 | const size_t size = vec.size();
37 | #pragma omp simd reduction(+:hash)
38 | for (size_t i = 0; i < size; i++) {
39 | hash += vec[i] * factorial(i + 2);
40 | }
41 | return hash;
42 | }
43 |
44 | // Thread-local storage for reusable vectors
45 | thread_local std::vector<int> p_vec_buffer;
46 |
47 | static inline int pattern_vector_difference(const std::vector<double>& sVec) {
48 | // Quick check for NaN values
49 | for (size_t i = 0; i < sVec.size(); ++i) {
50 | if (std::isnan(sVec[i])) {
51 | return 0;
52 | }
53 | }
54 |
55 | // Reuse pre-allocated vector
56 | if (p_vec_buffer.capacity() < sVec.size()) {
57 | p_vec_buffer.reserve(sVec.size());
58 | }
59 | p_vec_buffer.clear();
60 |
61 | // Convert to pattern values
62 | for (size_t i = 0; i < sVec.size(); ++i) {
63 | p_vec_buffer.push_back(sVec[i] > 0 ? 3 : (sVec[i] < 0 ? 1 : 2));
64 | }
65 |
66 | return hashing(p_vec_buffer);
67 | }
68 |
69 | static PyObject* predictionY(PyObject* self, PyObject* args, PyObject* kwargs) {
70 | long E;
71 | PyObject* projNNy;
72 | PyObject* zeroTolerance_obj = Py_None;
73 |
74 | // Use char* instead of const char* for PyArg_ParseTupleAndKeywords compatibility
75 | static char* const_cast_kwlist[] = {
76 | const_cast<char*>("E"),
77 | const_cast<char*>("projNNy"),
78 | const_cast<char*>("zeroTolerance"),
79 | nullptr
80 | };
81 |
82 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "lO|O", const_cast_kwlist,
83 | &E, &projNNy, &zeroTolerance_obj)) {
84 | return NULL;
85 | }
86 |
87 | // Optimize default value calculation
88 | const double zeroTolerance = (zeroTolerance_obj == Py_None) ?
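// Worked default: with E = 3 the fallback tolerance is (3 + 1) / 2 = 2,
// i.e. a signature component is forced to 0 below whenever more than two
// of the projected neighbors carry an exact zero in that component.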
89 | (E + 1.0) / 2.0 :
90 | PyFloat_AsDouble(zeroTolerance_obj);
91 |
92 | if (PyErr_Occurred()) return NULL;
93 |
94 | // Get dictionary items with error checking
95 | PyObject* signatures = PyDict_GetItemString(projNNy, "signatures");
96 | PyObject* weights = PyDict_GetItemString(projNNy, "weights");
97 |
98 | if (!signatures || !weights) {
99 | PyErr_SetString(PyExc_KeyError, "projNNy must contain 'signatures' and 'weights' keys");
100 | return NULL;
101 | }
102 |
103 | // Convert to numpy arrays with error checking
104 | PyArrayObject* signatures_array = (PyArrayObject*)PyArray_FROM_OTF(signatures, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
105 | PyArrayObject* weights_array = (PyArrayObject*)PyArray_FROM_OTF(weights, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
106 |
107 | if (!signatures_array || !weights_array) {
108 | Py_XDECREF(signatures_array);
109 | Py_XDECREF(weights_array);
110 | PyErr_SetString(PyExc_TypeError, "Failed to convert signatures or weights to numpy array");
111 | return NULL;
112 | }
113 |
114 | // Pre-allocate vector with proper size
115 | std::vector<double> predictedSignatureY;
116 | predictedSignatureY.reserve(E >= 3 ? E - 1 : 1);
117 |
118 | double* sig_data = (double*)PyArray_DATA(signatures_array);
119 | double* weights_data = (double*)PyArray_DATA(weights_array);
120 | npy_intp* sig_dims = PyArray_DIMS(signatures_array);
121 |
122 | if (E >= 3) {
123 | predictedSignatureY.resize(E - 1, 0.0);
124 | const npy_intp rows = sig_dims[0];
125 | const npy_intp cols = sig_dims[1];
126 |
127 | // Optimize main calculation loop
128 | #pragma omp parallel for
129 | for(long part = 1; part <= E - 1; part++) {
130 | int zero_count = 0;
131 | double sum = 0.0;
132 |
133 | // Vectorized inner loop
134 | #pragma omp simd reduction(+:zero_count,sum)
135 | for(npy_intp i = 0; i < rows; i++) {
136 | const double sig_val = sig_data[i * cols + (part-1)];
137 | zero_count += (sig_val == 0.0);
138 | sum += sig_val * weights_data[i];
139 | }
140 |
141 | predictedSignatureY[part-1] = (zero_count > zeroTolerance) ? 0.0 : sum;
142 | }
143 | } else {
144 | predictedSignatureY.resize(1, 0.0);
145 | const npy_intp total_elements = PyArray_SIZE(signatures_array);
146 |
147 | int zero_count = 0;
148 | double sum = 0.0;
149 |
150 | // Vectorized calculation for E < 3 case
151 | #pragma omp simd reduction(+:zero_count,sum)
152 | for(npy_intp i = 0; i < total_elements; i++) {
153 | zero_count += (sig_data[i] == 0.0);
154 | sum += sig_data[i] * weights_data[i];
155 | }
156 |
157 | predictedSignatureY[0] = (zero_count > zeroTolerance) ?
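// Hashing example (see pattern_vector_difference above): a predicted
// signature (0.4, -0.1) maps to codes (3, 1) -- up=3, down=1, flat=2 --
// and hashes to 3*2! + 1*3! = 12.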
0.0 : sum;
158 | }
159 |
160 | // Calculate pattern value
161 | const int pattern_value = pattern_vector_difference(predictedSignatureY);
162 |
163 | // Create return objects
164 | npy_intp sig_dims_out[] = {static_cast<npy_intp>(predictedSignatureY.size())};
165 | PyObject* predictedSignatureY_array = PyArray_SimpleNew(1, sig_dims_out, NPY_DOUBLE);
166 | if (!predictedSignatureY_array) {
167 | Py_DECREF(signatures_array);
168 | Py_DECREF(weights_array);
169 | return NULL;
170 | }
171 |
172 | // Fast memory copy
173 | memcpy(PyArray_DATA((PyArrayObject*)predictedSignatureY_array),
174 | predictedSignatureY.data(),
175 | predictedSignatureY.size() * sizeof(double));
176 |
177 | PyObject* predictedPatternY = PyLong_FromLong(pattern_value);
178 | if (!predictedPatternY) {
179 | Py_DECREF(signatures_array);
180 | Py_DECREF(weights_array);
181 | Py_DECREF(predictedSignatureY_array);
182 | return NULL;
183 | }
184 |
185 | // Create return dictionary
186 | PyObject* return_dict = PyDict_New();
187 | if (!return_dict ||
188 | PyDict_SetItemString(return_dict, "predictedSignatureY", predictedSignatureY_array) < 0 ||
189 | PyDict_SetItemString(return_dict, "predictedPatternY", predictedPatternY) < 0) {
190 | Py_XDECREF(return_dict);
191 | Py_DECREF(signatures_array);
192 | Py_DECREF(weights_array);
193 | Py_DECREF(predictedPatternY);
194 | Py_DECREF(predictedSignatureY_array);
195 | return NULL;
196 | }
197 |
198 | // Cleanup
199 | Py_DECREF(signatures_array);
200 | Py_DECREF(weights_array);
201 | Py_DECREF(predictedPatternY);
202 | Py_DECREF(predictedSignatureY_array);
203 |
204 | return return_dict;
205 | }
206 |
207 | static PyMethodDef PredictionYMethods[] = {
208 | {"predictionY", (PyCFunction)predictionY, METH_VARARGS | METH_KEYWORDS,
209 | "Predict Y signature and pattern based on projected nearest neighbors"},
210 | {NULL, NULL, 0, NULL}
211 | };
212 |
213 | static struct PyModuleDef predictionymodule = {
214 | PyModuleDef_HEAD_INIT,
215 | "predictionY",
216 | "Prediction Y calculation module",
217 | -1,
218 | PredictionYMethods
219 | };
220 |
221 | PyMODINIT_FUNC PyInit_predictionY(void) {
222 | import_array();
223 | return PyModule_Create(&predictionymodule);
224 | }
225 |
--------------------------------------------------------------------------------
/pattern_causality/cpp/projectedNNs.cpp:
--------------------------------------------------------------------------------
1 | #define PY_SSIZE_T_CLEAN
2 | #include <Python.h>
3 | #include <numpy/arrayobject.h>
4 | #include <cmath>
5 | #include <vector>
6 | #include <algorithm>
7 | #include <cstring>
8 |
9 | // Thread-local storage for reusable vectors
10 | thread_local std::vector<double> weights_2_buffer;
11 | thread_local std::vector<double> exp_weights_buffer;
12 |
13 | // Optimized weights calculation with SIMD support
14 | static PyObject* weights_relative_to_distance(PyObject* dists_vec_obj) {
15 | PyArrayObject* dists_vec = (PyArrayObject*)PyArray_FROM_OTF(dists_vec_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
16 | if (!dists_vec) return NULL;
17 |
18 | const npy_intp n = PyArray_SIZE(dists_vec);
19 | const double* dists_data = (double*)PyArray_DATA(dists_vec);
20 |
21 | // Calculate sum using SIMD
22 | double w_total = 0.0;
23 | #pragma omp simd reduction(+:w_total)
24 | for(npy_intp i = 0; i < n; i++) {
25 | w_total += dists_data[i];
26 | }
27 |
28 | // Handle zero case
29 | w_total = (w_total == 0.0) ?
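// Weighting sketch for weights_relative_to_distance: with d_j the
// distance to projected neighbor j,
//   u_j = d_j / sum(d),   w_j = exp(-u_j) / sum_k exp(-u_k),
// i.e. a softmax over negated normalized distances, so nearer neighbors
// receive larger weights; the 0.0001 fallback below guards an all-zero
// distance sum.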
0.0001 : w_total;
30 | const double w_total_inv = 1.0 / w_total;
31 |
32 | // Reuse pre-allocated vectors
33 | if (weights_2_buffer.size() < n) {
34 | weights_2_buffer.resize(n);
35 | exp_weights_buffer.resize(n);
36 | }
37 |
38 | // Calculate weights_2 using SIMD
39 | #pragma omp simd
40 | for(npy_intp i = 0; i < n; i++) {
41 | weights_2_buffer[i] = dists_data[i] * w_total_inv;
42 | }
43 |
44 | // Calculate exponentials using SIMD
45 | double exp_sum = 0.0;
46 | #pragma omp simd reduction(+:exp_sum)
47 | for(npy_intp i = 0; i < n; i++) {
48 | exp_weights_buffer[i] = std::exp(-weights_2_buffer[i]);
49 | exp_sum += exp_weights_buffer[i];
50 | }
51 |
52 | // Prepare output array
53 | npy_intp dims[] = {n};
54 | PyObject* weights = PyArray_SimpleNew(1, dims, NPY_DOUBLE);
55 | if (!weights) {
56 | Py_DECREF(dists_vec);
57 | return NULL;
58 | }
59 |
60 | // Calculate final weights using SIMD
61 | const double exp_sum_inv = 1.0 / exp_sum;
62 | double* weights_data = (double*)PyArray_DATA((PyArrayObject*)weights);
63 | #pragma omp simd
64 | for(npy_intp i = 0; i < n; i++) {
65 | weights_data[i] = exp_weights_buffer[i] * exp_sum_inv;
66 | }
67 |
68 | Py_DECREF(dists_vec);
69 | return weights;
70 | }
71 |
72 | // Optimized projectedNNs function
73 | static PyObject* projectedNNs(PyObject* self, PyObject* args) {
74 | PyObject *my_obj, *dy_obj, *smy_obj, *psmy_obj, *times_x_obj;
75 | int i, h;
76 |
77 | if (!PyArg_ParseTuple(args, "OOOOOii", &my_obj, &dy_obj, &smy_obj,
78 | &psmy_obj, &times_x_obj, &i, &h)) {
79 | return NULL;
80 | }
81 |
82 | // Convert input arrays with error checking
83 | PyArrayObject* arrays[] = {
84 | (PyArrayObject*)PyArray_FROM_OTF(my_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY),
85 | (PyArrayObject*)PyArray_FROM_OTF(dy_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY),
86 | (PyArrayObject*)PyArray_FROM_OTF(smy_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY),
87 | (PyArrayObject*)PyArray_FROM_OTF(psmy_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY),
88 | (PyArrayObject*)PyArray_FROM_OTF(times_x_obj, NPY_LONG, NPY_ARRAY_IN_ARRAY)
89 | };
90 |
91 | // Check for conversion errors
92 | for (int j = 0; j < 5; j++) {
93 | if (!arrays[j]) {
94 | for (int k = 0; k < j; k++) {
95 | Py_DECREF(arrays[k]);
96 | }
97 | return NULL;
98 | }
99 | }
100 |
101 | // Get array dimensions once
102 | const npy_intp n_times = PyArray_SIZE(arrays[4]);
103 | const npy_intp dy_cols = PyArray_SHAPE(arrays[1])[1];
104 | const npy_intp sig_cols = PyArray_SHAPE(arrays[2])[1];
105 | const npy_intp pat_cols = PyArray_SHAPE(arrays[3])[1];
106 | const npy_intp coord_cols = PyArray_SHAPE(arrays[0])[1];
107 |
108 | // Pre-allocate all output arrays
109 | npy_intp dims[] = {n_times};
110 | PyObject* projected_times = PyArray_SimpleNew(1, dims, NPY_LONG);
111 | PyObject* distances = PyArray_SimpleNew(1, dims, NPY_DOUBLE);
112 |
113 | npy_intp sig_dims[] = {n_times, sig_cols};
114 | npy_intp pat_dims[] = {n_times, pat_cols};
115 | npy_intp coord_dims[] = {n_times, coord_cols};
116 |
117 | PyObject* signatures = PyArray_SimpleNew(2, sig_dims, NPY_DOUBLE);
118 | PyObject* patterns = PyArray_SimpleNew(2, pat_dims, NPY_DOUBLE);
119 | PyObject* coordinates = PyArray_SimpleNew(2, coord_dims, NPY_DOUBLE);
120 |
121 | // Check memory allocation
122 | if (!projected_times || !distances || !signatures || !patterns || !coordinates) {
123 | for (auto arr : arrays) Py_DECREF(arr);
124 | Py_XDECREF(projected_times);
125 | Py_XDECREF(distances);
126 | Py_XDECREF(signatures);
127 | Py_XDECREF(patterns);
128 | Py_XDECREF(coordinates);
129 | return NULL;
130 | }
131 |
132 | //
Get data pointers 133 | long* times_data = (long*)PyArray_DATA(arrays[4]); 134 | double* dy_data = (double*)PyArray_DATA(arrays[1]); 135 | double* smy_data = (double*)PyArray_DATA(arrays[2]); 136 | double* psmy_data = (double*)PyArray_DATA(arrays[3]); 137 | double* my_data = (double*)PyArray_DATA(arrays[0]); 138 | 139 | long* proj_times_data = (long*)PyArray_DATA((PyArrayObject*)projected_times); 140 | double* dist_data = (double*)PyArray_DATA((PyArrayObject*)distances); 141 | double* sig_data = (double*)PyArray_DATA((PyArrayObject*)signatures); 142 | double* pat_data = (double*)PyArray_DATA((PyArrayObject*)patterns); 143 | double* coord_data = (double*)PyArray_DATA((PyArrayObject*)coordinates); 144 | 145 | // Calculate projected times and distances using SIMD 146 | #pragma omp parallel for simd schedule(static) 147 | for(npy_intp j = 0; j < n_times; j++) { 148 | const long proj_time = times_data[j] + h; 149 | proj_times_data[j] = proj_time; 150 | dist_data[j] = dy_data[i * dy_cols + proj_time]; 151 | } 152 | 153 | // Calculate weights 154 | PyObject* weights = weights_relative_to_distance(distances); 155 | if (!weights) { 156 | for (auto arr : arrays) Py_DECREF(arr); 157 | Py_DECREF(projected_times); 158 | Py_DECREF(distances); 159 | Py_DECREF(signatures); 160 | Py_DECREF(patterns); 161 | Py_DECREF(coordinates); 162 | return NULL; 163 | } 164 | 165 | // Copy data using parallel processing where beneficial 166 | #pragma omp parallel for collapse(2) schedule(static) 167 | for(npy_intp j = 0; j < n_times; j++) { 168 | for(npy_intp k = 0; k < sig_cols; k++) { 169 | const long proj_time = proj_times_data[j]; 170 | sig_data[j * sig_cols + k] = smy_data[proj_time * sig_cols + k]; 171 | } 172 | } 173 | 174 | #pragma omp parallel for collapse(2) schedule(static) 175 | for(npy_intp j = 0; j < n_times; j++) { 176 | for(npy_intp k = 0; k < pat_cols; k++) { 177 | const long proj_time = proj_times_data[j]; 178 | pat_data[j * pat_cols + k] = psmy_data[proj_time * pat_cols + k]; 179 | } 180 | } 181 | 182 | #pragma omp parallel for collapse(2) schedule(static) 183 | for(npy_intp j = 0; j < n_times; j++) { 184 | for(npy_intp k = 0; k < coord_cols; k++) { 185 | const long proj_time = proj_times_data[j]; 186 | coord_data[j * coord_cols + k] = my_data[proj_time * coord_cols + k]; 187 | } 188 | } 189 | 190 | // Build return dictionary 191 | PyObject* return_dict = PyDict_New(); 192 | if (!return_dict) { 193 | for (auto arr : arrays) Py_DECREF(arr); 194 | Py_DECREF(projected_times); 195 | Py_DECREF(distances); 196 | Py_DECREF(weights); 197 | Py_DECREF(signatures); 198 | Py_DECREF(patterns); 199 | Py_DECREF(coordinates); 200 | return NULL; 201 | } 202 | 203 | // Set dictionary items 204 | const char* keys[] = {"i", "times_projected", "dists", "weights", 205 | "signatures", "patterns", "coordinates"}; 206 | PyObject* values[] = {PyLong_FromLong(i), projected_times, distances, 207 | weights, signatures, patterns, coordinates}; 208 | 209 | for (int j = 0; j < 7; j++) { 210 | if (PyDict_SetItemString(return_dict, keys[j], values[j]) < 0) { 211 | for (auto arr : arrays) Py_DECREF(arr); 212 | for (auto val : values) Py_DECREF(val); 213 | Py_DECREF(return_dict); 214 | return NULL; 215 | } 216 | Py_DECREF(values[j]); 217 | } 218 | 219 | // Cleanup input arrays 220 | for (auto arr : arrays) { 221 | Py_DECREF(arr); 222 | } 223 | 224 | return return_dict; 225 | } 226 | 227 | static PyMethodDef ProjectedNNsMethods[] = { 228 | {"projectedNNs", projectedNNs, METH_VARARGS, 229 | "Get information about projected nearest 
neighbors"}, 230 | {NULL, NULL, 0, NULL} 231 | }; 232 | 233 | static struct PyModuleDef projectednnsmodule = { 234 | PyModuleDef_HEAD_INIT, 235 | "projectedNNs", 236 | "Projected nearest neighbors calculation module", 237 | -1, 238 | ProjectedNNsMethods 239 | }; 240 | 241 | PyMODINIT_FUNC PyInit_projectedNNs(void) { 242 | import_array(); 243 | return PyModule_Create(&projectednnsmodule); 244 | } 245 | -------------------------------------------------------------------------------- /pattern_causality/cpp/fillPCMatrix.cpp: -------------------------------------------------------------------------------- 1 | #define PY_SSIZE_T_CLEAN 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | // Include SIMD headers based on architecture 8 | #ifdef __ARM_NEON 9 | #include 10 | #elif defined(__x86_64__) || defined(_M_X64) 11 | #include 12 | #endif 13 | 14 | // Optimized norm calculation using available SIMD instructions 15 | static double norm_vec(PyObject* x) { 16 | PyArrayObject* arr = (PyArrayObject*)PyArray_FROM_OTF(x, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY | NPY_ARRAY_ALIGNED); 17 | if (!arr) { 18 | return 0.0; 19 | } 20 | 21 | double sum = 0.0; 22 | double* data = (double*)PyArray_DATA(arr); 23 | npy_intp size = PyArray_SIZE(arr); 24 | 25 | #ifdef __ARM_NEON 26 | // ARM NEON implementation (processes 2 doubles at a time) 27 | float64x2_t sum_vec = vdupq_n_f64(0.0); 28 | npy_intp i; 29 | 30 | for(i = 0; i <= size - 2; i += 2) { 31 | float64x2_t v = vld1q_f64(data + i); 32 | sum_vec = vfmaq_f64(sum_vec, v, v); 33 | } 34 | 35 | sum = vgetq_lane_f64(sum_vec, 0) + vgetq_lane_f64(sum_vec, 1); 36 | 37 | for(; i < size; i++) { 38 | sum += data[i] * data[i]; 39 | } 40 | #elif defined(__AVX__) 41 | // x86 AVX implementation (processes 4 doubles at a time) 42 | __m256d sum_vec = _mm256_setzero_pd(); 43 | npy_intp i; 44 | 45 | for(i = 0; i <= size - 4; i += 4) { 46 | __m256d v = _mm256_load_pd(data + i); 47 | sum_vec = _mm256_add_pd(sum_vec, _mm256_mul_pd(v, v)); 48 | } 49 | 50 | // Horizontal sum 51 | __m128d sum128 = _mm_add_pd(_mm256_extractf128_pd(sum_vec, 0), 52 | _mm256_extractf128_pd(sum_vec, 1)); 53 | sum = _mm_cvtsd_f64(sum128) + _mm_cvtsd_f64(_mm_unpackhi_pd(sum128, sum128)); 54 | 55 | for(; i < size; i++) { 56 | sum += data[i] * data[i]; 57 | } 58 | #else 59 | // Fallback to scalar operations with OpenMP SIMD 60 | #pragma omp simd reduction(+:sum) 61 | for(npy_intp i = 0; i < size; i++) { 62 | sum += data[i] * data[i]; 63 | } 64 | #endif 65 | 66 | Py_DECREF(arr); 67 | return sqrt(sum); 68 | } 69 | 70 | static PyObject* fillPCMatrix(PyObject* self, PyObject* args, PyObject* kwargs) { 71 | PyObject *predictedPatternY_obj, *realPatternY_obj, *predictedSignatureY_obj; 72 | PyObject *realSignatureY_obj, *patternX_obj, *signatureX_obj; 73 | PyObject* weighted_obj; 74 | 75 | static const char* const kwlist[] = { 76 | "weighted", "predictedPatternY", "realPatternY", 77 | "predictedSignatureY", "realSignatureY", 78 | "patternX", "signatureX", NULL 79 | }; 80 | 81 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OOOOOOO", const_cast(kwlist), 82 | &weighted_obj, 83 | &predictedPatternY_obj, &realPatternY_obj, 84 | &predictedSignatureY_obj, &realSignatureY_obj, 85 | &patternX_obj, &signatureX_obj)) { 86 | return NULL; 87 | } 88 | 89 | const bool weighted = PyObject_IsTrue(weighted_obj); 90 | 91 | // Convert inputs to numpy arrays with optimization flags 92 | const int requirements = NPY_ARRAY_IN_ARRAY | NPY_ARRAY_ALIGNED; 93 | PyArrayObject* pred_pattern_arr = 
(PyArrayObject*)PyArray_FROM_OTF(predictedPatternY_obj, NPY_DOUBLE, requirements); 94 | PyArrayObject* real_pattern_arr = (PyArrayObject*)PyArray_FROM_OTF(realPatternY_obj, NPY_DOUBLE, requirements); 95 | PyArrayObject* pattern_x_arr = (PyArrayObject*)PyArray_FROM_OTF(patternX_obj, NPY_DOUBLE, requirements); 96 | 97 | if (!pred_pattern_arr || !real_pattern_arr || !pattern_x_arr) { 98 | Py_XDECREF(pred_pattern_arr); 99 | Py_XDECREF(real_pattern_arr); 100 | Py_XDECREF(pattern_x_arr); 101 | PyErr_SetString(PyExc_TypeError, "Could not convert input to numpy array"); 102 | return NULL; 103 | } 104 | 105 | // Get array data and sizes 106 | const double* const pred_pattern = (const double*)PyArray_DATA(pred_pattern_arr); 107 | const double* const real_pattern = (const double*)PyArray_DATA(real_pattern_arr); 108 | const double* const pattern_x = (const double*)PyArray_DATA(pattern_x_arr); 109 | 110 | const npy_intp size_pred = PyArray_SIZE(pred_pattern_arr); 111 | const npy_intp size_real = PyArray_SIZE(real_pattern_arr); 112 | const npy_intp size_x = PyArray_SIZE(pattern_x_arr); 113 | 114 | // Quick size checks first 115 | if (size_pred == 0 || size_x == 0) { 116 | Py_DECREF(pred_pattern_arr); 117 | Py_DECREF(real_pattern_arr); 118 | Py_DECREF(pattern_x_arr); 119 | PyErr_SetString(PyExc_ValueError, size_pred == 0 ? 120 | "The length of the predicted pattern of Y is ZERO" : 121 | "The length of the causal pattern of X is ZERO"); 122 | return NULL; 123 | } 124 | 125 | // Check for NaN values using available SIMD instructions 126 | bool has_nan = false; 127 | #ifdef __ARM_NEON 128 | npy_intp i; 129 | for(i = 0; i <= size_pred - 2 && !has_nan; i += 2) { 130 | float64x2_t v = vld1q_f64(pred_pattern + i); 131 | uint64x2_t cmp = vceqq_f64(v, v); 132 | if (vgetq_lane_u64(cmp, 0) == 0 || vgetq_lane_u64(cmp, 1) == 0) { 133 | has_nan = true; 134 | break; 135 | } 136 | } 137 | #elif defined(__AVX__) 138 | npy_intp i; 139 | for(i = 0; i <= size_pred - 4 && !has_nan; i += 4) { 140 | __m256d v = _mm256_load_pd(pred_pattern + i); 141 | if (_mm256_movemask_pd(_mm256_cmp_pd(v, v, _CMP_UNORD_Q))) { 142 | has_nan = true; 143 | break; 144 | } 145 | } 146 | #else 147 | npy_intp i = 0; 148 | #endif 149 | 150 | // Handle remaining elements and non-SIMD case 151 | for(; i < size_pred && !has_nan; i++) { 152 | if(std::isnan(pred_pattern[i])) { 153 | has_nan = true; 154 | break; 155 | } 156 | } 157 | 158 | if (!has_nan) { 159 | #ifdef __ARM_NEON 160 | for(i = 0; i <= size_real - 2 && !has_nan; i += 2) { 161 | float64x2_t v = vld1q_f64(real_pattern + i); 162 | uint64x2_t cmp = vceqq_f64(v, v); 163 | if (vgetq_lane_u64(cmp, 0) == 0 || vgetq_lane_u64(cmp, 1) == 0) { 164 | has_nan = true; 165 | break; 166 | } 167 | } 168 | #elif defined(__AVX__) 169 | for(i = 0; i <= size_real - 4 && !has_nan; i += 4) { 170 | __m256d v = _mm256_load_pd(real_pattern + i); 171 | if (_mm256_movemask_pd(_mm256_cmp_pd(v, v, _CMP_UNORD_Q))) { 172 | has_nan = true; 173 | break; 174 | } 175 | } 176 | #else 177 | i = 0; 178 | #endif 179 | 180 | for(; i < size_real && !has_nan; i++) { 181 | if(std::isnan(real_pattern[i])) { 182 | has_nan = true; 183 | break; 184 | } 185 | } 186 | } 187 | 188 | if (!has_nan) { 189 | #ifdef __ARM_NEON 190 | for(i = 0; i <= size_x - 2 && !has_nan; i += 2) { 191 | float64x2_t v = vld1q_f64(pattern_x + i); 192 | uint64x2_t cmp = vceqq_f64(v, v); 193 | if (vgetq_lane_u64(cmp, 0) == 0 || vgetq_lane_u64(cmp, 1) == 0) { 194 | has_nan = true; 195 | break; 196 | } 197 | } 198 | #elif defined(__AVX__) 199 | for(i = 0; i <= size_x - 
4 && !has_nan; i += 4) {
200 | __m256d v = _mm256_load_pd(pattern_x + i);
201 | if (_mm256_movemask_pd(_mm256_cmp_pd(v, v, _CMP_UNORD_Q))) {
202 | has_nan = true;
203 | break;
204 | }
205 | }
206 | #else
207 | i = 0;
208 | #endif
209 |
210 | for(; i < size_x && !has_nan; i++) {
211 | if(std::isnan(pattern_x[i])) {
212 | has_nan = true;
213 | break;
214 | }
215 | }
216 | }
217 |
218 | if (has_nan) {
219 | Py_DECREF(pred_pattern_arr);
220 | Py_DECREF(real_pattern_arr);
221 | Py_DECREF(pattern_x_arr);
222 | return Py_BuildValue("{s:O,s:O}", "real", Py_None, "predicted", Py_None);
223 | }
224 |
225 | // Check if patterns are equal using available SIMD
226 | bool patterns_equal = (size_pred == size_real);
227 | if (patterns_equal) {
228 | #ifdef __ARM_NEON
229 | for(i = 0; i <= size_pred - 2 && patterns_equal; i += 2) {
230 | float64x2_t v1 = vld1q_f64(pred_pattern + i);
231 | float64x2_t v2 = vld1q_f64(real_pattern + i);
232 | uint64x2_t cmp = vceqq_f64(v1, v2);
233 | if (vgetq_lane_u64(cmp, 0) == 0 || vgetq_lane_u64(cmp, 1) == 0) {
234 | patterns_equal = false;
235 | break;
236 | }
237 | }
238 | #elif defined(__AVX__)
239 | for(i = 0; i <= size_pred - 4 && patterns_equal; i += 4) {
240 | __m256d v1 = _mm256_load_pd(pred_pattern + i);
241 | __m256d v2 = _mm256_load_pd(real_pattern + i);
242 | if (_mm256_movemask_pd(_mm256_cmp_pd(v1, v2, _CMP_NEQ_OQ))) {
243 | patterns_equal = false;
244 | break;
245 | }
246 | }
247 | #else
248 | i = 0;
249 | #endif
250 |
251 | for(; i < size_pred && patterns_equal; i++) {
252 | if(pred_pattern[i] != real_pattern[i]) {
253 | patterns_equal = false;
254 | break;
255 | }
256 | }
257 | }
258 |
259 | double predictedCausalityStrength, realCausalityStrength;
260 |
261 | if(patterns_equal) {
262 | if(weighted) {
263 | // Pre-calculate norms
264 | const double pred_norm = norm_vec(predictedSignatureY_obj);
265 | const double real_norm = norm_vec(realSignatureY_obj);
266 | const double sig_x_norm = norm_vec(signatureX_obj);
267 |
268 | if(sig_x_norm > std::numeric_limits<double>::epsilon()) {
269 | const double pred_ratio = pred_norm / sig_x_norm;
270 | const double real_ratio = real_norm / sig_x_norm;
271 | predictedCausalityStrength = std::erf(pred_ratio);
272 | realCausalityStrength = std::erf(real_ratio);
273 | } else {
274 | predictedCausalityStrength = realCausalityStrength = 1.0;
275 | }
276 | } else {
277 | predictedCausalityStrength = realCausalityStrength = 1.0;
278 | }
279 | } else {
280 | predictedCausalityStrength = realCausalityStrength = 0.0;
281 | }
282 |
283 | // Clean up
284 | Py_DECREF(pred_pattern_arr);
285 | Py_DECREF(real_pattern_arr);
286 | Py_DECREF(pattern_x_arr);
287 |
288 | // Return results
289 | return Py_BuildValue("{s:d,s:d}",
290 | "real", realCausalityStrength,
291 | "predicted", predictedCausalityStrength);
292 | }
293 |
294 | static PyMethodDef FillPCMatrixMethods[] = {
295 | {"fillPCMatrix", (PyCFunction)fillPCMatrix, METH_VARARGS | METH_KEYWORDS,
296 | "Fill pattern causality matrix with causality strengths"},
297 | {NULL, NULL, 0, NULL}
298 | };
299 |
300 | static struct PyModuleDef fillpcmatrixmodule = {
301 | PyModuleDef_HEAD_INIT,
302 | "fillPCMatrix",
303 | "Fill pattern causality matrix module",
304 | -1,
305 | FillPCMatrixMethods
306 | };
307 |
308 | PyMODINIT_FUNC PyInit_fillPCMatrix(void) {
309 | import_array();
310 | return PyModule_Create(&fillpcmatrixmodule);
311 | }
--------------------------------------------------------------------------------
/pattern_causality/data/Climate_Indices.csv:
-------------------------------------------------------------------------------- 1 | Date,AO,AAO,NAO,PNA 2 | 1979-01-01,-2.2328,0.2088,-1.38,-0.69 3 | 1979-02-01,-0.6967,0.3563,-0.67,-1.82 4 | 1979-03-01,-0.8141,0.8992,0.78,0.38 5 | 1979-04-01,-1.1568,0.6776,-1.71,0.09 6 | 1979-05-01,-0.2501,0.7237,-1.03,1.35 7 | 1979-06-01,0.9332,1.7,1.6,-1.64 8 | 1979-07-01,0.0385,2.4121,0.83,0.99 9 | 1979-08-01,-0.6841,0.5455,0.96,0.7 10 | 1979-09-01,-0.0459,0.6295,1.01,1.28 11 | 1979-10-01,-1.2434,0.1598,-0.3,1.53 12 | 1979-11-01,0.4751,-0.4225,0.53,0.54 13 | 1979-12-01,1.2948,-0.9507,1.0,-0.38 14 | 1980-01-01,-2.0657,-0.447,-0.75,-0.28 15 | 1980-02-01,-0.9337,-0.9797,0.05,1.74 16 | 1980-03-01,-1.4333,-1.4244,-0.31,-0.35 17 | 1980-04-01,-0.4191,-2.0682,1.29,1.96 18 | 1980-05-01,-1.1548,-0.4787,-1.5,-0.28 19 | 1980-06-01,0.7215,0.2857,-0.37,-0.86 20 | 1980-07-01,-0.6222,-1.9439,-0.42,-0.42 21 | 1980-08-01,-0.1852,-0.9968,-2.24,-1.99 22 | 1980-09-01,0.3126,-1.7008,0.66,-0.05 23 | 1980-10-01,-0.5212,0.5774,-1.77,2.45 24 | 1980-11-01,-1.361,-2.0129,-0.37,1.35 25 | 1980-12-01,-0.0573,-0.3563,0.78,-0.27 26 | 1981-01-01,-0.1163,0.2305,0.37,2.42 27 | 1981-02-01,-0.3316,0.0393,0.92,0.38 28 | 1981-03-01,-1.6447,-0.9655,-1.19,1.6 29 | 1981-04-01,0.4304,-1.462,0.36,-1.02 30 | 1981-05-01,0.1796,-0.344,0.2,1.98 31 | 1981-06-01,-0.4379,0.3517,-0.45,-0.2 32 | 1981-07-01,0.5605,-0.9859,0.05,-0.64 33 | 1981-08-01,-0.2441,-2.1183,0.39,-1.5 34 | 1981-09-01,-1.0401,-1.5094,-1.45,0.15 35 | 1981-10-01,-1.1675,-0.2603,-1.35,-1.45 36 | 1981-11-01,-0.1877,0.6256,-0.38,1.26 37 | 1981-12-01,-1.2157,1.1164,-0.02,-0.12 38 | 1982-01-01,-0.8834,-0.5544,-0.89,-0.86 39 | 1982-02-01,0.9739,0.2772,1.15,-0.68 40 | 1982-03-01,1.0741,1.6035,1.15,-1.4 41 | 1982-04-01,1.4538,1.5314,0.1,-1.89 42 | 1982-05-01,-0.2087,0.1179,-0.53,-0.7 43 | 1982-06-01,-1.1801,0.9201,-1.63,1.93 44 | 1982-07-01,0.0048,-0.4147,1.15,0.97 45 | 1982-08-01,0.3622,0.7793,0.26,0.34 46 | 1982-09-01,0.5577,1.58,1.76,1.05 47 | 1982-10-01,-0.211,-0.7023,-0.74,-0.93 48 | 1982-11-01,0.6609,-0.8492,1.6,-0.48 49 | 1982-12-01,0.9672,-1.9337,1.78,0.75 50 | 1983-01-01,1.3591,-1.3404,1.59,1.18 51 | 1983-02-01,-1.8059,-1.0807,-0.53,1.3 52 | 1983-03-01,-0.5671,0.1663,0.95,2.38 53 | 1983-04-01,-0.7378,0.1494,-0.85,1.26 54 | 1983-05-01,-0.4409,-0.4372,-0.07,-0.19 55 | 1983-06-01,0.3125,-0.2628,0.99,1.87 56 | 1983-07-01,0.1305,1.1141,1.19,1.33 57 | 1983-08-01,1.0978,0.792,1.61,0.53 58 | 1983-09-01,0.1669,-0.696,-1.12,-1.59 59 | 1983-10-01,1.3689,1.1935,0.65,0.09 60 | 1983-11-01,-0.6879,0.7274,-0.98,1.84 61 | 1983-12-01,0.1862,0.4755,0.29,-0.31 62 | 1984-01-01,0.905,-1.0975,1.66,0.97 63 | 1984-02-01,-0.3027,-0.5437,0.72,0.77 64 | 1984-03-01,-2.386,0.2509,-0.37,1.41 65 | 1984-04-01,-0.2836,-0.2042,-0.28,1.7 66 | 1984-05-01,0.4792,-1.2374,0.54,0.32 67 | 1984-06-01,0.0073,0.4261,-0.42,-0.44 68 | 1984-07-01,0.0189,0.8896,-0.07,-2.34 69 | 1984-08-01,0.4657,-0.5484,1.15,-1.11 70 | 1984-09-01,-0.4128,0.327,0.17,0.03 71 | 1984-10-01,-0.2703,-0.0094,-0.07,-0.63 72 | 1984-11-01,-0.9659,-0.0241,-0.06,0.42 73 | 1984-12-01,0.446,-1.4756,0.0,-1.6 74 | 1985-01-01,-2.8057,-0.7948,-1.61,1.63 75 | 1985-02-01,-1.4398,0.2155,-0.49,-0.52 76 | 1985-03-01,0.5514,-0.1336,0.2,-0.92 77 | 1985-04-01,0.6524,0.0315,0.32,-1.06 78 | 1985-05-01,-0.4322,-0.0661,-0.49,-1.03 79 | 1985-06-01,-0.3466,-0.3307,-0.8,1.1 80 | 1985-07-01,-0.3896,1.9137,1.22,0.25 81 | 1985-08-01,-0.0014,0.5948,-0.48,-0.5 82 | 1985-09-01,0.1144,1.5073,-0.52,-0.71 83 | 1985-10-01,1.0351,0.4708,0.9,-1.51 84 | 1985-11-01,-1.2175,1.0847,-0.67,-1.9 85 | 
1985-12-01,-1.9476,1.2403,0.22,1.39 86 | 1986-01-01,-0.5676,0.1578,1.11,0.97 87 | 1986-02-01,-2.9041,-1.588,-1.0,0.53 88 | 1986-03-01,1.9308,-0.7696,1.71,0.83 89 | 1986-04-01,0.103,-0.0867,-0.59,0.09 90 | 1986-05-01,0.3669,-1.8466,0.85,-0.13 91 | 1986-06-01,0.5346,-0.6194,1.22,0.26 92 | 1986-07-01,-0.0083,0.0892,0.12,0.07 93 | 1986-08-01,-0.8263,-0.157,-1.09,-1.45 94 | 1986-09-01,-0.0234,0.8487,-1.12,-0.29 95 | 1986-10-01,1.4246,0.3057,1.55,0.92 96 | 1986-11-01,0.9257,-0.2225,2.29,-0.73 97 | 1986-12-01,0.0598,0.8863,0.99,1.37 98 | 1987-01-01,-1.1476,-0.9504,-1.15,1.0 99 | 1987-02-01,-1.4732,-0.7077,-0.73,0.65 100 | 1987-03-01,-1.7465,-0.1327,0.14,1.17 101 | 1987-04-01,0.387,-0.2856,2.0,1.83 102 | 1987-05-01,0.3252,0.0386,0.98,-1.26 103 | 1987-06-01,-0.7103,-0.7019,-1.82,0.42 104 | 1987-07-01,-0.4663,-1.5313,0.52,-0.01 105 | 1987-08-01,-0.8357,1.4852,-0.83,0.77 106 | 1987-09-01,0.2865,-0.7989,-1.22,-3.07 107 | 1987-10-01,-0.08,0.4555,0.14,0.53 108 | 1987-11-01,-0.5358,1.0604,0.18,1.26 109 | 1987-12-01,-0.5339,0.2723,0.32,0.8 110 | 1988-01-01,0.2647,-0.6117,1.02,0.53 111 | 1988-02-01,-1.0662,0.5508,0.76,1.25 112 | 1988-03-01,-0.1971,-0.219,-0.17,0.69 113 | 1988-04-01,-0.5607,-0.0768,-1.17,1.4 114 | 1988-05-01,-0.8461,-0.7486,0.63,0.6 115 | 1988-06-01,0.0605,-1.0549,0.88,1.13 116 | 1988-07-01,-0.1434,0.576,-0.35,2.16 117 | 1988-08-01,0.2546,-0.7449,0.04,-0.59 118 | 1988-09-01,1.0393,-0.6885,-0.99,-1.11 119 | 1988-10-01,0.0324,-2.314,-1.08,0.66 120 | 1988-11-01,-0.0347,0.401,-0.34,0.13 121 | 1988-12-01,1.6788,1.0745,0.61,0.63 122 | 1989-01-01,3.106,0.6184,1.17,-0.72 123 | 1989-02-01,3.2793,0.8489,2.0,-1.06 124 | 1989-03-01,1.5303,0.6321,1.85,-1.3 125 | 1989-04-01,-0.2502,-0.5731,0.28,-0.54 126 | 1989-05-01,0.8888,2.6906,1.38,-0.14 127 | 1989-06-01,0.345,1.9948,-0.27,-0.63 128 | 1989-07-01,0.8656,1.4576,0.97,-0.18 129 | 1989-08-01,0.5509,-0.1319,0.01,-0.24 130 | 1989-09-01,0.7031,-0.1212,2.05,0.54 131 | 1989-10-01,0.9907,0.1358,-0.03,-1.13 132 | 1989-11-01,0.0338,0.572,0.16,-0.72 133 | 1989-12-01,-0.6437,-0.445,-1.15,0.87 134 | 1990-01-01,1.0007,-0.3521,1.04,-0.34 135 | 1990-02-01,3.4016,1.1507,1.41,-1.86 136 | 1990-03-01,2.99,0.4142,1.46,0.34 137 | 1990-04-01,1.8788,-1.8786,2.0,-0.39 138 | 1990-05-01,0.9428,-1.8034,-1.53,0.21 139 | 1990-06-01,0.3043,0.0931,-0.02,-1.41 140 | 1990-07-01,-0.2958,-1.2151,0.53,0.17 141 | 1990-08-01,-0.1802,0.4657,0.97,1.79 142 | 1990-09-01,-0.2104,1.4816,1.06,-0.5 143 | 1990-10-01,0.6603,0.139,0.23,-0.97 144 | 1990-11-01,0.5206,-0.359,-0.24,-1.73 145 | 1990-12-01,1.2767,-0.3117,0.22,-1.32 146 | 1991-01-01,0.7232,0.8689,0.86,0.66 147 | 1991-02-01,-0.876,-0.8517,1.04,1.07 148 | 1991-03-01,-0.5268,0.5223,-0.2,-0.94 149 | 1991-04-01,0.5302,-0.6394,0.29,0.64 150 | 1991-05-01,0.4865,-0.5386,0.08,-0.81 151 | 1991-06-01,-0.1154,-1.1546,-0.82,-0.97 152 | 1991-07-01,-0.188,-1.2202,-0.49,0.75 153 | 1991-08-01,0.7969,0.0355,1.23,-0.25 154 | 1991-09-01,-0.1122,-0.513,0.48,1.43 155 | 1991-10-01,-0.2519,-0.6232,-0.19,-2.28 156 | 1991-11-01,0.2847,-0.8042,0.48,0.24 157 | 1991-12-01,1.6132,-2.0675,0.46,0.47 158 | 1992-01-01,0.55,0.0726,-0.13,1.28 159 | 1992-02-01,1.1217,-1.6268,1.07,0.29 160 | 1992-03-01,0.9842,-1.0103,0.87,0.77 161 | 1992-04-01,-0.5205,-0.4393,1.86,-0.05 162 | 1992-05-01,1.3414,-2.032,2.63,1.27 163 | 1992-06-01,-0.302,-2.1933,0.2,1.17 164 | 1992-07-01,0.1911,-0.5662,0.16,0.85 165 | 1992-08-01,0.5353,-0.3495,0.85,-0.34 166 | 1992-09-01,-0.6403,0.435,-0.44,-1.31 167 | 1992-10-01,-0.3659,-0.3194,-1.76,0.21 168 | 1992-11-01,0.717,0.1218,1.19,0.85 169 | 
1992-12-01,1.6267,0.2436,0.47,-1.23 170 | 1993-01-01,3.4953,-2.0206,1.6,-0.65 171 | 1993-02-01,0.1845,0.437,0.5,0.55 172 | 1993-03-01,0.7643,-0.3776,0.67,1.27 173 | 1993-04-01,-0.4354,0.0872,0.97,1.54 174 | 1993-05-01,-1.6075,1.2599,-0.78,2.66 175 | 1993-06-01,-0.5195,1.2179,-0.59,0.52 176 | 1993-07-01,-0.5107,1.9571,-3.18,0.15 177 | 1993-08-01,-0.393,1.0829,0.12,-0.14 178 | 1993-09-01,-0.3606,1.061,-0.57,-1.12 179 | 1993-10-01,-0.565,0.7481,-0.71,-0.3 180 | 1993-11-01,1.0018,0.3237,2.56,-0.36 181 | 1993-12-01,-0.1041,1.0281,1.56,0.72 182 | 1994-01-01,-0.2879,0.7227,1.04,-0.12 183 | 1994-02-01,-0.8615,1.157,0.46,-0.73 184 | 1994-03-01,1.881,0.6933,1.26,0.54 185 | 1994-04-01,0.2247,-0.0525,1.14,-0.47 186 | 1994-05-01,-0.1154,-0.1527,-0.57,0.45 187 | 1994-06-01,1.6063,-1.6819,1.52,-1.38 188 | 1994-07-01,0.3507,-0.4922,1.31,0.38 189 | 1994-08-01,0.8275,1.9099,0.38,-1.34 190 | 1994-09-01,-0.0841,-0.947,-1.32,-2.38 191 | 1994-10-01,0.174,-0.5778,-0.97,-0.23 192 | 1994-11-01,1.7794,-0.7926,0.64,-1.67 193 | 1994-12-01,0.8938,0.9327,2.02,0.69 194 | 1995-01-01,-0.1538,1.4485,0.93,0.66 195 | 1995-02-01,1.4289,0.5329,1.14,0.73 196 | 1995-03-01,0.3932,-0.1544,1.25,0.33 197 | 1995-04-01,-0.9631,0.6488,-0.85,0.29 198 | 1995-05-01,-0.8912,1.3967,-1.49,0.01 199 | 1995-06-01,-0.1118,-0.802,0.13,0.74 200 | 1995-07-01,-0.2171,-3.0097,-0.22,-0.34 201 | 1995-08-01,0.5436,-0.6965,0.69,-0.3 202 | 1995-09-01,-0.549,1.1733,0.31,1.52 203 | 1995-10-01,0.075,-0.057,0.19,-0.05 204 | 1995-11-01,-0.7233,0.1429,-1.38,-0.75 205 | 1995-12-01,-2.1271,1.4697,-1.67,0.92 206 | 1996-01-01,-1.2004,0.3321,-0.12,-0.02 207 | 1996-02-01,0.1632,-0.525,-0.07,-0.3 208 | 1996-03-01,-1.4832,0.5435,-0.24,-0.47 209 | 1996-04-01,-1.5251,0.115,-0.17,0.77 210 | 1996-05-01,-0.2264,0.9832,-1.06,0.32 211 | 1996-06-01,0.4967,-0.252,0.56,-1.21 212 | 1996-07-01,0.7146,0.0209,0.67,0.64 213 | 1996-08-01,0.1247,-1.5019,1.02,-0.9 214 | 1996-09-01,-1.14,-1.3144,-0.86,-0.28 215 | 1996-10-01,0.1825,0.9657,-0.33,-0.76 216 | 1996-11-01,0.1364,-1.6669,-0.56,-0.45 217 | 1996-12-01,-1.7208,-0.0231,-1.41,-1.23 218 | 1997-01-01,-0.4568,0.3689,-0.49,0.63 219 | 1997-02-01,1.8887,-0.2442,1.7,0.56 220 | 1997-03-01,1.0908,0.701,1.46,-1.3 221 | 1997-04-01,0.3236,-0.4576,-1.02,0.55 222 | 1997-05-01,-0.9611,1.0281,-0.28,0.78 223 | 1997-06-01,-0.815,-0.4576,-1.47,-0.34 224 | 1997-07-01,-0.4306,0.7797,0.34,0.56 225 | 1997-08-01,0.1206,0.7684,0.83,-0.52 226 | 1997-09-01,0.1945,0.1222,0.61,0.31 227 | 1997-10-01,-0.6997,-0.5947,-1.7,-0.26 228 | 1997-11-01,-0.6611,-1.9046,-0.9,0.91 229 | 1997-12-01,-0.0711,-0.8355,-0.96,1.16 230 | 1998-01-01,-2.0806,0.4125,0.39,0.74 231 | 1998-02-01,-0.1832,0.3896,-0.11,0.89 232 | 1998-03-01,-0.2544,0.7359,0.87,1.01 233 | 1998-04-01,-0.0379,1.9273,-0.68,1.12 234 | 1998-05-01,0.4286,-0.0381,-1.32,-2.22 235 | 1998-06-01,-0.7107,1.031,-2.72,-0.02 236 | 1998-07-01,-0.2117,1.45,-0.48,2.24 237 | 1998-08-01,0.6503,0.9041,-0.02,-0.57 238 | 1998-09-01,-1.0499,-0.1224,-2.0,0.55 239 | 1998-10-01,0.2943,0.3995,-0.29,0.55 240 | 1998-11-01,-1.4494,0.8172,-0.28,0.74 241 | 1998-12-01,1.3534,1.4352,0.87,-0.09 242 | 1999-01-01,0.1103,0.9991,0.77,0.16 243 | 1999-02-01,0.4821,0.4559,0.29,-0.12 244 | 1999-03-01,-1.4916,0.1804,0.23,0.69 245 | 1999-04-01,0.2844,0.9494,-0.95,0.3 246 | 1999-05-01,0.2259,1.639,0.92,-0.15 247 | 1999-06-01,0.707,-1.3249,1.12,0.3 248 | 1999-07-01,-0.002,0.3156,-0.9,-0.54 249 | 1999-08-01,-0.6721,0.0419,0.39,1.97 250 | 1999-09-01,0.0591,-0.0121,0.36,0.44 251 | 1999-10-01,-0.0058,1.6535,0.2,0.41 252 | 1999-11-01,0.6109,0.9006,0.65,0.48 253 
| 1999-12-01,1.0431,1.7838,1.61,0.21 254 | 2000-01-01,1.2702,1.2734,0.6,-0.82 255 | 2000-02-01,1.0758,0.6197,1.7,1.12 256 | 2000-03-01,-0.4514,0.1331,0.77,1.28 257 | 2000-04-01,-0.2785,0.2327,-0.03,-0.35 258 | 2000-05-01,0.9691,1.1271,1.58,-0.28 259 | 2000-06-01,0.5861,0.1172,-0.03,-1.22 260 | 2000-07-01,-0.6494,0.0586,-1.03,-2.28 261 | 2000-08-01,0.1439,-0.6735,-0.29,-0.48 262 | 2000-09-01,0.3949,-1.8529,-0.21,-1.4 263 | 2000-10-01,0.3168,0.347,0.92,0.25 264 | 2000-11-01,-1.5815,-1.5371,-0.92,0.74 265 | 2000-12-01,-2.3544,-1.2903,-0.58,1.23 266 | 2001-01-01,-0.9588,-0.4709,0.25,1.51 267 | 2001-02-01,-0.6224,-0.2649,0.45,-0.16 268 | 2001-03-01,-1.6865,-0.5548,-1.26,0.7 269 | 2001-04-01,0.906,0.515,0.0,-0.47 270 | 2001-05-01,0.452,-0.2622,-0.02,-0.11 271 | 2001-06-01,-0.0153,0.3861,-0.2,-0.94 272 | 2001-07-01,-0.031,-0.9283,-0.25,0.06 273 | 2001-08-01,0.5205,0.9103,-0.07,-0.11 274 | 2001-09-01,-0.7066,1.1614,-0.65,0.1 275 | 2001-10-01,0.7075,1.2771,-0.24,-0.22 276 | 2001-11-01,0.8186,0.9958,0.63,1.09 277 | 2001-12-01,-1.3224,1.4736,-0.83,0.56 278 | 2002-01-01,1.3813,0.7469,0.44,-0.04 279 | 2002-02-01,1.3035,1.3341,1.1,0.14 280 | 2002-03-01,0.902,-1.8235,0.69,-1.3 281 | 2002-04-01,0.7484,0.165,1.18,-2.22 282 | 2002-05-01,0.4014,-2.7985,-0.22,-0.73 283 | 2002-06-01,0.5727,-1.112,0.38,-0.05 284 | 2002-07-01,0.3276,-0.5909,0.62,0.88 285 | 2002-08-01,-0.2285,-0.0994,0.38,0.64 286 | 2002-09-01,-0.0427,-0.8645,-0.7,0.77 287 | 2002-10-01,-1.4885,-2.564,-2.28,-0.65 288 | 2002-11-01,-1.4251,-0.9235,-0.18,1.54 289 | 2002-12-01,-1.5921,1.3085,-0.94,1.59 290 | 2003-01-01,-0.4717,-0.9879,0.16,1.29 291 | 2003-02-01,0.1278,-0.3569,0.62,0.73 292 | 2003-03-01,0.933,-0.1877,0.32,-0.07 293 | 2003-04-01,-0.1781,0.2243,-0.18,0.14 294 | 2003-05-01,1.0167,0.3845,0.01,-2.2 295 | 2003-06-01,-0.1021,-0.7745,-0.07,-0.6 296 | 2003-07-01,0.0753,0.727,0.13,1.23 297 | 2003-08-01,-0.2804,0.678,-0.07,-0.28 298 | 2003-09-01,0.4666,-0.3231,0.01,0.76 299 | 2003-10-01,-0.6698,-0.0249,-1.26,0.97 300 | 2003-11-01,0.6424,-0.7117,0.86,-1.72 301 | 2003-12-01,0.2652,-1.3229,0.64,0.86 302 | 2004-01-01,-1.6858,0.8071,-0.29,0.41 303 | 2004-02-01,-1.5285,-1.1819,-0.14,1.06 304 | 2004-03-01,0.3181,0.4317,1.02,0.3 305 | 2004-04-01,-0.4094,0.151,1.15,0.51 306 | 2004-05-01,-0.0943,0.4596,0.19,-1.76 307 | 2004-06-01,-0.2359,1.1954,-0.89,-0.37 308 | 2004-07-01,-0.2005,1.4743,1.13,0.09 309 | 2004-08-01,-0.7202,-0.0712,-0.48,1.55 310 | 2004-09-01,0.855,0.2536,0.38,-0.08 311 | 2004-10-01,-0.5154,-0.0425,-1.1,-1.39 312 | 2004-11-01,0.6783,-0.2422,0.73,0.31 313 | 2004-12-01,1.2301,-0.9729,1.21,0.26 314 | 2005-01-01,0.3562,-0.1287,1.52,0.02 315 | 2005-02-01,-1.2706,1.2435,-0.06,0.14 316 | 2005-03-01,-1.3479,0.1583,-1.83,0.88 317 | 2005-04-01,-0.0462,0.3554,-0.3,1.28 318 | 2005-05-01,-0.7634,-0.2973,-1.25,1.74 319 | 2005-06-01,-0.3832,-1.4277,-0.05,0.29 320 | 2005-07-01,-0.0302,-0.252,-0.51,0.43 321 | 2005-08-01,0.0261,0.2282,0.37,0.72 322 | 2005-09-01,0.8024,0.2407,0.63,1.63 323 | 2005-10-01,0.0298,0.031,-0.98,0.9 324 | 2005-11-01,0.2277,-0.5515,-0.31,-0.76 325 | 2005-12-01,-2.1039,-1.9678,-0.44,1.38 326 | 2006-01-01,-0.1705,0.3389,1.2651,0.4337 327 | 2006-02-01,-0.1558,-0.2113,-0.5106,-0.1119 328 | 2006-03-01,-1.6038,0.5006,-1.2779,-0.2744 329 | 2006-04-01,0.1383,-0.1693,1.2353,0.4461 330 | 2006-05-01,0.1558,1.6954,-1.1444,-1.2712 331 | 2006-06-01,1.0708,0.438,0.8412,-1.0634 332 | 2006-07-01,0.1027,0.9255,0.9024,1.834 333 | 2006-08-01,-0.2652,-1.7271,-1.7264,-1.4073 334 | 2006-09-01,0.6065,-0.3241,-1.6219,0.424 335 | 
2006-10-01,-1.0291,0.8792,-2.2439,-0.838 336 | 2006-11-01,0.5213,0.1013,0.4368,-1.3886 337 | 2006-12-01,2.2817,0.6384,1.3366,1.8647 338 | 2007-01-01,2.0338,-0.0828,0.2227,0.7169 339 | 2007-02-01,-1.3069,0.0749,-0.47,-0.0874 340 | 2007-03-01,1.1821,-0.5701,1.4425,0.1808 341 | 2007-04-01,0.5443,-1.0352,0.1694,1.245 342 | 2007-05-01,0.8937,-0.6119,0.6638,-0.0571 343 | 2007-06-01,-0.555,-1.198,-1.3064,-0.3786 344 | 2007-07-01,-0.3965,-2.6307,-0.5794,2.2087 345 | 2007-08-01,-0.0337,-0.1079,-0.139,1.9775 346 | 2007-09-01,0.1789,0.0305,0.7213,1.8826 347 | 2007-10-01,0.3835,-0.4337,0.4465,0.5504 348 | 2007-11-01,-0.5187,-0.9838,0.5756,0.6926 349 | 2007-12-01,0.8211,1.9293,0.3436,0.1395 350 | 2008-01-01,0.819,1.2085,0.8901,-0.3179 351 | 2008-02-01,0.9381,1.1474,0.7346,0.5006 352 | 2008-03-01,0.5856,0.5875,0.0761,-0.3225 353 | 2008-04-01,-0.455,-0.8734,-1.0655,-0.9707 354 | 2008-05-01,-1.2047,-0.4898,-1.728,1.3274 355 | 2008-06-01,-0.0898,1.3484,-1.3895,-1.7529 356 | 2008-07-01,-0.4799,0.3202,-1.2741,-0.0988 357 | 2008-08-01,-0.0805,0.087,-1.1602,0.9209 358 | 2008-09-01,-0.3266,1.3856,1.0169,1.1173 359 | 2008-10-01,1.6758,1.2149,-0.0434,0.8563 360 | 2008-11-01,0.0922,0.9197,-0.3199,1.1104 361 | 2008-12-01,0.6478,1.1938,-0.2765,-1.4097 362 | 2009-01-01,0.7997,0.9626,-0.0075,0.606 363 | 2009-02-01,-0.6723,0.4558,0.0565,-0.9476 364 | 2009-03-01,0.1213,0.6046,0.5723,-1.0245 365 | 2009-04-01,0.9725,0.0292,-0.2044,0.2112 366 | 2009-05-01,1.1937,-0.7328,1.6827,-0.5809 367 | 2009-06-01,-1.3507,-0.47,-1.2074,0.3505 368 | 2009-07-01,-1.3559,-1.2342,-2.1529,1.2175 369 | 2009-08-01,-0.0537,-0.6856,-0.1938,0.6496 370 | 2009-09-01,0.8745,-0.0165,1.5089,1.2891 371 | 2009-10-01,-1.5399,0.0848,-1.0322,0.431 372 | 2009-11-01,0.4589,-1.9151,-0.0248,0.2057 373 | 2009-12-01,-3.4128,0.6068,-1.9257,0.3386 374 | 2010-01-01,-2.5868,-0.7569,-1.1086,1.2536 375 | 2010-02-01,-4.2657,-0.7752,-1.9845,0.5788 376 | 2010-03-01,-0.4321,0.1083,-0.8831,2.0197 377 | 2010-04-01,-0.2745,0.3774,-0.7199,1.5365 378 | 2010-05-01,-0.9186,1.021,-1.487,-0.9322 379 | 2010-06-01,-0.013,2.0707,-0.816,-0.2042 380 | 2010-07-01,0.4353,2.4237,-0.425,1.3955 381 | 2010-08-01,-0.1166,1.5101,-1.2227,1.1019 382 | 2010-09-01,-0.8646,0.4018,-0.7948,1.3359 383 | 2010-10-01,-0.467,1.3349,-0.9283,1.7967 384 | 2010-11-01,-0.3757,1.5163,-1.6158,-0.845 385 | 2010-12-01,-2.631,0.2054,-1.8472,-1.7774 386 | 2011-01-01,-1.6831,0.0524,-0.8768,1.2855 387 | 2011-02-01,1.5754,1.0738,0.7012,-1.5807 388 | 2011-03-01,1.4241,-0.2962,0.6129,0.6887 389 | 2011-04-01,2.2748,-0.87,2.4775,-1.5859 390 | 2011-05-01,-0.0351,1.2657,-0.0623,0.1896 391 | 2011-06-01,-0.8578,-0.099,-1.2795,0.2236 392 | 2011-07-01,-0.4716,-1.3842,-1.5122,-0.355 393 | 2011-08-01,-1.0626,-1.202,-1.3475,1.4217 394 | 2011-09-01,0.6647,-1.25,0.536,-0.4196 395 | 2011-10-01,0.7998,0.3882,0.3935,0.6273 396 | 2011-11-01,1.4592,-0.9075,1.3602,-0.7886 397 | 2011-12-01,2.2208,2.5735,2.5213,0.3635 398 | 2012-01-01,-0.2197,1.5833,1.1739,0.6035 399 | 2012-02-01,-0.0363,-0.2831,0.4201,0.7501 400 | 2012-03-01,1.0371,0.2748,1.2655,0.1075 401 | 2012-04-01,-0.0346,0.6656,0.471,0.1542 402 | 2012-05-01,0.1684,0.1528,-0.9068,-0.4228 403 | 2012-06-01,-0.6724,-0.1971,-2.5309,-0.5009 404 | 2012-07-01,0.1678,1.2593,-1.3215,-0.1625 405 | 2012-08-01,0.014,0.4888,-0.9832,-0.1709 406 | 2012-09-01,0.7722,0.5624,-0.5861,-0.4217 407 | 2012-10-01,-1.514,-0.4442,-2.062,-1.1764 408 | 2012-11-01,-0.1106,-1.7009,-0.5782,-1.095 409 | 2012-12-01,-1.7486,-0.7635,0.1706,-1.0097 410 | 2013-01-01,-0.6095,0.071,0.3453,0.5458 411 | 
2013-02-01,-1.0074,0.7156,-0.4531,0.4483 412 | 2013-03-01,-3.1854,1.3748,-1.6119,0.038 413 | 2013-04-01,0.3222,0.6106,0.687,-1.5574 414 | 2013-05-01,0.494,0.3596,0.5691,-0.3636 415 | 2013-06-01,0.5487,-0.2711,0.5208,-0.4348 416 | 2013-07-01,-0.0111,0.9453,0.6722,-0.3068 417 | 2013-08-01,0.1542,-1.5609,0.9702,-0.0293 418 | 2013-09-01,-0.4609,-1.6583,0.2406,0.5445 419 | 2013-10-01,0.2628,-0.4576,-1.2801,-0.3405 420 | 2013-11-01,2.029,0.1888,0.9008,-1.1833 421 | 2013-12-01,1.4749,0.0613,0.9457,-0.8582 422 | 2014-01-01,-0.9688,-0.6826,0.2903,0.9683 423 | 2014-02-01,0.0438,0.3222,1.3352,-0.9513 424 | 2014-03-01,1.2058,0.467,0.7983,-0.2085 425 | 2014-04-01,0.972,0.6137,0.3052,0.2465 426 | 2014-05-01,0.4642,-0.4452,-0.9225,-0.7633 427 | 2014-06-01,-0.5074,0.8412,-0.9701,-1.4583 428 | 2014-07-01,-0.4889,0.247,0.1754,0.9871 429 | 2014-08-01,-0.3715,-0.0589,-1.6815,1.3699 430 | 2014-09-01,0.1019,-1.1189,1.6163,0.9892 431 | 2014-10-01,-1.1344,-0.039,-1.2706,0.887 432 | 2014-11-01,-0.5303,-0.5192,0.678,0.6782 433 | 2014-12-01,0.4129,1.3222,1.8575,0.6689 434 | 2015-01-01,1.0916,0.6747,1.7887,0.6124 435 | 2015-02-01,1.0426,1.2165,1.3228,0.59 436 | 2015-03-01,1.8374,0.7733,1.4497,-0.2299 437 | 2015-04-01,1.2157,1.0286,0.7253,-0.1502 438 | 2015-05-01,0.7628,0.4156,0.1455,-0.1578 439 | 2015-06-01,0.427,0.7113,-0.0668,-0.1731 440 | 2015-07-01,-1.1079,1.678,-3.179,0.7612 441 | 2015-08-01,-0.689,1.0618,-0.7604,0.0789 442 | 2015-09-01,-0.1645,0.5424,-0.6485,-0.9181 443 | 2015-10-01,-0.2501,-0.1699,0.4372,1.775 444 | 2015-11-01,1.945,0.6954,1.7438,-0.1927 445 | 2015-12-01,1.4441,-0.0589,2.2436,0.7771 446 | 2016-01-01,-1.4487,1.3918,0.1165,2.0213 447 | 2016-02-01,-0.0235,1.0933,1.5803,1.4822 448 | 2016-03-01,0.2802,2.0383,0.7344,0.7278 449 | 2016-04-01,-1.0511,0.0969,0.3751,0.8657 450 | 2016-05-01,-0.0357,0.0124,-0.7736,-1.0611 451 | 2016-06-01,0.3129,2.5655,-0.4325,-0.7011 452 | 2016-07-01,0.0848,0.4066,-1.7603,1.0193 453 | 2016-08-01,0.4724,-0.7385,-1.6453,-0.8759 454 | 2016-09-01,0.781,2.3328,0.611,0.182 455 | 2016-10-01,-1.9173,-0.1772,0.4113,1.2393 456 | 2016-11-01,-0.6109,-1.5077,-0.1637,1.517 457 | 2016-12-01,1.7864,-0.7112,0.4783,-0.3534 458 | 2017-01-01,0.942,-0.9822,0.4782,0.28 459 | 2017-02-01,0.3399,-0.0149,1.0048,0.1848 460 | 2017-03-01,1.3654,0.1561,0.737,0.2682 461 | 2017-04-01,-0.0887,0.6192,1.7325,0.3966 462 | 2017-05-01,-0.7301,1.0528,-1.911,-0.3112 463 | 2017-06-01,0.4017,0.5464,0.045,1.0051 464 | 2017-07-01,0.6342,0.7284,1.2556,1.8642 465 | 2017-08-01,0.15,0.7641,-1.0977,0.2317 466 | 2017-09-01,-0.4924,1.2961,-0.6127,-0.3308 467 | 2017-10-01,0.6903,-0.5676,0.1859,-0.398 468 | 2017-11-01,-0.0776,0.7706,-0.0049,-2.0568 469 | 2017-12-01,-0.059,0.9839,0.8816,0.8877 470 | 2018-01-01,-0.2808,1.2752,1.4423,0.3995 471 | 2018-02-01,0.1127,1.0411,1.5778,-1.0256 472 | 2018-03-01,-0.9411,0.1405,-0.9269,-0.886 473 | 2018-04-01,0.5439,-1.1662,1.2411,-0.9091 474 | 2018-05-01,1.1796,-0.0766,2.1208,-1.3363 475 | 2018-06-01,0.3799,-0.0119,1.088,0.5115 476 | 2018-07-01,0.6118,0.3768,1.3893,-0.3674 477 | 2018-08-01,0.8361,-0.3428,1.967,1.2674 478 | 2018-09-01,0.5845,1.4584,1.6736,1.4366 479 | 2018-10-01,0.4128,0.5303,0.9345,0.21 480 | 2018-11-01,-1.1162,0.9907,-0.1113,0.2415 481 | 2018-12-01,0.1097,0.9299,0.6116,0.8604 482 | 2019-01-01,-0.7132,0.6767,0.592,0.8342 483 | 2019-02-01,1.1495,-0.4996,0.2914,-1.076 484 | 2019-03-01,2.1161,0.7446,1.2321,0.2502 485 | 2019-04-01,-0.2553,0.3358,0.466,-0.6116 486 | 2019-05-01,-1.2313,0.3346,-2.623,-0.2907 487 | 2019-06-01,-0.6013,1.465,-1.0886,0.1212 488 | 
2019-07-01,-0.8897,-0.3897,-1.4255,1.1116 489 | 2019-08-01,-0.7218,-1.0804,-1.1684,1.1786 490 | 2019-09-01,0.3062,0.563,-0.1641,2.0 491 | 2019-10-01,-0.0822,-0.9252,-1.4134,-1.0241 492 | 2019-11-01,-1.1934,-1.8398,0.2785,-0.0565 493 | 2019-12-01,0.4121,-1.3599,1.2016,0.1809 494 | 2020-01-01,2.419,-0.2312,1.3432,-0.2369 495 | 2020-02-01,3.4172,0.2746,1.257,0.1722 496 | 2020-03-01,2.6414,1.4264,1.0126,-2.171 497 | 2020-04-01,0.9281,-0.4752,-1.0224,-1.1756 498 | 2020-05-01,-0.0271,0.5766,-0.4098,0.2107 499 | 2020-06-01,-0.1218,1.0708,-0.1469,0.7038 500 | 2020-07-01,-0.4118,-0.5464,-1.2262,1.7276 501 | 2020-08-01,-0.3812,-0.7205,0.1217,1.8218 502 | 2020-09-01,0.6314,0.1943,0.985,0.7536 503 | 2020-10-01,-0.0717,1.2636,-0.6547,-1.1312 504 | 2020-11-01,2.0864,0.8132,2.5445,0.2351 505 | 2020-12-01,-1.736,1.4813,-0.3024,1.5844 506 | 2021-01-01,-2.4836,1.0446,-1.1087,0.1861 507 | 2021-02-01,-1.1907,1.3435,0.1361,-0.3074 508 | 2021-03-01,2.1092,0.0858,0.7299,-0.9688 509 | 2021-04-01,-0.2044,0.8274,-1.4251,-1.0486 510 | 2021-05-01,-0.1606,0.3138,-1.2386,-1.3469 511 | 2021-06-01,0.8446,1.1792,0.7655,0.666 512 | 2021-07-01,0.6302,-0.4595,0.026,0.5624 513 | 2021-08-01,-0.2093,-0.2268,-0.2825,0.9547 514 | 2021-09-01,-0.2516,1.3364,-0.2137,0.4433 515 | 2021-10-01,-0.1458,0.4532,-2.2899,1.134 516 | 2021-11-01,0.093,1.3211,-0.1846,0.7215 517 | 2021-12-01,0.1981,2.1548,0.2885,-2.5584 518 | 2022-01-01,0.8483,0.8252,1.0778,1.0093 519 | 2022-02-01,1.5444,0.6432,1.683,0.6577 520 | 2022-03-01,0.3052,0.5584,0.7677,0.1348 521 | 2022-04-01,-0.6026,0.5319,-0.3646,-0.735 522 | 2022-05-01,1.2235,0.0965,0.7062,-0.8263 523 | 2022-06-01,-0.0742,-0.8713,-0.118,-0.3128 524 | 2022-07-01,0.0249,0.4467,-0.0936,2.5404 525 | 2022-08-01,-0.17,0.7313,1.4699,0.7893 526 | 2022-09-01,-0.6552,1.4685,-1.6105,0.2093 527 | 2022-10-01,1.3457,0.3303,-0.7175,0.1662 528 | 2022-11-01,0.3389,1.7134,0.6922,-0.7341 529 | 2022-12-01,-2.7192,1.7004,-0.1456,-0.662 530 | 2023-01-01,-0.6743,2.3037,1.2503,0.2148 531 | 2023-02-01,1.6004,0.5536,0.9227,-0.64 532 | 2023-03-01,0.2803,-0.2582,-1.1088,-1.6341 533 | 2023-04-01,-0.9731,-0.9207,-0.6284,-0.4242 534 | 2023-05-01,1.1343,1.4518,0.3864,-0.8575 535 | 2023-06-01,-0.2862,-0.4379,-0.5784,0.687 536 | 2023-07-01,-0.1545,-0.8183,-2.1746,1.1454 537 | -------------------------------------------------------------------------------- /pattern_causality/pattern_causality.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """Pattern Causality Analysis Package. 4 | 5 | This module implements pattern causality analysis methods for time series data. 6 | It provides tools for analyzing causal relationships between variables using pattern-based approaches. 
7 |
8 | The package includes methods for:
9 | - Basic pattern causality analysis
10 | - Multivariate time series analysis
11 | - Cross-validation and parameter optimization
12 | - Effect metrics calculation and visualization
13 |
14 | Example:
15 | Basic usage example::
16 |
17 | >>> from pattern_causality import pattern_causality
18 | >>> pc = pattern_causality(verbose=True)
19 | >>> result = pc.pc_lightweight(X, Y, E=3, tau=1)
20 | """
21 |
22 | from __future__ import annotations
23 |
24 | # Standard library imports
25 | import time
26 | from typing import (
27 | Dict,
28 | List,
29 | Optional,
30 | Protocol,
31 | Sequence,
32 | Tuple,
33 | TypeVar,
34 | Union,
35 | )
36 | from dataclasses import dataclass
37 | from importlib.metadata import version, metadata
38 |
39 | # Third-party imports
40 | import numpy as np
41 | import pandas as pd
42 |
43 | # Local imports - the compiled C++ extension modules
44 | try:
45 | from utils.databank import databank
46 | from utils.distancematrix import distancematrix
47 | from utils.fcp import fcp
48 | from utils.fillPCMatrix import fillPCMatrix
49 | from utils.natureOfCausality import natureOfCausality
50 | from utils.pastNNs import pastNNs
51 | from utils.patternhashing import patternhashing
52 | from utils.patternspace import patternspace
53 | from utils.predictionY import predictionY
54 | from utils.projectedNNs import projectedNNs
55 | from utils.signaturespace import signaturespace
56 | from utils.statespace import statespace
57 | except ImportError as e:
58 | import warnings
59 | warnings.warn(f"Failed to import C++ modules: {str(e)}")
60 | # You might want to provide Python fallbacks here if available
61 |
62 | # Package metadata
63 | __version__ = version("pattern-causality")
64 | __author__ = metadata("pattern-causality").get("Author")
65 | __email__ = metadata("pattern-causality").get("Author-email")
66 | __license__ = metadata("pattern-causality").get("License")
67 | __copyright__ = f"Copyright (c) 2024 {__author__}"
68 | __all__ = ['pattern_causality']
69 |
70 | # Type aliases
71 | T = TypeVar('T')
72 | ArrayLike = Union[List[T], np.ndarray, pd.Series]
73 | DatasetType = Union[pd.DataFrame, np.ndarray, List[T]]
74 |
75 |
76 | @dataclass
77 | class PCMatrixResult:
78 | """Data class for storing pattern causality matrix results.
79 |
80 | Attributes:
81 | positive (np.ndarray): Matrix of positive causality values
82 | negative (np.ndarray): Matrix of negative causality values
83 | dark (np.ndarray): Matrix of dark causality values
84 | items (list): List of variable names corresponding to matrix indices
85 | """
86 | positive: np.ndarray
87 | negative: np.ndarray
88 | dark: np.ndarray
89 | items: list
90 |
91 |
92 | class pattern_causality:
93 | """Pattern Causality Analysis Class for Time Series Data.
94 |
95 | This class implements various pattern causality analysis methods for time series data.
96 | All methods return pandas DataFrames for consistency and ease of use.
97 |
98 | The class provides a comprehensive set of tools for analyzing causal relationships
99 | in time series data using pattern-based approaches.
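Example:
    A minimal sketch of the typical workflow (the series ``x`` and ``y``
    are illustrative placeholders for any two equal-length numeric
    series, not package data)::

        >>> import numpy as np
        >>> from pattern_causality import pattern_causality
        >>> rng = np.random.default_rng(0)
        >>> x = rng.standard_normal(200)
        >>> y = rng.standard_normal(200)
        >>> pc = pattern_causality(verbose=False)
        >>> result = pc.pc_lightweight(X=x, Y=y, E=3, tau=1)
        >>> list(result.columns)
        ['Total Causality', 'Positive Causality', 'Negative Causality', 'Dark Causality']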
100 | 101 | Attributes: 102 | verbose (bool): Whether to print detailed information during computation 103 | 104 | Methods: 105 | pc_lightweight: Basic pattern causality analysis for two time series 106 | pc_matrix: Calculate pattern causality matrix for multivariate time series 107 | pc_effect: Calculate effect metrics from pattern causality matrices 108 | pc_accuracy: Calculate pattern causality accuracy metrics 109 | pc_full_details: Detailed pattern causality analysis with time point information 110 | pc_cross_validation: Perform cross validation for pattern causality analysis 111 | optimal_parameters_search: Search for optimal E and tau parameters 112 | to_matrix: Convert flattened causality results to matrix format 113 | format_effects: Format effect results into matrices for visualization 114 | 115 | Note: 116 | All methods are designed to handle NaN values and invalid inputs gracefully. 117 | Error messages and warnings are provided when appropriate. 118 | """ 119 | 120 | def __init__(self, verbose: bool = False): 121 | """Initialize pattern_causality class 122 | 123 | Args: 124 | verbose: Whether to print detailed information during computation 125 | """ 126 | self.verbose = verbose 127 | 128 | @staticmethod 129 | def __version__(): 130 | """Return the current version of the package""" 131 | from importlib.metadata import version 132 | return version("pattern-causality") 133 | 134 | def __repr__(self) -> str: 135 | """Return string representation of the class""" 136 | return f"pattern_causality(verbose={self.verbose})" 137 | 138 | def __str__(self) -> str: 139 | """Return string description of the class""" 140 | return "Pattern Causality Analysis Class for Time Series Data" 141 | 142 | def _print_if_verbose(self, message: str, verbose: bool = None) -> None: 143 | """ 144 | Helper method to print messages when verbose is True 145 | 146 | Args: 147 | message: Message to print 148 | verbose: Override class-level verbose setting 149 | """ 150 | verbose = self.verbose if verbose is None else verbose 151 | if verbose: 152 | print(message) 153 | 154 | def _calculate_basic_stats(self, X: Union[List, np.ndarray], Y: Union[List, np.ndarray]) -> Dict: 155 | """Calculate basic statistics for time series""" 156 | X, Y = np.array(X), np.array(Y) 157 | stats = { 158 | "X_mean": np.mean(X), 159 | "X_std": np.std(X), 160 | "Y_mean": np.mean(Y), 161 | "Y_std": np.std(Y), 162 | "correlation": np.corrcoef(X, Y)[0, 1], 163 | "X_length": len(X), 164 | "missing_values": np.sum(np.isnan(X)) + np.sum(np.isnan(Y)) 165 | } 166 | return stats 167 | 168 | @staticmethod 169 | def _validate_input(X: Union[List, np.ndarray, pd.Series], 170 | Y: Union[List, np.ndarray, pd.Series]) -> tuple: 171 | """Validate and convert input time series to lists""" 172 | # Convert to numpy array first for type checking 173 | if isinstance(X, pd.Series): 174 | X = X.values 175 | elif isinstance(X, list): 176 | X = np.array(X) 177 | 178 | if isinstance(Y, pd.Series): 179 | Y = Y.values 180 | elif isinstance(Y, list): 181 | Y = np.array(Y) 182 | 183 | # Check if numeric 184 | if not np.issubdtype(X.dtype, np.number) or not np.issubdtype(Y.dtype, np.number): 185 | raise TypeError("All elements must be numeric") 186 | 187 | # Convert to list for processing 188 | return X.tolist(), Y.tolist() 189 | 190 | @staticmethod 191 | def _validate_dataset(dataset: Union[pd.DataFrame, np.ndarray, List]) -> pd.DataFrame: 192 | """Validate and convert dataset to DataFrame with numeric values""" 193 | if isinstance(dataset, np.ndarray): 194 | if 
not np.issubdtype(dataset.dtype, np.number):
195 | raise TypeError("All elements in array must be numeric")
196 | return pd.DataFrame(dataset)
197 | elif isinstance(dataset, list):
198 | arr = np.array(dataset).T
199 | if not np.issubdtype(arr.dtype, np.number):
200 | raise TypeError("All elements in list must be numeric")
201 | return pd.DataFrame(arr)
202 | elif isinstance(dataset, pd.DataFrame):
203 | if not all(dataset.dtypes.apply(lambda x: np.issubdtype(x, np.number))):
204 | raise TypeError("All columns in DataFrame must be numeric")
205 | return dataset
206 | else:
207 | raise TypeError("dataset must be a DataFrame, numpy array, or list")
208 |
209 | @staticmethod
210 | def _validate_pc_matrix_result(result: Dict) -> bool:
211 | """Validate that the input is a result from pc_matrix"""
212 | required_keys = {"positive", "negative", "dark", "items"}
213 |
214 | if not all(key in result for key in required_keys):
215 | return False
216 |
217 | matrices = [result["positive"], result["negative"], result["dark"]]
218 | if not all(isinstance(m, np.ndarray) for m in matrices):
219 | return False
220 |
221 | shapes = [m.shape for m in matrices]
222 | if not all(len(shape) == 2 and shape[0] == shape[1] for shape in shapes):
223 | return False
224 |
225 | if not isinstance(result["items"], list) or len(result["items"]) != matrices[0].shape[0]:
226 | return False
227 |
228 | return True
229 |
230 | def pc_lightweight(self,
231 | X: Union[List, np.ndarray, pd.Series],
232 | Y: Union[List, np.ndarray, pd.Series],
233 | E: int,
234 | tau: int,
235 | metric: str = "euclidean",
236 | h: int = 1,
237 | weighted: bool = False,
238 | relative: bool = True,
239 | verbose: bool = None) -> pd.DataFrame:
240 | """
241 | Pattern Causality Lightweight implementation
242 |
243 | Args:
244 | X: Input time series (causal variable)
245 | Y: Input time series (affected variable)
246 | E: Embedding dimension
247 | tau: Time delay
248 | metric: Distance metric to use
249 | h: Prediction horizon
250 | weighted: Whether to use weighted calculations
251 | relative: Whether to use relative differences (default: True; set False for absolute)
252 | verbose: Override class-level verbose setting
253 |
254 | Returns:
255 | DataFrame containing causality metrics
256 | """
257 | verbose = self.verbose if verbose is None else verbose
258 | start_time = time.time()
259 |
260 | X, Y = self._validate_input(X, Y)
261 |
262 | if verbose:
263 | stats = self._calculate_basic_stats(X, Y)
264 | self._print_if_verbose(f"\nInput Statistics:", verbose)
265 | self._print_if_verbose(f"X: mean={stats['X_mean']:.3f}, std={stats['X_std']:.3f}", verbose)
266 | self._print_if_verbose(f"Y: mean={stats['Y_mean']:.3f}, std={stats['Y_std']:.3f}", verbose)
267 | self._print_if_verbose(f"Correlation: {stats['correlation']:.3f}", verbose)
268 | self._print_if_verbose(f"Series length: {stats['X_length']}", verbose)
269 | if stats['missing_values'] > 0:
270 | self._print_if_verbose(f"Warning: {stats['missing_values']} missing values detected", verbose)
271 |
272 | # Initialize constants
273 | NNSPAN = E + 1
274 | CCSPAN = (E - 1) * tau
275 | hashedpatterns = patternhashing(E)
276 |
277 | if hashedpatterns is None or len(hashedpatterns) == 0:
278 | raise ValueError(f"Failed to generate hash patterns for E={E}")
279 |
280 | self._print_if_verbose(f"\nInitializing computation with E={E}, tau={tau}, h={h}", verbose)
281 |
282 | # Calculate shadow attractors
283 | self._print_if_verbose("Calculating state space and signatures...", verbose)
284 | Mx = statespace(X, E,
tau) 285 | My = statespace(Y, E, tau) 286 | SMx = signaturespace(Mx, E, relative=relative) 287 | SMy = signaturespace(My, E, relative=relative) 288 | PSMx = patternspace(SMx, E) 289 | PSMy = patternspace(SMy, E) 290 | Dx = distancematrix(Mx, metric=metric) 291 | Dy = distancematrix(My, metric=metric) 292 | 293 | # Check time series length 294 | FCP = fcp(E, tau, h, X) 295 | al_loop_dur = range(FCP - 1, len(X) - (E - 1) * tau - h + 1) 296 | total_steps = len(al_loop_dur) 297 | 298 | self._print_if_verbose(f"\nProcessing time series...", verbose) 299 | self._print_if_verbose(f"Total time points to analyze: {total_steps}\n", verbose) 300 | 301 | # Initialize causality matrix 302 | predictedPCMatrix = databank("array", [3 ** (E - 1), 3 ** (E - 1), len(Y)]) 303 | real_loop = None 304 | processed_points = 0 305 | valid_points = 0 306 | processable_points = 0 # Points that can be processed (no NaN, within bounds) 307 | 308 | # Main computation loop 309 | for i in al_loop_dur: 310 | processed_points += 1 311 | 312 | # Update progress every 10% 313 | progress_interval = max(1, total_steps // 10) 314 | if verbose and processed_points % progress_interval == 0: 315 | progress_percent = min(100, (processed_points/total_steps) * 100) # Ensure we don't exceed 100% 316 | self._print_if_verbose(f"Progress: {processed_points}/{total_steps} points processed ({progress_percent:.1f}%)", verbose) 317 | 318 | if i + h >= len(My): 319 | continue 320 | 321 | # Check if point can be processed (no NaN values) 322 | if not np.any(np.isnan(Mx[i, :])) and not np.any(np.isnan(My[i + h, :])): 323 | processable_points += 1 324 | NNx = pastNNs(CCSPAN, NNSPAN, Mx, Dx, SMx, PSMx, i, h) 325 | 326 | if NNx is not None and not np.any(np.isnan(NNx["dists"])): 327 | if not np.any(np.isnan(Dy[i, NNx["times"] + h])): 328 | valid_points += 1 329 | if real_loop is None: 330 | real_loop = i 331 | else: 332 | real_loop = np.append(real_loop, i) 333 | 334 | projNNy = projectedNNs(My, Dy, SMy, PSMy, NNx["times"], i, h) 335 | predicted_result = predictionY(E=E, projNNy=projNNy, zeroTolerance=E-1) 336 | 337 | # Get patterns and signatures 338 | predictedSignatureY = predicted_result["predictedSignatureY"] 339 | predictedPatternY = predicted_result["predictedPatternY"] 340 | signatureX = SMx[i, :] 341 | patternX = PSMx[i] 342 | realSignatureY = SMy[i + h, :] 343 | realPatternY = PSMy[i + h] 344 | 345 | # Calculate PC matrix values 346 | pc = fillPCMatrix( 347 | weighted=weighted, 348 | predictedPatternY=predictedPatternY, 349 | realPatternY=realPatternY, 350 | predictedSignatureY=predictedSignatureY, 351 | realSignatureY=realSignatureY, 352 | patternX=patternX, 353 | signatureX=signatureX 354 | ) 355 | 356 | # Find pattern indices 357 | tolerance = 1e-10 358 | hashedpatterns = np.array(hashedpatterns, dtype=np.float64) 359 | patternX_val = np.float64(patternX.item()) 360 | predictedPatternY_val = np.float64(predictedPatternY) 361 | 362 | patternX_matches = np.where(np.abs(hashedpatterns - patternX_val) < tolerance)[0] 363 | predictedPatternY_matches = np.where(np.abs(hashedpatterns - predictedPatternY_val) < tolerance)[0] 364 | 365 | if len(patternX_matches) > 0 and len(predictedPatternY_matches) > 0: 366 | patternX_idx = patternX_matches[0] 367 | predictedPatternY_idx = predictedPatternY_matches[0] 368 | predictedPCMatrix[patternX_idx, predictedPatternY_idx, i] = pc["predicted"] 369 | 370 | # Print final progress update 371 | if verbose and processed_points > 0: 372 | self._print_if_verbose(f"Progress: {total_steps}/{total_steps} points 
processed (100.0%)\n", verbose)
373 |
374 | # Calculate causality metrics
375 | self._print_if_verbose("Calculating final causality metrics...", verbose)
376 | # Convert real_loop to integer type compatible with C++ NPY_LONG
377 | if real_loop is not None:
378 | real_loop = np.asarray(real_loop, dtype=np.int32)
379 | causality = natureOfCausality(predictedPCMatrix, real_loop, hashedpatterns, X, weighted)
380 |
381 | # Calculate percentages
382 | totalCausPercent = 1 - np.nanmean(causality["noCausality"])
383 | mask = causality["noCausality"][real_loop] != 1
384 |
385 | if np.any(mask):
386 | valid_indices = real_loop[mask]
387 | valid_pos = causality["Positive"][valid_indices]
388 | valid_neg = causality["Negative"][valid_indices]
389 | valid_dark = causality["Dark"][valid_indices]
390 |
391 | valid_pos = valid_pos[~np.isnan(valid_pos)]
392 | valid_neg = valid_neg[~np.isnan(valid_neg)]
393 | valid_dark = valid_dark[~np.isnan(valid_dark)]
394 |
395 | posiCausPercent = np.mean(valid_pos) if len(valid_pos) > 0 else 0.0
396 | negaCausPercent = np.mean(valid_neg) if len(valid_neg) > 0 else 0.0
397 | darkCausPercent = np.mean(valid_dark) if len(valid_dark) > 0 else 0.0
398 |
399 | if weighted:
400 | total = posiCausPercent + negaCausPercent + darkCausPercent
401 | if total > 0:
402 | posiCausPercent /= total
403 | negaCausPercent /= total
404 | darkCausPercent /= total
405 | else:
406 | posiCausPercent = negaCausPercent = darkCausPercent = 0.0
407 |
408 | end_time = time.time()
409 | if verbose:
410 | self._print_if_verbose(f"\nComputation completed in {end_time - start_time:.2f} seconds", verbose)
411 | self._print_if_verbose("\nProcessing Summary:", verbose)
412 | self._print_if_verbose(f"Total points analyzed: {total_steps}", verbose)
413 | self._print_if_verbose(f"Points with valid data: {processable_points}", verbose)
414 | self._print_if_verbose(f"Successfully processed: {valid_points}/{processable_points} ({(valid_points/processable_points)*100:.1f}%)", verbose)
415 | self._print_if_verbose("\nResults:", verbose)
416 | self._print_if_verbose(f"Total Causality: {totalCausPercent:.3f}", verbose)
417 | self._print_if_verbose(f"Positive Causality: {posiCausPercent:.3f}", verbose)
418 | self._print_if_verbose(f"Negative Causality: {negaCausPercent:.3f}", verbose)
419 | self._print_if_verbose(f"Dark Causality: {darkCausPercent:.3f}", verbose)
420 |
421 | return pd.DataFrame({
422 | "Total Causality": [totalCausPercent],
423 | "Positive Causality": [posiCausPercent],
424 | "Negative Causality": [negaCausPercent],
425 | "Dark Causality": [darkCausPercent]
426 | })
427 |
428 | def pc_matrix(self,
429 | dataset: Union[pd.DataFrame, np.ndarray, List],
430 | E: int,
431 | tau: int,
432 | metric: str = "euclidean",
433 | h: int = 1,
434 | weighted: bool = False,
435 | relative: bool = True,
436 | verbose: bool = None) -> pd.DataFrame:
437 | """
438 | Calculate pattern causality matrix for multivariate time series
439 |
440 | Args:
441 | dataset: Input dataset
442 | E: Embedding dimension
443 | tau: Time delay
444 | metric: Distance metric to use
445 | h: Prediction horizon
446 | weighted: Whether to use weighted calculations
447 | relative: Whether to use relative differences (default: True; set False for absolute)
448 | verbose: Override class-level verbose setting
449 |
450 | Returns:
451 | pd.DataFrame: Flattened causality matrix where:
452 | - Each row represents a pair of variables (from_var, to_var)
453 | - Columns are ['from_var', 'to_var', 'positive', 'negative', 'dark']
454 | - NaN values indicate
self-causality (when from_var == to_var) 455 | """ 456 | verbose = self.verbose if verbose is None else verbose 457 | start_time = time.time() 458 | 459 | dataset = self._validate_dataset(dataset) 460 | n_cols = dataset.shape[1] 461 | 462 | if verbose: 463 | self._print_if_verbose(f"\nAnalyzing dataset with {n_cols} variables", verbose) 464 | self._print_if_verbose(f"Parameters: E={E}, tau={tau}, h={h}", verbose) 465 | 466 | # Basic dataset statistics 467 | self._print_if_verbose("\nDataset Statistics:", verbose) 468 | for i in range(n_cols): 469 | col = dataset.iloc[:, i] 470 | self._print_if_verbose(f"Variable {i}: mean={col.mean():.3f}, std={col.std():.3f}", verbose) 471 | 472 | # Get variable names 473 | items = dataset.columns.tolist() if dataset.columns is not None else [f"Var_{i}" for i in range(n_cols)] 474 | 475 | # Initialize results list 476 | results = [] 477 | 478 | total_pairs = n_cols * (n_cols - 1) 479 | processed_pairs = 0 480 | 481 | for i in range(n_cols): 482 | X = dataset.iloc[:, i].values.tolist() 483 | 484 | for j in range(n_cols): 485 | if i != j: 486 | processed_pairs += 1 487 | if verbose: 488 | self._print_if_verbose(f"\nAnalyzing pair ({items[i]}, {items[j]}) - Progress: {processed_pairs}/{total_pairs}", verbose) 489 | 490 | if fcp(E, tau, h, X): 491 | Y = dataset.iloc[:, j].values.tolist() 492 | if fcp(E, tau, h, Y): 493 | temp = self.pc_lightweight( 494 | X=X, 495 | Y=Y, 496 | E=E, 497 | tau=tau, 498 | metric=metric, 499 | h=h, 500 | weighted=weighted, 501 | relative=relative, 502 | verbose=False 503 | ) 504 | 505 | # Store results in flattened format 506 | results.append({ 507 | 'from_var': items[i], 508 | 'to_var': items[j], 509 | 'positive': temp["Positive Causality"].values[0], 510 | 'negative': temp["Negative Causality"].values[0], 511 | 'dark': temp["Dark Causality"].values[0] 512 | }) 513 | 514 | if verbose: 515 | self._print_if_verbose(f"Results for ({items[i]}, {items[j]}):", verbose) 516 | self._print_if_verbose(f" Positive: {results[-1]['positive']:.3f}", verbose) 517 | self._print_if_verbose(f" Negative: {results[-1]['negative']:.3f}", verbose) 518 | self._print_if_verbose(f" Dark: {results[-1]['dark']:.3f}", verbose) 519 | else: 520 | # Add NaN values for self-causality 521 | results.append({ 522 | 'from_var': items[i], 523 | 'to_var': items[j], 524 | 'positive': np.nan, 525 | 'negative': np.nan, 526 | 'dark': np.nan 527 | }) 528 | 529 | if verbose: 530 | end_time = time.time() 531 | self._print_if_verbose(f"\nComputation completed in {end_time - start_time:.2f} seconds", verbose) 532 | 533 | # Create DataFrame from results 534 | result_df = pd.DataFrame(results) 535 | 536 | # Optional: Sort by from_var and to_var for consistency 537 | result_df = result_df.sort_values(['from_var', 'to_var']).reset_index(drop=True) 538 | 539 | return result_df 540 | 541 | def to_matrix(self, flat_df: pd.DataFrame) -> Dict[str, pd.DataFrame]: 542 | """Convert flattened causality results to matrix format 543 | 544 | Args: 545 | flat_df: Flattened DataFrame from pc_matrix method 546 | 547 | Returns: 548 | Dictionary containing three matrices: 549 | - 'positive': Matrix of positive causality values 550 | - 'negative': Matrix of negative causality values 551 | - 'dark': Matrix of dark causality values 552 | """ 553 | # Get unique variable names 554 | variables = sorted(list(set(flat_df['from_var'].unique()) | set(flat_df['to_var'].unique()))) 555 | n = len(variables) 556 | 557 | # Initialize matrices 558 | matrices = { 559 | 'positive': pd.DataFrame(np.nan, 
index=variables, columns=variables), 560 | 'negative': pd.DataFrame(np.nan, index=variables, columns=variables), 561 | 'dark': pd.DataFrame(np.nan, index=variables, columns=variables) 562 | } 563 | 564 | # Fill matrices 565 | for _, row in flat_df.iterrows(): 566 | matrices['positive'].loc[row['from_var'], row['to_var']] = row['positive'] 567 | matrices['negative'].loc[row['from_var'], row['to_var']] = row['negative'] 568 | matrices['dark'].loc[row['from_var'], row['to_var']] = row['dark'] 569 | 570 | return matrices 571 | 572 | def format_effects(self, effect_df: pd.DataFrame) -> Dict[str, pd.DataFrame]: 573 | """Format effect results into matrices suitable for visualization 574 | 575 | This method transforms the effect results from pc_effect into a matrix format 576 | that is particularly suitable for visualization and analysis. The output matrices 577 | contain information about received and exerted effects, as well as their differences. 578 | 579 | Args: 580 | effect_df: DataFrame from pc_effect method containing causality effect metrics 581 | 582 | Returns: 583 | Dictionary containing three matrices: 584 | - 'positive': Matrix with columns [Received, Exerted, Difference] 585 | - 'negative': Matrix with columns [Received, Exerted, Difference] 586 | - 'dark': Matrix with columns [Received, Exerted, Difference] 587 | Each row represents a variable. 588 | 589 | Example: 590 | >>> pc = pattern_causality() 591 | >>> effects = pc.pc_effect(matrix_results) 592 | >>> effect_matrices = pc.format_effects(effects) 593 | >>> # Scatter plot for positive causality 594 | >>> plt.figure(figsize=(10, 6)) 595 | >>> plt.scatter(effect_matrices['positive']['Received'], 596 | ... effect_matrices['positive']['Exerted']) 597 | >>> plt.xlabel('Received Effects') 598 | >>> plt.ylabel('Exerted Effects') 599 | >>> plt.title('Positive Causality: Received vs Exerted Effects') 600 | >>> plt.show() 601 | 602 | Raises: 603 | KeyError: If required columns are missing from effect_df 604 | ValueError: If effect_df has invalid structure 605 | """ 606 | # Remove the 'Mean' row if it exists 607 | if 'Mean' in effect_df.index: 608 | effect_df = effect_df.drop('Mean') 609 | 610 | # Initialize the three matrices 611 | matrices = { 612 | 'positive': pd.DataFrame(index=effect_df.index), 613 | 'negative': pd.DataFrame(index=effect_df.index), 614 | 'dark': pd.DataFrame(index=effect_df.index) 615 | } 616 | 617 | # Fill the matrices 618 | for causality_type in ['positive', 'negative', 'dark']: 619 | type_cap = causality_type.capitalize() 620 | matrices[causality_type]['Received'] = effect_df[f'{type_cap}_Received'] 621 | matrices[causality_type]['Exerted'] = effect_df[f'{type_cap}_Exerted'] 622 | matrices[causality_type]['Difference'] = effect_df[f'{type_cap}_Difference'] 623 | 624 | return matrices 625 | 626 | def pc_effect(self, 627 | pcmatrix: pd.DataFrame, 628 | verbose: bool = None) -> pd.DataFrame: 629 | """ 630 | Calculate effect metrics from pattern causality matrices 631 | 632 | Args: 633 | pcmatrix: DataFrame from pc_matrix function (flattened format) 634 | verbose: Override class-level verbose setting 635 | 636 | Returns: 637 | pd.DataFrame: Effect metrics for each variable with columns: 638 | [Positive/Negative/Dark]_[Received/Exerted/Difference] 639 | """ 640 | verbose = self.verbose if verbose is None else verbose 641 | 642 | # Convert to matrix format first 643 | matrices = self.to_matrix(pcmatrix) 644 | 645 | if verbose: 646 | self._print_if_verbose("\nCalculating causality effects...", verbose) 647 | n_vars = 
len(matrices['positive']) 648 | self._print_if_verbose(f"Number of variables: {n_vars}", verbose) 649 | 650 | # Initialize results dictionary 651 | results = {} 652 | variables = matrices['positive'].index 653 | 654 | # Calculate metrics for each causality type 655 | for causality_type in ['positive', 'negative', 'dark']: 656 | matrix = matrices[causality_type].values 657 | 658 | # Calculate metrics 659 | received = np.nansum(matrix, axis=0) * 100 # Sum along rows (received effects) 660 | exerted = np.nansum(matrix, axis=1) * 100 # Sum along columns (exerted effects) 661 | diff = received - exerted 662 | 663 | # Store results 664 | results[f'{causality_type.capitalize()}_Received'] = received 665 | results[f'{causality_type.capitalize()}_Exerted'] = exerted 666 | results[f'{causality_type.capitalize()}_Difference'] = diff 667 | 668 | if verbose: 669 | self._print_if_verbose(f"\n{causality_type.capitalize()} Effects:", verbose) 670 | self._print_if_verbose(f"Mean Received: {np.nanmean(received):.2f}%", verbose) 671 | self._print_if_verbose(f"Mean Exerted: {np.nanmean(exerted):.2f}%", verbose) 672 | self._print_if_verbose(f"Mean Difference: {np.nanmean(diff):.2f}%", verbose) 673 | 674 | # Add detailed statistics for top effects 675 | if np.any(~np.isnan(received)): 676 | self._print_if_verbose("\nTop Variables by Effect:", verbose) 677 | self._print_if_verbose("Received Effects:", verbose) 678 | sorted_idx = np.argsort(received)[-3:] 679 | for idx in sorted_idx[::-1]: 680 | self._print_if_verbose(f" {variables[idx]}: {received[idx]:.2f}%", verbose) 681 | 682 | self._print_if_verbose("Exerted Effects:", verbose) 683 | sorted_idx = np.argsort(exerted)[-3:] 684 | for idx in sorted_idx[::-1]: 685 | self._print_if_verbose(f" {variables[idx]}: {exerted[idx]:.2f}%", verbose) 686 | 687 | # Create DataFrame with results 688 | result_df = pd.DataFrame(results, index=variables) 689 | 690 | # Add summary row 691 | summary = pd.DataFrame({ 692 | col: np.nanmean(result_df[col]) 693 | for col in result_df.columns 694 | }, index=['Mean']) 695 | 696 | result_df = pd.concat([result_df, summary]) 697 | 698 | return result_df 699 | 700 | def pc_accuracy(self, 701 | dataset: Union[pd.DataFrame, np.ndarray, List], 702 | E: int, 703 | tau: int, 704 | metric: str, 705 | h: int, 706 | weighted: bool, 707 | relative: bool = True) -> pd.DataFrame: 708 | """ 709 | Calculate pattern causality accuracy metrics for a dataset. 
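Example:
    An illustrative sketch only; ``data`` stands in for any all-numeric
    DataFrame (for instance the bundled climate indices with the Date
    column dropped) and ``pc`` for a pattern_causality instance::

        >>> acc = pc.pc_accuracy(data, E=3, tau=1, metric="euclidean",
        ...                      h=1, weighted=False)
        >>> float(acc["total"].values[0])  # mean causality over all pairs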
710 |
711 | Args:
712 | dataset: Input dataset (DataFrame, numpy array, or list)
713 | E: Embedding dimension
714 | tau: Time delay
715 | metric: Distance metric to use
716 | h: Prediction horizon
717 | weighted: Whether to use weighted calculations
718 | relative: Whether to use relative differences (default: True; set False for absolute)
719 |
720 | Returns:
721 | pd.DataFrame: Accuracy metrics with shape (1, 6)
722 | Columns: ['E', 'tau', 'total', 'positive', 'negative', 'dark']
723 | """
724 | # Convert input to numpy array properly
725 | dataset = self._validate_dataset(dataset)
726 | n_cols = dataset.shape[1]
727 |
728 | couplingsTotal = databank("matrix", [n_cols, n_cols])
729 | couplingsPosi = databank("matrix", [n_cols, n_cols])
730 | couplingsNega = databank("matrix", [n_cols, n_cols])
731 | couplingsDark = databank("matrix", [n_cols, n_cols])
732 |
733 | # Calculate causality for each pair of variables
734 | for i in range(n_cols):
735 | for j in range(n_cols):
736 | if i != j:
737 | X_list = dataset.iloc[:, i].values.tolist()
738 | Y_list = dataset.iloc[:, j].values.tolist()
739 |
740 | # Check if enough data points for causality calculation
741 | if fcp(E, tau, h, X_list) and fcp(E, tau, h, Y_list):
742 | # Calculate pattern causality
743 | results = self.pc_lightweight(
744 | X_list, Y_list, E, tau, metric, h, weighted, relative
745 | )
746 |
747 | # Store results
748 | couplingsTotal[i, j] = results["Total Causality"].values[0]
749 | couplingsPosi[i, j] = results["Positive Causality"].values[0]
750 | couplingsNega[i, j] = results["Negative Causality"].values[0]
751 | couplingsDark[i, j] = results["Dark Causality"].values[0]
752 |
753 | # Calculate mean metrics
754 | results = pd.DataFrame({
755 | 'E': [E],
756 | 'tau': [tau],
757 | 'total': [np.nanmean(couplingsTotal)],
758 | 'positive': [np.nanmean(couplingsPosi)],
759 | 'negative': [np.nanmean(couplingsNega)],
760 | 'dark': [np.nanmean(couplingsDark)]
761 | })
762 |
763 | return results
764 |
765 | def optimal_parameters_search(self,
766 | Emax: int,
767 | tau_max: int,
768 | metric: str = "euclidean",
769 | h: int = 1,
770 | weighted: bool = False,
771 | relative: bool = True,
772 | dataset: Union[pd.DataFrame, np.ndarray, List] = None,
773 | verbose: bool = None) -> pd.DataFrame:
774 | """
775 | Search for optimal parameters E and tau for pattern causality analysis.
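Example:
    A minimal sketch; ``data`` again stands in for any all-numeric
    DataFrame, and the small grid is chosen only to keep the
    illustration cheap (Emax must be greater than 2)::

        >>> search = pc.optimal_parameters_search(Emax=3, tau_max=2,
        ...                                       dataset=data)
        >>> search.sort_values("Total", ascending=False).head(1)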
776 |
777 | Args:
778 | Emax: Maximum embedding dimension (must be > 2)
779 | tau_max: Maximum time delay
780 | metric: Distance metric to use
781 | h: Prediction horizon
782 | weighted: Whether to use weighted calculations
783 | relative: Whether to use relative differences (default: True; set False for absolute)
784 | dataset: Input dataset (DataFrame, numpy array, or list)
785 | verbose: Override class-level verbose setting
786 |
787 | Returns:
788 | DataFrame containing accuracy results for different parameter combinations
789 | """
790 | verbose = self.verbose if verbose is None else verbose
791 | start_time = time.time()
792 |
793 | if dataset is None:
794 | raise ValueError("Dataset must be provided")
795 |
796 | if Emax < 3:
797 | raise ValueError("Emax must be greater than 2")
798 |
799 | # Validate dataset
800 | dataset = self._validate_dataset(dataset)
801 |
802 | if verbose:
803 | self._print_if_verbose(f"\nStarting parameter search:", verbose)
804 | self._print_if_verbose(f"Dataset shape: {dataset.shape}", verbose)
805 | self._print_if_verbose(f"E range: 2 to {Emax}", verbose)
806 | self._print_if_verbose(f"tau range: 1 to {tau_max}", verbose)
807 |
808 | E_array = range(2, Emax + 1)
809 | tau_array = range(1, tau_max + 1)
810 | total_combinations = len(E_array) * len(tau_array)
811 |
812 | if verbose:
813 | self._print_if_verbose(f"Total parameter combinations to test: {total_combinations}", verbose)
814 |
815 | # Initialize matrices using databank
816 | tests_total = databank("matrix", [len(E_array), len(tau_array)])
817 | tests_posi = databank("matrix", [len(E_array), len(tau_array)])
818 | tests_nega = databank("matrix", [len(E_array), len(tau_array)])
819 | tests_dark = databank("matrix", [len(E_array), len(tau_array)])
820 |
821 | combinations_tested = 0
822 | best_score = -np.inf
823 | best_params = None
824 |
825 | # Main calculation loop
826 | for i, E in enumerate(E_array):
827 | for j, tau in enumerate(tau_array):
828 | combinations_tested += 1
829 | if verbose:
830 | self._print_if_verbose(f"\nTesting combination {combinations_tested}/{total_combinations}", verbose)
831 | self._print_if_verbose(f"Parameters: E={E}, tau={tau}", verbose)
832 |
833 | temp = self.pc_accuracy(
834 | dataset=dataset,
835 | E=E,
836 | tau=tau,
837 | metric=metric,
838 | h=h,
839 | weighted=weighted,
840 | relative=relative
841 | )
842 |
843 | # Store results
844 | total_score = temp["total"].values[0]
845 | tests_total[i, j] = total_score
846 | tests_posi[i, j] = temp["positive"].values[0]
847 | tests_nega[i, j] = temp["negative"].values[0]
848 | tests_dark[i, j] = temp["dark"].values[0]
849 |
850 | # Track best parameters
851 | if total_score > best_score:
852 | best_score = total_score
853 | best_params = (E, tau)
854 |
855 | if verbose:
856 | self._print_if_verbose(f"Results:", verbose)
857 | self._print_if_verbose(f" Total: {total_score:.3f}", verbose)
858 | self._print_if_verbose(f" Positive: {temp['positive'].values[0]:.3f}", verbose)
859 | self._print_if_verbose(f" Negative: {temp['negative'].values[0]:.3f}", verbose)
860 | self._print_if_verbose(f" Dark: {temp['dark'].values[0]:.3f}", verbose)
861 |
862 | # Process results
863 | accuracy_summary = []
864 | for i, E in enumerate(E_array):
865 | for j, tau in enumerate(tau_array):
866 | row_data = {
867 | "E": E,
868 | "tau": tau,
869 | "Total": tests_total[i, j],
870 | "of which Positive": tests_posi[i, j],
871 | "of which Negative": tests_nega[i, j],
872 | "of which Dark": tests_dark[i, j],
873 | }
874 | accuracy_summary.append(row_data)
875 |
876 | # Create final DataFrame without custom index
877 | accuracy_df = pd.DataFrame(accuracy_summary)
878 |
879 | end_time = time.time()
880 | time_taken = end_time - start_time
881 |
882 | if verbose:
883 | self._print_if_verbose(f"\nParameter search completed in {time_taken:.2f} seconds", verbose)
884 | self._print_if_verbose(f"Best parameters found: E={best_params[0]}, tau={best_params[1]}", verbose)
885 | self._print_if_verbose(f"Best total score: {best_score:.3f}", verbose)
886 |
887 | # Additional statistics
888 | self._print_if_verbose("\nParameter Search Statistics:", verbose)
889 | self._print_if_verbose(f"Mean total score: {np.mean(tests_total):.3f}", verbose)
890 | self._print_if_verbose(f"Std total score: {np.std(tests_total):.3f}", verbose)
891 | self._print_if_verbose(f"Score range: [{np.min(tests_total):.3f}, {np.max(tests_total):.3f}]", verbose)
892 |
893 | return accuracy_df
894 |
895 | def pc_full_details(self,
896 | X: Union[List, np.ndarray, pd.Series],
897 | Y: Union[List, np.ndarray, pd.Series],
898 | E: int,
899 | tau: int,
900 | metric: str = "euclidean",
901 | h: int = 1,
902 | weighted: bool = False,
903 | relative: bool = True,
904 | verbose: bool = None) -> pd.DataFrame:
905 | """
906 | Pattern Causality Full Details implementation
907 |
908 | Args:
909 | X: Input time series (causal variable)
910 | Y: Input time series (affected variable)
911 | E: Embedding dimension
912 | tau: Time delay
913 | metric: Distance metric to use
914 | h: Prediction horizon
915 | weighted: Whether to use weighted calculations
916 | relative: Whether to use relative differences (default: True; set False for absolute)
917 | verbose: Override class-level verbose setting
918 |
919 | Returns:
920 | pd.DataFrame: Detailed causality metrics for each time point
921 | Columns: ['No Causality', 'Positive Causality',
922 | 'Negative Causality', 'Dark Causality']
923 | Each row represents a time point. For weighted=True,
924 | values are erf calculation results. For weighted=False,
925 | exactly one column will have value 1 and others 0.
926 | Points outside the valid range will be NaN.
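Example:
    Illustrative only; ``x`` and ``y`` stand in for any two
    equal-length numeric series::

        >>> details = pc.pc_full_details(x, y, E=3, tau=1)
        >>> details.iloc[-1]  # the appended final row holds per-type counts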
927 | """ 928 | verbose = self.verbose if verbose is None else verbose 929 | start_time = time.time() 930 | 931 | X, Y = self._validate_input(X, Y) 932 | 933 | if verbose: 934 | stats = self._calculate_basic_stats(X, Y) 935 | self._print_if_verbose(f"\nInput Statistics:", verbose) 936 | self._print_if_verbose(f"X: mean={stats['X_mean']:.3f}, std={stats['X_std']:.3f}", verbose) 937 | self._print_if_verbose(f"Y: mean={stats['Y_mean']:.3f}, std={stats['Y_std']:.3f}", verbose) 938 | self._print_if_verbose(f"Correlation: {stats['correlation']:.3f}", verbose) 939 | self._print_if_verbose(f"Series length: {stats['X_length']}", verbose) 940 | 941 | # Initialize constants 942 | NNSPAN = E + 1 943 | CCSPAN = (E - 1) * tau 944 | hashedpatterns = patternhashing(E) 945 | 946 | if hashedpatterns is None or len(hashedpatterns) == 0: 947 | raise ValueError(f"Failed to generate hash patterns for E={E}") 948 | 949 | if verbose: 950 | self._print_if_verbose(f"\nInitializing computation with E={E}, tau={tau}, h={h}", verbose) 951 | 952 | # Calculate shadow attractors 953 | Mx = statespace(X, E, tau) 954 | My = statespace(Y, E, tau) 955 | SMx = signaturespace(Mx, E, relative=relative) 956 | SMy = signaturespace(My, E, relative=relative) 957 | PSMx = patternspace(SMx, E) 958 | PSMy = patternspace(SMy, E) 959 | Dx = distancematrix(Mx, metric=metric) 960 | Dy = distancematrix(My, metric=metric) 961 | 962 | # Check time series length 963 | FCP = fcp(E, tau, h, X) 964 | al_loop_dur = range(FCP - 1, len(X) - (E - 1) * tau - h + 1) 965 | total_steps = len(al_loop_dur) 966 | 967 | if verbose: 968 | self._print_if_verbose(f"\nProcessing {total_steps} time points...", verbose) 969 | 970 | # Initialize causality matrix 971 | predictedPCMatrix = databank("array", [3 ** (E - 1), 3 ** (E - 1), len(Y)]) 972 | real_loop = None 973 | processed_points = 0 974 | valid_points = 0 975 | 976 | # Main computation loop 977 | for i in al_loop_dur: 978 | processed_points += 1 979 | if verbose and processed_points % max(1, total_steps // 10) == 0: 980 | self._print_if_verbose(f"Progress: {processed_points}/{total_steps} points processed ({(processed_points/total_steps)*100:.1f}%)", verbose) 981 | 982 | if i + h >= len(My): 983 | continue 984 | 985 | if not np.any(np.isnan(Mx[i, :])) and not np.any(np.isnan(My[i + h, :])): 986 | NNx = pastNNs(CCSPAN, NNSPAN, Mx, Dx, SMx, PSMx, i, h) 987 | 988 | if NNx is not None and not np.any(np.isnan(NNx["dists"])): 989 | if not np.any(np.isnan(Dy[i, NNx["times"] + h])): 990 | valid_points += 1 991 | if real_loop is None: 992 | real_loop = i 993 | else: 994 | real_loop = np.append(real_loop, i) 995 | 996 | projNNy = projectedNNs(My, Dy, SMy, PSMy, NNx["times"], i, h) 997 | predicted_result = predictionY(E=E, projNNy=projNNy, zeroTolerance=E-1) 998 | 999 | # Get patterns and signatures 1000 | predictedSignatureY = predicted_result["predictedSignatureY"] 1001 | predictedPatternY = predicted_result["predictedPatternY"] 1002 | signatureX = SMx[i, :] 1003 | patternX = PSMx[i] 1004 | realSignatureY = SMy[i + h, :] 1005 | realPatternY = PSMy[i + h] 1006 | 1007 | # Calculate PC matrix values 1008 | pc = fillPCMatrix( 1009 | weighted=weighted, 1010 | predictedPatternY=predictedPatternY, 1011 | realPatternY=realPatternY, 1012 | predictedSignatureY=predictedSignatureY, 1013 | realSignatureY=realSignatureY, 1014 | patternX=patternX, 1015 | signatureX=signatureX 1016 | ) 1017 | 1018 | # Find pattern indices 1019 | tolerance = 1e-10 1020 | hashedpatterns = np.array(hashedpatterns, dtype=np.float64) 1021 | 
1021 |                         patternX_val = np.float64(patternX.item())
1022 |                         predictedPatternY_val = np.float64(predictedPatternY)
1023 | 
1024 |                         patternX_matches = np.where(np.abs(hashedpatterns - patternX_val) < tolerance)[0]
1025 |                         predictedPatternY_matches = np.where(np.abs(hashedpatterns - predictedPatternY_val) < tolerance)[0]
1026 | 
1027 |                         if len(patternX_matches) > 0 and len(predictedPatternY_matches) > 0:
1028 |                             patternX_idx = patternX_matches[0]
1029 |                             predictedPatternY_idx = predictedPatternY_matches[0]
1030 |                             predictedPCMatrix[patternX_idx, predictedPatternY_idx, i] = pc["predicted"]
1031 | 
1032 |         # Print final progress update
1033 |         if verbose:
1034 |             self._print_if_verbose(f"Progress: {total_steps}/{total_steps} points processed (100.0%)\n", verbose)
1035 | 
1036 |         # Calculate causality metrics
1037 |         causality = natureOfCausality(predictedPCMatrix, real_loop, hashedpatterns, X, weighted)
1038 | 
1039 |         # Create DataFrame pre-filled with NaN values
1040 |         result_df = pd.DataFrame(
1041 |             np.full((len(X), 4), np.nan),
1042 |             columns=['No Causality', 'Positive Causality', 'Negative Causality', 'Dark Causality']
1043 |         )
1044 | 
1045 |         # Calculate valid range
1046 |         start_idx = FCP - 1
1047 |         end_idx = len(X) - (E - 1) * tau - h
1048 | 
1049 |         # Fill in causality values for the valid range
1050 |         if weighted:
1051 |             # For weighted=True, use the actual values from causality
1052 |             for i in range(len(X)):
1053 |                 if real_loop is not None and i in real_loop:
1054 |                     result_df.loc[i, 'No Causality'] = causality['noCausality'][i]
1055 |                     result_df.loc[i, 'Positive Causality'] = causality['Positive'][i]
1056 |                     result_df.loc[i, 'Negative Causality'] = causality['Negative'][i]
1057 |                     result_df.loc[i, 'Dark Causality'] = causality['Dark'][i]
1058 |         else:
1059 |             # For weighted=False, use binary values (0 or 1)
1060 |             for i in range(len(X)):
1061 |                 if real_loop is not None and i in real_loop:
1062 |                     if causality['noCausality'][i] == 1:
1063 |                         result_df.loc[i, 'No Causality'] = 1
1064 |                         result_df.loc[i, ['Positive Causality', 'Negative Causality', 'Dark Causality']] = 0
1065 |                     elif causality['Positive'][i] == 1:
1066 |                         result_df.loc[i, 'Positive Causality'] = 1
1067 |                         result_df.loc[i, ['No Causality', 'Negative Causality', 'Dark Causality']] = 0
1068 |                     elif causality['Negative'][i] == 1:
1069 |                         result_df.loc[i, 'Negative Causality'] = 1
1070 |                         result_df.loc[i, ['No Causality', 'Positive Causality', 'Dark Causality']] = 0
1071 |                     elif causality['Dark'][i] == 1:
1072 |                         result_df.loc[i, 'Dark Causality'] = 1
1073 |                         result_df.loc[i, ['No Causality', 'Positive Causality', 'Negative Causality']] = 0
1074 | 
1075 |         # Add summary row (counting only non-NaN values)
1076 |         summary = pd.DataFrame({
1077 |             'No Causality': [np.sum(~np.isnan(result_df['No Causality']) & (result_df['No Causality'] > 0))],
1078 |             'Positive Causality': [np.sum(~np.isnan(result_df['Positive Causality']) & (result_df['Positive Causality'] > 0))],
1079 |             'Negative Causality': [np.sum(~np.isnan(result_df['Negative Causality']) & (result_df['Negative Causality'] > 0))],
1080 |             'Dark Causality': [np.sum(~np.isnan(result_df['Dark Causality']) & (result_df['Dark Causality'] > 0))]
1081 |         })
1082 | 
1083 |         result_df = pd.concat([result_df, summary])
1084 | 
1085 |         if verbose:
1086 |             end_time = time.time()
1087 |             self._print_if_verbose(f"\nComputation completed in {end_time - start_time:.2f} seconds", verbose)
1088 |             self._print_if_verbose("\nCausality Summary:", verbose)
1089 |             self._print_if_verbose(f"Valid range: points {start_idx} to {end_idx}", verbose)
1090 |             self._print_if_verbose(f"No Causality Points: {int(summary['No Causality'].values[0])}", verbose)
1091 |             self._print_if_verbose(f"Positive Causality Points: {int(summary['Positive Causality'].values[0])}", verbose)
1092 |             self._print_if_verbose(f"Negative Causality Points: {int(summary['Negative Causality'].values[0])}", verbose)
1093 |             self._print_if_verbose(f"Dark Causality Points: {int(summary['Dark Causality'].values[0])}", verbose)
1094 | 
1095 |         return result_df
1096 | 
1097 |     def pc_cross_validation(self,
1098 |                             X: Union[List, np.ndarray, pd.Series],
1099 |                             Y: Union[List, np.ndarray, pd.Series],
1100 |                             E: int,
1101 |                             tau: int,
1102 |                             metric: str,
1103 |                             h: int,
1104 |                             weighted: bool,
1105 |                             numberset: Sequence[int],
1106 |                             relative: bool = True,
1107 |                             verbose: Union[bool, None] = None) -> pd.DataFrame:
1108 |         """
1109 |         Perform cross validation for pattern causality analysis
1110 | 
1111 |         Args:
1112 |             X: Input time series (causal variable)
1113 |             Y: Input time series (affected variable)
1114 |             E: Embedding dimension
1115 |             tau: Time delay
1116 |             metric: Distance metric to use
1117 |             h: Prediction horizon
1118 |             weighted: Whether to use weighted calculations
1119 |             numberset: Sequence of sample sizes to test
1120 |             relative: Whether to use relative differences (default: True; set False for absolute differences)
1121 |             verbose: Override class-level verbose setting
1122 | 
1123 |         Returns:
1124 |             DataFrame containing cross validation results
1125 |         """
1126 |         verbose = self.verbose if verbose is None else verbose
1127 |         start_time = time.time()
1128 | 
1129 |         if not isinstance(numberset, (list, tuple, np.ndarray)):
1130 |             raise TypeError("numberset must be a sequence of sample sizes (list, tuple, or ndarray).")
1131 | 
1132 |         X, Y = self._validate_input(X, Y)
1133 |         X = np.array(X)
1134 |         Y = np.array(Y)
1135 | 
1136 |         if max(numberset) > len(X):
1137 |             raise ValueError("The largest sample size exceeds the length of the dataset.")
1138 | 
1139 |         if verbose:
1140 |             stats = self._calculate_basic_stats(X, Y)
1141 |             self._print_if_verbose("\nCross Validation Setup:", verbose)
1142 |             self._print_if_verbose(f"Total data points: {len(X)}", verbose)
1143 |             self._print_if_verbose(f"Sample sizes to test: {numberset}", verbose)
1144 |             self._print_if_verbose(f"Parameters: E={E}, tau={tau}, h={h}", verbose)
1145 |             self._print_if_verbose("\nInput Statistics:", verbose)
1146 |             self._print_if_verbose(f"X: mean={stats['X_mean']:.3f}, std={stats['X_std']:.3f}", verbose)
1147 |             self._print_if_verbose(f"Y: mean={stats['Y_mean']:.3f}, std={stats['Y_std']:.3f}", verbose)
1148 |             self._print_if_verbose(f"Correlation: {stats['correlation']:.3f}", verbose)
1149 | 
1150 |         numbers = np.sort(numberset)
1151 |         positive = databank("vector", [len(numberset)])
1152 |         negative = databank("vector", [len(numberset)])
1153 |         dark = databank("vector", [len(numberset)])
1154 | 
1155 |         total_samples = len(numbers)
1156 | 
1157 |         for i, n in enumerate(numbers):
1158 |             if verbose:
1159 |                 self._print_if_verbose(f"\nProcessing sample size {n} ({i+1}/{total_samples})", verbose)
1160 | 
1161 |             sample_indices = np.sort(np.random.choice(len(X), size=n, replace=False))  # sorted to preserve temporal order for the embedding
1162 |             sample_x = X[sample_indices]
1163 |             sample_y = Y[sample_indices]
1164 | 
1165 |             results = self.pc_lightweight(
1166 |                 X=sample_x,
1167 |                 Y=sample_y,
1168 |                 E=E,
1169 |                 tau=tau,
1170 |                 metric=metric,
1171 |                 h=h,
1172 |                 weighted=weighted,
1173 |                 relative=relative,
1174 |                 verbose=False  # Suppress verbose output for individual calculations
1175 |             )
1176 | 
1177 |             positive[i] = results["Positive Causality"].values[0]
1178 |             negative[i] = results["Negative Causality"].values[0]
1179 |             dark[i] = results["Dark Causality"].values[0]
1180 | 
1181 |             if verbose:
1182 |                 self._print_if_verbose(f"Results for n={n}:", verbose)
1183 |                 self._print_if_verbose(f"  Positive: {positive[i]:.3f}", verbose)
1184 |                 self._print_if_verbose(f"  Negative: {negative[i]:.3f}", verbose)
1185 |                 self._print_if_verbose(f"  Dark: {dark[i]:.3f}", verbose)
1186 | 
1187 |         results_df = pd.DataFrame({
1188 |             "positive": positive,
1189 |             "negative": negative,
1190 |             "dark": dark
1191 |         }, index=numbers)
1192 | 
1193 |         if verbose:
1194 |             end_time = time.time()
1195 |             self._print_if_verbose(f"\nCross validation completed in {end_time - start_time:.2f} seconds", verbose)
1196 |             self._print_if_verbose("\nSummary Statistics:", verbose)
1197 |             self._print_if_verbose("Positive Causality:", verbose)
1198 |             self._print_if_verbose(f"  Mean: {np.mean(positive):.3f}", verbose)
1199 |             self._print_if_verbose(f"  Std: {np.std(positive):.3f}", verbose)
1200 |             self._print_if_verbose("Negative Causality:", verbose)
1201 |             self._print_if_verbose(f"  Mean: {np.mean(negative):.3f}", verbose)
1202 |             self._print_if_verbose(f"  Std: {np.std(negative):.3f}", verbose)
1203 |             self._print_if_verbose("Dark Causality:", verbose)
1204 |             self._print_if_verbose(f"  Mean: {np.mean(dark):.3f}", verbose)
1205 |             self._print_if_verbose(f"  Std: {np.std(dark):.3f}", verbose)
1206 | 
1207 |         return results_df
1208 | 
1209 |     @staticmethod
1210 |     def load_data(file_path: str, sep: str = ",", header: Union[int, None] = 0) -> pd.DataFrame:
1211 |         """Load data from a file into a pandas DataFrame.
1212 | 
1213 |         Args:
1214 |             file_path: Path to the data file
1215 |             sep: Separator used in the file (default: ",")
1216 |             header: Row number to use as column names (default: 0)
1217 |                 Use None if there is no header
1218 | 
1219 |         Returns:
1220 |             pd.DataFrame: Loaded data
1221 | 
1222 |         Raises:
1223 |             FileNotFoundError: If the file does not exist
1224 |             ValueError: If the file cannot be parsed
1225 |         """
1226 |         try:
1227 |             data = pd.read_csv(file_path, sep=sep, header=header)
1228 |             return data
1229 |         except FileNotFoundError:
1230 |             raise FileNotFoundError(f"Data file not found: {file_path}")
1231 |         except Exception as e:
1232 |             raise ValueError(f"Error loading data: {str(e)}") from e
1233 | 
1234 |     @staticmethod
1235 |     def get_supported_metrics() -> List[str]:
1236 |         """Return the list of supported distance metrics.
1237 | 
1238 |         Returns:
1239 |             List of supported metric names
1240 |         """
1241 |         return ["euclidean", "manhattan", "chebyshev", "minkowski"]
1242 | 
1243 |     def get_parameter_ranges(self) -> Dict[str, Tuple[int, int]]:
1244 |         """Return recommended parameter ranges.
1245 | 
1246 |         Returns:
1247 |             Dictionary with parameter names and their recommended ranges
1248 |         """
1249 |         return {
1250 |             "E": (2, 10),    # Embedding dimension
1251 |             "tau": (1, 5),   # Time delay
1252 |             "h": (1, 3)      # Prediction horizon
1253 |         }
1254 | 
1255 | 
--------------------------------------------------------------------------------
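
Usage sketch (not a file from the repository): a minimal example of calling pc_full_details on a synthetic pair of coupled series. The class name PatternCausality, its import path, and its constructor signature are assumptions made for illustration only; the actual exported name is defined earlier in pattern_causality.py and pattern_causality/__init__.py and may differ.

    # Hedged sketch: `PatternCausality` and its constructor are assumed names,
    # not confirmed by this listing; adjust to the package's actual export.
    import numpy as np
    from pattern_causality import PatternCausality  # hypothetical import

    rng = np.random.default_rng(0)
    X = np.cumsum(rng.standard_normal(500))                    # driver series
    Y = 0.8 * np.roll(X, 1) + 0.2 * rng.standard_normal(500)   # lagged response

    pc = PatternCausality(verbose=False)
    details = pc.pc_full_details(X, Y, E=3, tau=1, metric="euclidean",
                                 h=1, weighted=True, relative=True)
    # Rows hold per-time-point causality values (NaN outside the valid range);
    # the appended final row counts the non-NaN positive entries per column.
    print(details.tail(1))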
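
Usage sketch (illustrative, reusing the hypothetical pc, X, and Y objects from the sketch above): pc_cross_validation re-estimates the causality strengths on random subsamples of increasing size, so every entry of numberset must be at most len(X); here len(X) is 500.

    # Hedged sketch: builds on the assumed objects defined in the previous sketch.
    cv = pc.pc_cross_validation(X, Y, E=3, tau=1, metric="euclidean",
                                h=1, weighted=True,
                                numberset=[100, 200, 300, 400, 500])
    print(cv)  # index = sample size; columns = positive / negative / dark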
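
Usage sketch (illustrative): the helpers at the end of the class need no causality computation. The CSV path below is the bundled data file from the repository tree, resolved relative to the repository root; adjust it to your working directory.

    # Hedged sketch: static helpers plus the instance-level parameter ranges.
    df = PatternCausality.load_data("pattern_causality/data/Climate_Indices.csv")
    print(PatternCausality.get_supported_metrics())
    # -> ["euclidean", "manhattan", "chebyshev", "minkowski"]
    print(pc.get_parameter_ranges())
    # -> {"E": (2, 10), "tau": (1, 5), "h": (1, 3)}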