├── .github ├── dependabot.yml └── workflows │ └── check.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── assets └── changepoint_example.png ├── examples ├── classification_based_cpd.ipynb ├── configs │ └── test_config_exp.yml └── knn_based_cpd.ipynb ├── pyproject.toml ├── pysatl_cpd ├── __init__.py ├── analysis │ ├── __init__.py │ └── results_analyzer.py ├── core │ ├── __init__.py │ ├── algorithms │ │ ├── __init__.py │ │ ├── abstract_algorithm.py │ │ ├── bayesian │ │ │ ├── __init__.py │ │ │ ├── abstracts │ │ │ │ ├── __init__.py │ │ │ │ ├── idetector.py │ │ │ │ ├── ihazard.py │ │ │ │ ├── ilikelihood.py │ │ │ │ └── ilocalizer.py │ │ │ ├── detectors │ │ │ │ ├── __init__.py │ │ │ │ ├── drop.py │ │ │ │ └── threshold.py │ │ │ ├── hazards │ │ │ │ ├── __init__.py │ │ │ │ └── constant.py │ │ │ ├── likelihoods │ │ │ │ ├── __init__.py │ │ │ │ ├── exponential_conjugate.py │ │ │ │ ├── gaussian.py │ │ │ │ ├── gaussian_conjugate.py │ │ │ │ └── heuristic_gaussian_vs_exponential.py │ │ │ └── localizers │ │ │ │ ├── __init__.py │ │ │ │ └── argmax.py │ │ ├── bayesian_algorithm.py │ │ ├── bayesian_linear_heuristic.py │ │ ├── bayesian_online_algorithm.py │ │ ├── classification │ │ │ ├── __init__.py │ │ │ ├── abstracts │ │ │ │ ├── __init__.py │ │ │ │ ├── iclassifier.py │ │ │ │ ├── iquality_metric.py │ │ │ │ └── istatistic_test.py │ │ │ ├── classifiers │ │ │ │ ├── __init__.py │ │ │ │ ├── decision_tree.py │ │ │ │ ├── knn.py │ │ │ │ ├── logistic_regression.py │ │ │ │ ├── rf.py │ │ │ │ └── svm.py │ │ │ ├── quality_metrics │ │ │ │ ├── __init__.py │ │ │ │ ├── classification │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── accuracy.py │ │ │ │ │ ├── f1.py │ │ │ │ │ └── mcc.py │ │ │ │ └── clustering │ │ │ │ │ └── __init__.py │ │ │ └── test_statistics │ │ │ │ ├── __init__.py │ │ │ │ └── threshold_overcome.py │ │ ├── classification_algorithm.py │ │ ├── density │ │ │ ├── __init__.py │ │ │ └── abstracts │ │ │ │ └── density_based_algorithm.py │ │ ├── graph │ │ │ ├── __init__.py │ │ │ ├── abstracts │ │ │ │ ├── ibuilder.py │ │ │ │ ├── igraph.py │ │ │ │ └── igraph_cpd.py │ │ │ ├── builders │ │ │ │ ├── __init__.py │ │ │ │ ├── list.py │ │ │ │ └── matrix.py │ │ │ ├── graph_cpd.py │ │ │ ├── graph_list.py │ │ │ └── graph_matrix.py │ │ ├── graph_algorithm.py │ │ ├── kliep_algorithm.py │ │ ├── knn │ │ │ ├── __init__.py │ │ │ ├── abstracts │ │ │ │ ├── __init__.py │ │ │ │ └── observation.py │ │ │ ├── classifier.py │ │ │ ├── graph.py │ │ │ └── heap.py │ │ ├── knn_algorithm.py │ │ ├── online_algorithm.py │ │ └── rulsif_algorithm.py │ ├── cpd_core.py │ ├── online_cpd_core.py │ ├── problem.py │ └── scrubber │ │ ├── __init__.py │ │ ├── abstract.py │ │ ├── data_providers.py │ │ └── linear.py ├── cpd_solver.py ├── generator │ ├── __init__.py │ ├── config_parser.py │ ├── dataset_description.py │ ├── distributions.py │ ├── generator.py │ └── saver.py ├── icpd_solver.py ├── labeled_data.py └── online_cpd_solver.py └── tests ├── __init__.py ├── test_configs ├── test_config_1.yml └── test_config_exp.yml ├── test_core ├── __init__.py ├── test_algorithms │ ├── __init__.py │ ├── test_algorithms_utils │ │ ├── __init__.py │ │ └── bayesian │ │ │ ├── __init__.py │ │ │ ├── test_detectors_and_localizers.py │ │ │ ├── test_hazards.py │ │ │ └── test_likelihoods.py │ ├── test_bayesian_algorithm.py │ ├── test_bayesian_linear_heuristic.py │ ├── test_bayesian_online_algorithm.py │ ├── test_classification_algorithms.py │ └── test_graph_algorithm.py ├── test_cpd_core.py ├── test_online_cpd_core.py └── test_scrubber │ ├── 
__init__.py │ ├── test_dataproviders.py │ └── test_linear_scrubber.py ├── test_generator ├── __init__.py ├── test_distributions.py └── test_generator.py ├── test_labeled_data.py ├── test_online_solver.py └── test_solver.py /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "monthly" 7 | groups: 8 | github-actions: 9 | patterns: 10 | - "*" 11 | - package-ecosystem: "pip" 12 | directory: "/" 13 | schedule: 14 | interval: "weekly" 15 | commit-message: 16 | prefix: "deps: " 17 | groups: 18 | pip-dependencies: 19 | patterns: 20 | - "*" 21 | -------------------------------------------------------------------------------- /.github/workflows/check.yaml: -------------------------------------------------------------------------------- 1 | name: Check code and run tests 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | test: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | python-version: [ "3.10", "3.11", "3.12", "3.13" ] 11 | steps: 12 | - uses: actions/checkout@v4 13 | - name: Set up Python ${{ matrix.python-version }} 14 | uses: actions/setup-python@v5 15 | with: 16 | python-version: ${{ matrix.python-version }} 17 | - name: Install Poetry 18 | run: | 19 | pipx install poetry==2.1.0 20 | 21 | - name: Install dependencies 22 | run: | 23 | poetry install --with dev 24 | 25 | - name: Lint with ruff 26 | run: | 27 | poetry run ruff check 28 | 29 | - name: Check types 30 | run: | 31 | poetry run mypy 32 | 33 | - name: Run tests 34 | run: | 35 | poetry run pytest --cov=pysatl_cpd 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 110 | .pdm.toml 111 | .pdm-python 112 | .pdm-build/ 113 | 114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
162 | .idea/
163 | /poetry.lock
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 |   - repo: https://github.com/pre-commit/pre-commit-hooks
3 |     rev: v4.5.0
4 |     hooks:
5 |       - id: check-yaml
6 |       - id: end-of-file-fixer
7 |       - id: trailing-whitespace
8 |   - repo: https://github.com/astral-sh/ruff-pre-commit
9 |     rev: v0.9.6
10 |     hooks:
11 |       - id: ruff
12 |         args: [ --fix ]
13 |       - id: ruff-format
14 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # PySATL CPD project contributing guide
2 |
3 | Thank you very much for deciding to contribute to our project.
4 | We follow the simple and clear contribution guidelines accepted in the open-source research community.
5 | The guideline instructions are divided into sections depending on the part of the project you want to contribute to.
6 |
7 | ## Rules for adding commits
8 |
9 | Create a new branch if you want to add something new.
10 | Give the branch a two-part name separated by `/`.
11 |
12 | Commits are added according to conventional commits.
13 | They follow the format `type(scope): description`.
14 |
15 | The `type` field must take one of these values:
16 |
17 | * `feat` to add new functionality
18 | * `fix` to fix a bug in the project
19 | * `refactor` for code refactoring, such as renaming a variable
20 | * `test` to add tests, refactor them
21 | * `struct` for changes related to a change in the structure of the project (BUT NOT CODE), for example, changing
22 | folder locations
23 | * `ci` for various ci/cd tasks
24 | * `docs` for changes in documentation
25 | * `chore` for changes outside the code, for example, gitignore and readme updates
26 |
27 | The `description` field contains the gist of the changes in the imperative mood, in English, without a dot at the end;
28 | the first word is a lowercase verb.
29 |
30 | Examples:
31 |
32 | * Good: "feat: add module for future scrubber implementations"
33 | * Bad: "Added module for future scrubber implementations."
34 |
35 | ## Source code developers guide
36 |
37 | 1. Fork this repository using your GitHub account.
38 | 2. Install `git` and clone your forked copy of the `repo`.
39 | 3. Build the project following the build instructions in the [README.md](./README.md) file, and make sure everything is OK.
40 | 4. Run tests following the instructions in the [README.md](./README.md) file, and make sure all tests pass.
41 | 5. Implement a new feature or fix an existing one in the source code.
42 | 6. Commit your changes.
43 | 7. Open a pull-request.
44 | 8. Wait for review from developers of the project.
45 | 9. Fix major and minor issues if present.
46 | 10. Get your work merged into `main`!
47 |
48 | ## Rules for collaborators
49 |
50 | ### Basic Tips
51 |
52 | 1. Don't use merge, only rebase (to keep a linear commit history)
53 | 2. Do not change other people's branches unless absolutely necessary
54 | 3. Recheck your commit history before creating a pull request
55 | 4. **Check you're on the right branch**; never commit directly to `main`
56 |
57 | ### Rules for pull requests
58 |
59 | It is **forbidden** to merge your pull request into the branch yourself.
60 |
61 | Each pull request must be reviewed by one of the maintainers:
62 |
63 | * Alexey Tatyanenko ([alexdtat](https://github.com/alexdtat))
64 | * Artemii Patov ([artemiipatov](https://github.com/artemiipatov))
65 | * Vladimir Kutuev ([vkutuev](https://github.com/vkutuev))
66 |
67 | If you click on the green button, then **make sure** that it says `REBASE AND MERGE`!
68 |
69 | Reviews take place in the form of comments on pull requests, discussions in the team chat, and personal
70 | communication.
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024-present PySATL Contributors
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # PySATL-CPD
2 |
3 | [status-shield]: https://img.shields.io/github/actions/workflow/status/PySATL/pysatl-cpd/.github/workflows/check.yaml?branch=main&event=push&style=for-the-badge&label=Checks
4 | [status-url]: https://github.com/PySATL/pysatl-cpd/blob/main/.github/workflows/check.yaml
5 | [license-shield]: https://img.shields.io/github/license/PySATL/pysatl-cpd.svg?style=for-the-badge&color=blue
6 | [license-url]: LICENSE
7 |
8 | [![Checks][status-shield]][status-url]
9 | [![MIT License][license-shield]][license-url]
10 |
11 | PySATL **Change point detection** subproject (*abbreviated pysatl-cpd*) is a module designed for detecting anomalies in time series data, i.e., significant deviations from expected patterns or trends. Anomalies can indicate unusual events or changes in a system, making their detection crucial for monitoring and analysis in various fields such as finance, healthcare, and network security.
12 | 13 | At the moment, the module implements the following CPD algorithms: 14 | * Bayesian algorithm (scrubbing, online and linear heuristic online versions) 15 | * Density based algorithms: 16 | * KLIEP 17 | * RuLSIF 18 | * Graph algorithm 19 | * k-NN based algorithm 20 | * Algorithms, based on classifiers: 21 | * SVM 22 | * KNN 23 | * Decision Tree 24 | * Logistic Regression 25 | * Random Forest 26 | --- 27 | 28 | ## Requirements 29 | 30 | - Python 3.10+ 31 | - Poetry 2.1.0+ 32 | 33 | ## Installation 34 | 35 | Clone the repository: 36 | 37 | ```bash 38 | git clone https://github.com/PySATL/pysatl-cpd 39 | ``` 40 | 41 | Install dependencies: 42 | 43 | ```bash 44 | poetry install 45 | ``` 46 | 47 | ## Change point detection example: 48 | 49 | ```python 50 | from pathlib import Path 51 | 52 | from pysatl_cpd.labeled_data import LabeledCpdData 53 | 54 | # import change point detection solver 55 | from pysatl_cpd.online_cpd_solver import OnlineCpdSolver 56 | from pysatl_cpd.core.problem import CpdProblem 57 | 58 | # import algorithm 59 | from pysatl_cpd.core.algorithms.bayesian_online_algorithm import BayesianOnline 60 | from pysatl_cpd.core.algorithms.bayesian.likelihoods.gaussian_conjugate import GaussianConjugate 61 | from pysatl_cpd.core.algorithms.bayesian.hazards.constant import ConstantHazard 62 | from pysatl_cpd.core.algorithms.bayesian.detectors.threshold import ThresholdDetector 63 | from pysatl_cpd.core.algorithms.bayesian.localizers.argmax import ArgmaxLocalizer 64 | 65 | 66 | labeled_data = LabeledCpdData.generate_cp_datasets(Path("examples/configs/test_config_exp.yml"))["example"] 67 | 68 | # specify CPD algorithm with parameters 69 | algorithm = BayesianOnline( 70 | learning_sample_size=5, 71 | likelihood=GaussianConjugate(), 72 | hazard=ConstantHazard(rate=1.0 / (1.0 - 0.5 ** (1.0 / 500))), 73 | detector=ThresholdDetector(threshold=0.005), 74 | localizer=ArgmaxLocalizer(), 75 | ) 76 | # make a solver object 77 | solver = OnlineCpdSolver(CpdProblem(True), algorithm, labeled_data) 78 | 79 | 80 | # then run algorithm 81 | cpd_results = solver.run() 82 | 83 | # print the results 84 | print(cpd_results) 85 | # output: 86 | # Located change points: (200;400) 87 | # Expected change point: (200;400) 88 | # Difference: () 89 | # Computation time (sec): 0.2 90 | 91 | # visualize data with located changepoints 92 | cpd_results.visualize() 93 | ``` 94 | ![example_of_output](assets/changepoint_example.png) 95 | 96 | ## Development 97 | 98 | Install requirements 99 | 100 | ```bash 101 | poetry install --with dev 102 | ``` 103 | 104 | ## Pre-commit 105 | 106 | Install pre-commit hooks: 107 | 108 | ```shell 109 | poetry run pre-commit install 110 | ``` 111 | 112 | Starting manually: 113 | 114 | ```shell 115 | poetry run pre-commit run --all-files --color always --verbose --show-diff-on-failure 116 | ``` 117 | 118 | ## License 119 | 120 | This project is licensed under the terms of the **MIT** license. See the [LICENSE](LICENSE) for more information. 
121 | -------------------------------------------------------------------------------- /assets/changepoint_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PySATL/pysatl-cpd/9f496f4cdf1401d3d405e28a86e82ab848bb6b52/assets/changepoint_example.png -------------------------------------------------------------------------------- /examples/configs/test_config_exp.yml: -------------------------------------------------------------------------------- 1 | - name: example 2 | distributions: 3 | - type: exponential 4 | length: 200 5 | parameters: 6 | rate: 2.0 7 | - type: beta 8 | length: 200 9 | parameters: 10 | alpha: 1.0 11 | beta: 5.0 12 | - type: uniform 13 | length: 200 14 | parameters: 15 | min: 0 16 | max: 0.5 17 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "pysatl_cpd" 3 | version = "0.1.0" 4 | description = "Batch module for changepoint detection" 5 | authors = [ 6 | "Temerlan Akhmetov ", 7 | "Alexey Tatyanenko ", 8 | "Artemii Patov ", 9 | "Vladimir Kutuev ", 10 | "Aleksei Ivanov ", 11 | "Artem Romanyuk ", 12 | "Aleksandra Listkova ", 13 | ] 14 | license = "MIT" 15 | readme = "README.md" 16 | repository = "https://github.com/PySATL/pysatl-cpd" 17 | 18 | 19 | [tool.poetry.dependencies] 20 | python = "^3.10" 21 | numpy = "^2.0.0" 22 | scipy = "^1.14.0" 23 | matplotlib = "^3.9.1" 24 | scikit-learn = "^1.5.2" 25 | PyQt5 = "^5.15.11" 26 | 27 | [tool.poetry.group.dev.dependencies] 28 | pytest = "^8.2.2" 29 | mypy = "^1.10.1" 30 | ruff = "^0.11.2" 31 | pre-commit = "^4.1.0" 32 | pyyaml = "^6.0.1" 33 | matplotlib = "^3.9.1" 34 | ipykernel = "^6.29.5" 35 | hypothesis = "^6.122.1" 36 | scipy-stubs = "^1.15.2" 37 | types-pyyaml = "^6.0.12" 38 | microsoft-python-type-stubs = {git = "https://github.com/microsoft/python-type-stubs.git"} 39 | pytest-cov = "^6.0.0" 40 | 41 | 42 | [tool.ruff] 43 | line-length = 120 44 | indent-width = 4 45 | respect-gitignore = true 46 | exclude = ["*.ipynb"] 47 | 48 | [tool.ruff.format] 49 | quote-style = "double" 50 | indent-style = "space" 51 | docstring-code-format = true 52 | skip-magic-trailing-comma = false 53 | line-ending = "auto" 54 | 55 | [tool.ruff.lint] 56 | select = ["A", "E", "F", "I", "PL", "RUF", "SIM", "UP", "W"] 57 | ignore = ["PLR0913"] 58 | 59 | [tool.mypy] 60 | files = "pysatl_cpd" 61 | mypy_path = "pysatl_cpd" 62 | strict = true 63 | 64 | 65 | [build-system] 66 | requires = ["poetry-core"] 67 | build-backend = "poetry.core.masonry.api" 68 | -------------------------------------------------------------------------------- /pysatl_cpd/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PySATL/pysatl-cpd/9f496f4cdf1401d3d405e28a86e82ab848bb6b52/pysatl_cpd/__init__.py -------------------------------------------------------------------------------- /pysatl_cpd/analysis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PySATL/pysatl-cpd/9f496f4cdf1401d3d405e28a86e82ab848bb6b52/pysatl_cpd/analysis/__init__.py -------------------------------------------------------------------------------- /pysatl_cpd/analysis/results_analyzer.py: -------------------------------------------------------------------------------- 1 | class CpdResultsAnalyzer: 2 | """Class for counting 
the confusion matrix and other metrics from CPD results"""
3 |
4 |     @staticmethod
5 |     def count_confusion_matrix(
6 |         predicted: list[int], actual: list[int], window: tuple[int, int] | None = None
7 |     ) -> tuple[int, int, int, int]:
8 |         """Static method for computing the confusion matrix for the hypothesis of equality of change points on a window.
9 |
10 |         :param predicted: first array or list of change points, determined as the prediction
11 |         :param actual: second array or list of change points, determined as the actual ones
12 |         :param window: tuple of two indices (start, stop) that determines a window for the hypothesis
13 |
14 |         :return: tuple of integers (true-positive, true-negative, false-positive, false-negative)
15 |         """
16 |         if not predicted and not actual:
17 |             raise ValueError("no results and no predictions")
18 |         if window is None:
19 |             window = (min(predicted + actual), max(predicted + actual))
20 |         predicted_set = set(predicted)
21 |         actual_set = set(actual)
22 |         tp = tn = fp = fn = 0
23 |         for i in range(window[0], window[1]):
24 |             if i in predicted_set and i in actual_set:
25 |                 tp += 1
26 |             elif i in predicted_set:
27 |                 fp += 1
28 |             elif i in actual_set:
29 |                 fn += 1
30 |             else:
31 |                 # A point that is neither predicted nor actual is a true negative.
32 |                 tn += 1
33 |         return tp, tn, fp, fn
34 |
35 |     @staticmethod
36 |     def count_accuracy(predicted: list[int], actual: list[int], window: tuple[int, int] | None = None) -> float:
37 |         """Static method for computing the accuracy metric for the hypothesis of equality of change points on a window.
38 |
39 |         :param predicted: first array or list of change points, determined as the prediction
40 |         :param actual: second array or list of change points, determined as the actual ones
41 |         :param window: tuple of two indices (start, stop) that determines a window for the hypothesis
42 |
43 |         :return: float, accuracy metric
44 |         """
45 |         tp, tn, fp, fn = CpdResultsAnalyzer.count_confusion_matrix(predicted, actual, window)
46 |         if tp + tn == 0:
47 |             return 0.0
48 |         return (tp + tn) / (tp + tn + fp + fn)
49 |
50 |     @staticmethod
51 |     def count_precision(predicted: list[int], actual: list[int], window: tuple[int, int] | None = None) -> float:
52 |         """Static method for computing the precision metric for the hypothesis of equality of change points on a window.
53 |
54 |         :param predicted: first array or list of change points, determined as the prediction
55 |         :param actual: second array or list of change points, determined as the actual ones
56 |         :param window: tuple of two indices (start, stop) that determines a window for the hypothesis
57 |
58 |         :return: float, precision metric
59 |         """
60 |         tp, _, fp, _ = CpdResultsAnalyzer.count_confusion_matrix(predicted, actual, window)
61 |         if tp == 0:
62 |             return 0.0
63 |         return tp / (tp + fp)
64 |
65 |     @staticmethod
66 |     def count_recall(predicted: list[int], actual: list[int], window: tuple[int, int] | None = None) -> float:
67 |         """Static method for computing the recall metric for the hypothesis of equality of change points on a window.
68 |
69 |         :param predicted: first array or list of change points, determined as the prediction
70 |         :param actual: second array or list of change points, determined as the actual ones
71 |         :param window: tuple of two indices (start, stop) that determines a window for the hypothesis
72 |
73 |         :return: float, recall metric
74 |         """
75 |         tp, _, _, fn = CpdResultsAnalyzer.count_confusion_matrix(predicted, actual, window)
76 |         if tp == 0:
77 |             return 0.0
78 |         return tp / (tp + fn)
79 |
--------------------------------------------------------------------------------
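A quick usage sketch for these metrics (an editorial illustration; the change point lists below are made up, not taken from the library's tests):

```python
from pysatl_cpd.analysis.results_analyzer import CpdResultsAnalyzer

predicted = [100, 205, 400]  # change points reported by some detector
actual = [100, 200, 400]  # ground-truth change points

# The window defaults to (min, max) over all points involved.
tp, tn, fp, fn = CpdResultsAnalyzer.count_confusion_matrix(predicted, actual)
print(CpdResultsAnalyzer.count_accuracy(predicted, actual))
print(CpdResultsAnalyzer.count_precision(predicted, actual))
print(CpdResultsAnalyzer.count_recall(predicted, actual))
```

/pysatl_cpd/core/__init__.py: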
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/PySATL/pysatl-cpd/9f496f4cdf1401d3d405e28a86e82ab848bb6b52/pysatl_cpd/core/__init__.py -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PySATL/pysatl-cpd/9f496f4cdf1401d3d405e28a86e82ab848bb6b52/pysatl_cpd/core/algorithms/__init__.py -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/abstract_algorithm.py: -------------------------------------------------------------------------------- 1 | from typing import Protocol 2 | 3 | import numpy as np 4 | import numpy.typing as npt 5 | 6 | 7 | class Algorithm(Protocol): 8 | """Protocol for change point detection algorithms' interface""" 9 | 10 | def detect(self, window: npt.NDArray[np.float64]) -> int: 11 | """Function for finding change points in window 12 | 13 | :param window: part of global data for finding change points 14 | :return: the number of change points in the window 15 | """ 16 | ... 17 | 18 | def localize(self, window: npt.NDArray[np.float64]) -> list[int]: 19 | """Function for finding coordinates of change points in window 20 | 21 | :param window: part of global data for finding change points 22 | :return: list of window change points 23 | """ 24 | ... 25 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/bayesian/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for Bayesian CPD algorithm's customization blocks. 3 | """ 4 | 5 | __author__ = "Alexey Tatyanenko" 6 | __copyright__ = "Copyright (c) 2024 Alexey Tatyanenko" 7 | __license__ = "SPDX-License-Identifier: MIT" 8 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/bayesian/abstracts/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for abstract base classes for Bayesian CPD algorithm. 3 | """ 4 | 5 | __author__ = "Alexey Tatyanenko" 6 | __copyright__ = "Copyright (c) 2024 Alexey Tatyanenko" 7 | __license__ = "SPDX-License-Identifier: MIT" 8 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/bayesian/abstracts/idetector.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for Bayesian CPD algorithm detector's abstract base class. 3 | """ 4 | 5 | __author__ = "Alexey Tatyanenko" 6 | __copyright__ = "Copyright (c) 2024 Alexey Tatyanenko" 7 | __license__ = "SPDX-License-Identifier: MIT" 8 | 9 | 10 | from typing import Protocol 11 | 12 | import numpy as np 13 | import numpy.typing as npt 14 | 15 | 16 | class IDetector(Protocol): 17 | """ 18 | Protocol for detectors that detect a change point with given growth probabilities for run lengths. 19 | """ 20 | 21 | def detect(self, growth_probs: npt.NDArray[np.float64]) -> bool: 22 | """ 23 | Checks whether a changepoint occurred with given growth probabilities at the time. 24 | :param growth_probs: growth probabilities for run lengths at the time. 25 | :return: boolean indicating whether a changepoint occurred 26 | """ 27 | ... 28 | 29 | def clear(self) -> None: 30 | """ 31 | Clears the detector's state. 32 | """ 33 | ... 
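
# Editorial sketch: a hypothetical implementation of this protocol (the names below
# are illustrative only; the library's real detectors live in
# pysatl_cpd/core/algorithms/bayesian/detectors/).
class ShortRunDetector:
    """Hypothetical detector: flags a change point when the most probable run length is very short."""

    def __init__(self, max_short_run: int = 5) -> None:
        self._max_short_run = max_short_run

    def detect(self, growth_probs: npt.NDArray[np.float64]) -> bool:
        # A change point is likely if most probability mass sits on a short run.
        return growth_probs.size > 0 and int(growth_probs.argmax()) < self._max_short_run

    def clear(self) -> None:
        # This detector keeps no state between observations.
        pass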
34 |
--------------------------------------------------------------------------------
/pysatl_cpd/core/algorithms/bayesian/abstracts/ihazard.py:
--------------------------------------------------------------------------------
1 | """
2 | Module for Bayesian CPD algorithm hazard function's abstract base class.
3 | """
4 |
5 | __author__ = "Alexey Tatyanenko"
6 | __copyright__ = "Copyright (c) 2024 Alexey Tatyanenko"
7 | __license__ = "SPDX-License-Identifier: MIT"
8 |
9 |
10 | from typing import Protocol
11 |
12 | import numpy as np
13 | import numpy.typing as npt
14 |
15 |
16 | class IHazard(Protocol):
17 |     """
18 |     Hazard function protocol.
19 |     """
20 |
21 |     def hazard(self, run_lengths: npt.NDArray[np.intp]) -> npt.NDArray[np.float64]:
22 |         """
23 |         Calculates the hazard function for given run lengths.
24 |         :param run_lengths: run lengths at the time.
25 |         :return: hazard function's values for given run lengths.
26 |         """
27 |         ...
28 |
--------------------------------------------------------------------------------
/pysatl_cpd/core/algorithms/bayesian/abstracts/ilikelihood.py:
--------------------------------------------------------------------------------
1 | """
2 | Module for Bayesian CPD algorithm likelihood function's abstract base class and its extension for a sample's
3 | probability evaluation with estimated prior parameters.
4 | """
5 |
6 | __author__ = "Alexey Tatyanenko"
7 | __copyright__ = "Copyright (c) 2025 PySATL project"
8 | __license__ = "SPDX-License-Identifier: MIT"
9 |
10 |
11 | from typing import Protocol
12 |
13 | import numpy as np
14 | import numpy.typing as npt
15 |
16 |
17 | class ILikelihood(Protocol):
18 |     """
19 |     Likelihood function's protocol.
20 |     """
21 |
22 |     def learn(self, learning_sample: npt.NDArray[np.float64]) -> None:
23 |         """
24 |         Learns the initial parameters of a likelihood function on a given sample.
25 |         :param learning_sample: a sample for parameter learning.
26 |         """
27 |         ...
28 |
29 |     def predict(self, observation: np.float64) -> npt.NDArray[np.float64]:
30 |         """
31 |         Returns predictive probabilities for a given observation based on stored parameters.
32 |         :param observation: an observation from a sample.
33 |         :return: predictive probabilities for a given observation.
34 |         """
35 |         ...
36 |
37 |     def update(self, observation: np.float64) -> None:
38 |         """
39 |         Updates parameters of a likelihood function according to the given observation.
40 |         :param observation: an observation from a sample.
41 |         """
42 |         ...
43 |
44 |     def clear(self) -> None:
45 |         """
46 |         Clears the likelihood function's state.
47 |         """
48 |         ...
49 |
50 |
51 | class ILikelihoodWithPriorProbability(ILikelihood, Protocol):
52 |     """
53 |     Likelihood that also allows evaluating how probable the learning sample is under the learned prior parameters.
54 |     """
55 |
56 |     def probability_of_learned_prior(self, sample: npt.NDArray[np.float64]) -> np.float64:
57 |         """
58 |         Evaluates how probable the learning sample is under the learned prior parameters.
59 |         :param sample: a sample for the likelihood.
60 |         :return: probability of getting the learning sample under the learned prior parameters.
61 |         """
62 |         ...
63 |
--------------------------------------------------------------------------------
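Taken together, these protocols drive the Bayesian online filtering recursion. Below is a minimal editorial sketch of one pass of that recursion (an illustration, not library code; it assumes only the public `learn`/`predict`/`update` and `hazard` methods defined above, together with the `GaussianConjugate` and `ConstantHazard` implementations shipped in this repository):

```python
import numpy as np

from pysatl_cpd.core.algorithms.bayesian.hazards.constant import ConstantHazard
from pysatl_cpd.core.algorithms.bayesian.likelihoods.gaussian_conjugate import GaussianConjugate

rng = np.random.default_rng(0)
likelihood = GaussianConjugate()
hazard = ConstantHazard(rate=250.0)

likelihood.learn(rng.normal(0.0, 1.0, 20))  # estimate prior parameters on a learning sample
run_length_probs = np.array([1.0])  # right after learning, the run length is 0 with certainty

for observation in rng.normal(0.0, 1.0, 5):
    predictive = likelihood.predict(np.float64(observation))  # one density per run length
    h = hazard.hazard(np.arange(run_length_probs.shape[0], dtype=np.intp))
    changepoint_mass = np.sum(run_length_probs * predictive * h)  # mass flowing to run length 0
    growth_mass = run_length_probs * predictive * (1.0 - h)  # mass staying on each growing run
    run_length_probs = np.append(changepoint_mass, growth_mass)
    run_length_probs /= run_length_probs.sum()  # renormalize into a distribution
    likelihood.update(np.float64(observation))  # compute posterior parameters for the next step
```

A detector and a localizer (defined below) then inspect `run_length_probs` to decide whether and where a change point occurred.

/pysatl_cpd/core/algorithms/bayesian/abstracts/ilocalizer.py:
--------------------------------------------------------------------------------
1 | """
2 | Module for Bayesian CPD algorithm localizer's abstract base class.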
3 | """ 4 | 5 | __author__ = "Alexey Tatyanenko" 6 | __copyright__ = "Copyright (c) 2024 Alexey Tatyanenko" 7 | __license__ = "SPDX-License-Identifier: MIT" 8 | 9 | 10 | from typing import Protocol 11 | 12 | import numpy as np 13 | import numpy.typing as npt 14 | 15 | 16 | class ILocalizer(Protocol): 17 | """ 18 | Protocol for localizers that localize a change point with given growth probabilities for run lengths. 19 | """ 20 | 21 | def localize(self, growth_probs: npt.NDArray[np.float64]) -> int: 22 | """ 23 | Localizes a change point with given growth probabilities for run lengths. 24 | :param growth_probs: growth probabilities for run lengths at the time. 25 | :return: run length corresponding with a change point. 26 | """ 27 | ... 28 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/bayesian/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for implementations of Bayesian CPD algorithm detectors. 3 | """ 4 | 5 | __author__ = "Alexey Tatyanenko" 6 | __copyright__ = "Copyright (c) 2024 Alexey Tatyanenko" 7 | __license__ = "SPDX-License-Identifier: MIT" 8 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/bayesian/detectors/drop.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for implementation of Bayesian CPD algorithm detector analyzing drop of maximal run length's probability. 3 | """ 4 | 5 | __author__ = "Alexey Tatyanenko" 6 | __copyright__ = "Copyright (c) 2024 Alexey Tatyanenko" 7 | __license__ = "SPDX-License-Identifier: MIT" 8 | 9 | from typing import Optional 10 | 11 | import numpy as np 12 | import numpy.typing as npt 13 | 14 | from pysatl_cpd.core.algorithms.bayesian.abstracts.idetector import IDetector 15 | 16 | 17 | class DropDetector(IDetector): 18 | """ 19 | A detector that detects a change point if the instantaneous drop in the probability of the maximum run length 20 | exceeds the threshold. 21 | """ 22 | 23 | def __init__(self, threshold: float): 24 | """ 25 | Initializes the detector with given drop threshold. 26 | :param threshold: threshold for a drop of the maximum run length's probability. 27 | """ 28 | self.__previous_growth_prob: Optional[float] = None 29 | 30 | self._threshold = threshold 31 | assert 0.0 <= self._threshold <= 1.0, "Drop threshold must be in [0.0, 1.0]" 32 | 33 | def detect(self, growth_probs: npt.NDArray[np.float64]) -> bool: 34 | """ 35 | Checks whether a changepoint occurred with given growth probabilities at the time. 36 | :param growth_probs: growth probabilities for run lengths at the time. 37 | :return: boolean indicating whether a changepoint occurred. 38 | """ 39 | if len(growth_probs) == 0: 40 | return False 41 | 42 | last_growth_prob = growth_probs[-1] 43 | if self.__previous_growth_prob is None: 44 | self.__previous_growth_prob = last_growth_prob 45 | return False 46 | 47 | drop = float(self.__previous_growth_prob - last_growth_prob) 48 | 49 | return drop >= self._threshold 50 | 51 | def clear(self) -> None: 52 | """ 53 | Clears the detector's state. 
54 | """ 55 | self.__previous_growth_prob = None 56 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/bayesian/detectors/threshold.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for implementation of Bayesian CPD algorithm detector comparing maximal run length's probability with 3 | a threshold. 4 | """ 5 | 6 | __author__ = "Alexey Tatyanenko" 7 | __copyright__ = "Copyright (c) 2024 Alexey Tatyanenko" 8 | __license__ = "SPDX-License-Identifier: MIT" 9 | 10 | 11 | import numpy as np 12 | import numpy.typing as npt 13 | 14 | from pysatl_cpd.core.algorithms.bayesian.abstracts.idetector import IDetector 15 | 16 | 17 | class ThresholdDetector(IDetector): 18 | """ 19 | A detector that detects a change point if the probability of the maximum run length drops below the threshold. 20 | """ 21 | 22 | def __init__(self, threshold: float): 23 | """ 24 | Detects a change point if the probability of the maximum run length drops below the threshold. 25 | :param threshold: lower threshold for the maximum run length's probability. 26 | """ 27 | self._threshold = threshold 28 | assert 0.0 <= self._threshold <= 1.0, "Threshold must be in [0.0, 1.0]" 29 | 30 | def detect(self, growth_probs: npt.NDArray[np.float64]) -> bool: 31 | """ 32 | Detects a change point if the probability of the maximum run length drops below the threshold. 33 | :param growth_probs: growth probabilities for run lengths at the time. 34 | :return: boolean indicating whether a changepoint occurred. 35 | """ 36 | return len(growth_probs) > 0 and growth_probs[-1] < self._threshold 37 | 38 | def clear(self) -> None: 39 | """ 40 | Clears the detector's state (for this detector it does nothing). 41 | """ 42 | pass 43 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/bayesian/hazards/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for implementations of Bayesian CPD algorithm hazard functions. 3 | """ 4 | 5 | __author__ = "Alexey Tatyanenko" 6 | __copyright__ = "Copyright (c) 2024 Alexey Tatyanenko" 7 | __license__ = "SPDX-License-Identifier: MIT" 8 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/bayesian/hazards/constant.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for implementation of Bayesian CPD algorithm constant hazard function corresponding to an exponential 3 | distribution. 4 | """ 5 | 6 | __author__ = "Alexey Tatyanenko" 7 | __copyright__ = "Copyright (c) 2024 Alexey Tatyanenko" 8 | __license__ = "SPDX-License-Identifier: MIT" 9 | 10 | 11 | import numpy as np 12 | import numpy.typing as npt 13 | 14 | from pysatl_cpd.core.algorithms.bayesian.abstracts.ihazard import IHazard 15 | 16 | 17 | class ConstantHazard(IHazard): 18 | """ 19 | A constant hazard function, corresponding to an exponential distribution with a given rate. 20 | """ 21 | 22 | def __init__(self, rate: float): 23 | """ 24 | Initializes the constant hazard function with a given rate of an underlying exponential distribution. 25 | :param rate: rate of an underlying exponential distribution. 
26 | """ 27 | self._rate = np.float64(rate) 28 | assert self._rate >= 1.0, "Hazard rate cannot be less than 1.0" 29 | 30 | def hazard(self, run_lengths: npt.NDArray[np.intp]) -> npt.NDArray[np.float64]: 31 | """ 32 | Calculates the constant hazard function. 33 | :param run_lengths: run lengths at the time. 34 | :return: hazard function's values for given run lengths. 35 | """ 36 | return np.ones(len(run_lengths)) / self._rate 37 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/bayesian/likelihoods/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for implementations of Bayesian CPD algorithm likelihood functions. 3 | """ 4 | 5 | __author__ = "Alexey Tatyanenko" 6 | __copyright__ = "Copyright (c) 2024 Alexey Tatyanenko" 7 | __license__ = "SPDX-License-Identifier: MIT" 8 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/bayesian/likelihoods/exponential_conjugate.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for exponential likelihood function with gamma prior used in Bayesian change point detection. Also contains its' 3 | extension for a sample's probability evaluation with estimated prior parameters. 4 | """ 5 | 6 | __author__ = "Alexey Tatyanenko" 7 | __copyright__ = "Copyright (c) 2025 PySATL project" 8 | __license__ = "SPDX-License-Identifier: MIT" 9 | 10 | from typing import Optional 11 | 12 | import numpy as np 13 | import scipy.stats 14 | from numpy import typing as npt 15 | 16 | from pysatl_cpd.core.algorithms.bayesian.abstracts.ilikelihood import ILikelihood, ILikelihoodWithPriorProbability 17 | 18 | 19 | class ExponentialConjugate(ILikelihood): 20 | """ 21 | Class implementing exponential likelihood function with conjugate gamma prior for Bayesian change point detection. 22 | Note: it's support is [0; +inf) 23 | """ 24 | 25 | def __init__(self) -> None: 26 | self._shape_prior: Optional[np.float64] = None 27 | self._scale_prior: Optional[np.float64] = None 28 | 29 | self.__shapes: npt.NDArray[np.float64] = np.array([]) 30 | self.__scales: npt.NDArray[np.float64] = np.array([]) 31 | 32 | def learn(self, learning_sample: npt.NDArray[np.float64]) -> None: 33 | """ 34 | Learns starting prior parameters to model exponential distribution's likelihood function. 35 | :param learning_sample: sample to learn starting prior parameters. 36 | :return: 37 | """ 38 | self._shape_prior = np.float64(learning_sample.shape[0]) 39 | self._scale_prior = np.sum(learning_sample) 40 | 41 | assert self._shape_prior is not None 42 | assert self._scale_prior is not None 43 | 44 | self.__shapes = np.array([self._shape_prior]) 45 | self.__scales = np.array([self._scale_prior]) 46 | 47 | def update(self, observation: np.float64) -> None: 48 | """ 49 | Updates parameters (calculating posterior parameters) after a given new observation. 50 | :param observation: a new observation of time series. 51 | :return: 52 | """ 53 | assert self._shape_prior is not None 54 | assert self._scale_prior is not None 55 | 56 | self.__shapes = np.append([self._shape_prior], (self.__shapes + 1.0)) 57 | self.__scales = np.append([self._scale_prior], (self.__scales + observation)) 58 | 59 | def predict(self, observation: np.float64) -> npt.NDArray[np.float64]: 60 | """ 61 | Calculates predictive posterior probabilities of exponential likelihood for corresponding values of run length. 
62 |         :param observation: a new observation of the time series.
63 |         :return: an array of predictive posterior probabilities (densities).
64 |         """
65 |         assert self._shape_prior is not None
66 |         assert self._scale_prior is not None
67 |
68 |         predictive_probabilities = scipy.stats.lomax.pdf(
69 |             x=observation,
70 |             c=self.__shapes,
71 |             loc=0.0,
72 |             scale=self.__scales,
73 |         )
74 |
75 |         # In case of a negative scale parameter, the corresponding distribution does not exist, so substituting an
76 |         # observation results in a NaN value. In the context of the algorithm, this probability can be assumed to be 0.
77 |         without_nans = np.nan_to_num(x=predictive_probabilities, nan=0.0)
78 |
79 |         return np.array(without_nans)
80 |
81 |     def clear(self) -> None:
82 |         """
83 |         Clears the current state of the likelihood, setting parameters to default init values.
84 |         :return:
85 |         """
86 |         self._shape_prior = None
87 |         self._scale_prior = None
88 |
89 |         self.__shapes = np.array([])
90 |         self.__scales = np.array([])
91 |
92 |
93 | class ExponentialConjugateWithPriorProbability(ExponentialConjugate, ILikelihoodWithPriorProbability):
94 |     """
95 |     Exponential likelihood, supporting a sample's probability evaluation with estimated prior parameters.
96 |     """
97 |
98 |     def __init__(self) -> None:
99 |         super().__init__()
100 |
101 |     def probability_of_learned_prior(self, sample: npt.NDArray[np.float64]) -> np.float64:
102 |         """
103 |         Evaluates the probability of a sample under the learned prior parameters of the exponential conjugate likelihood.
104 |         :param sample: sample for the probability's evaluation.
105 |         :return: probability of the sample under the learned prior parameters of the exponential conjugate likelihood.
106 |         """
107 |         assert self._shape_prior is not None
108 |         assert self._scale_prior is not None
109 |
110 |         probabilities_of_learning_sample = scipy.stats.lomax.pdf(
111 |             x=sample,
112 |             c=self._shape_prior,
113 |             loc=0.0,
114 |             scale=self._scale_prior,
115 |         )
116 |
117 |         without_nans = np.nan_to_num(x=probabilities_of_learning_sample, nan=0.0)
118 |
119 |         probability_of_learning_sample = np.prod(without_nans)
120 |         return np.float64(probability_of_learning_sample)
121 |
--------------------------------------------------------------------------------
/pysatl_cpd/core/algorithms/bayesian/likelihoods/gaussian.py:
--------------------------------------------------------------------------------
1 | """
2 | Module for implementation of Bayesian CPD algorithm gaussian (normal) likelihood function with mean and standard
3 | deviation learning.
4 | """
5 |
6 | __author__ = "Alexey Tatyanenko"
7 | __copyright__ = "Copyright (c) 2024 Alexey Tatyanenko"
8 | __license__ = "SPDX-License-Identifier: MIT"
9 |
10 | import numpy as np
11 | import numpy.typing as npt
12 | from scipy import stats
13 | from typing_extensions import deprecated
14 |
15 | from pysatl_cpd.core.algorithms.bayesian.abstracts.ilikelihood import ILikelihood
16 |
17 |
18 | @deprecated("Use GaussianConjugate instead")
19 | class Gaussian(ILikelihood):
20 |     """
21 |     Likelihood for Gaussian (a.k.a. normal) distribution, parametrized by mean and standard deviation.
22 |     """
23 |
24 |     def __init__(self) -> None:
25 |         """
26 |         Initializes the Gaussian likelihood, parametrized by mean and standard deviation (without any concrete values).
27 | """ 28 | self.__means = np.array([]) 29 | self.__standard_deviations = np.array([]) 30 | 31 | self.__sample_sum = 0.0 32 | self.__squared_sample_sum = 0.0 33 | self.__gap_size = 0 34 | 35 | def __update_parameters_lists(self) -> None: 36 | """ 37 | Updates the parameters lists based on accumulated sums, assuming we have at least 2 observations. 38 | """ 39 | assert self.__gap_size > 1 40 | new_mean = self.__sample_sum / self.__gap_size 41 | variance = (self.__squared_sample_sum - (self.__sample_sum**2.0) / self.__gap_size) / (self.__gap_size - 1) 42 | assert variance > 0.0 43 | assert len(self.__means) == len(self.__standard_deviations) 44 | 45 | new_standard_deviation = np.sqrt(variance) 46 | 47 | self.__means = np.append(self.__means, new_mean) 48 | self.__standard_deviations = np.append(self.__standard_deviations, new_standard_deviation) 49 | 50 | def learn(self, learning_sample: npt.NDArray[np.float64]) -> None: 51 | """ 52 | Learns first mean and stander deviations from a given sample. 53 | :param learning_sample: a sample for parameter learning. 54 | :return: 55 | """ 56 | assert len(self.__means) == len(self.__standard_deviations) == 0 57 | assert self.__gap_size == 0 58 | 59 | self.__sample_sum += sum(learning_sample) 60 | for observation in learning_sample: 61 | self.__squared_sample_sum += observation**2.0 62 | 63 | self.__gap_size = len(learning_sample) 64 | # self.__squared_sample_sum += sum(learning_sample ** 2.) 65 | 66 | self.__update_parameters_lists() 67 | 68 | def update(self, observation: np.float64) -> None: 69 | """ 70 | Updates the means and standard deviations lists according to the given observation. 71 | :param observation: an observation from a sample. 72 | :return: 73 | """ 74 | self.__sample_sum += observation 75 | self.__squared_sample_sum += observation**2 76 | self.__gap_size += 1 77 | 78 | self.__update_parameters_lists() 79 | 80 | def predict(self, observation: np.float64) -> npt.NDArray[np.float64]: 81 | """ 82 | Returns predictive probabilities for a given observation based on stored means and standard deviations. 83 | :param observation: an observation from a sample. 84 | :return: predictive probabilities for a given observation. 85 | """ 86 | return np.array(stats.norm(self.__means, self.__standard_deviations).pdf(observation)) 87 | 88 | def clear(self) -> None: 89 | """ 90 | Clears parameters of gaussian likelihood. 91 | :return: 92 | """ 93 | self.__means = np.array([]) 94 | self.__standard_deviations = np.array([]) 95 | 96 | self.__sample_sum = 0.0 97 | self.__squared_sample_sum = 0.0 98 | self.__gap_size = 0 99 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/bayesian/likelihoods/heuristic_gaussian_vs_exponential.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for prediction model for Bayesian online CPD, which supports heuristic selection of gaussian (normal) or 3 | exponential conjugate likelihood based on estimation from learning sample. 
4 | """ 5 | 6 | from typing import Optional 7 | 8 | import numpy as np 9 | from numpy import typing as npt 10 | 11 | from pysatl_cpd.core.algorithms.bayesian.abstracts.ilikelihood import ILikelihood, ILikelihoodWithPriorProbability 12 | from pysatl_cpd.core.algorithms.bayesian.likelihoods.exponential_conjugate import ( 13 | ExponentialConjugateWithPriorProbability, 14 | ) 15 | from pysatl_cpd.core.algorithms.bayesian.likelihoods.gaussian_conjugate import GaussianConjugateWithPriorProbability 16 | 17 | 18 | class HeuristicGaussianVsExponential(ILikelihood): 19 | """ 20 | Prediction model class with heuristic selection of gaussian (normal) or exponential conjugate likelihood based on 21 | estimation from learning sample. 22 | """ 23 | 24 | def __init__(self) -> None: 25 | self.__likelihood: Optional[ILikelihoodWithPriorProbability] = None 26 | 27 | def learn(self, learning_sample: npt.NDArray[np.float64]) -> None: 28 | """ 29 | Learns prior parameters for gaussian and exponential likelihoods, evaluates which makes a learning sample more 30 | probable and saves acquired likelihood for further work. 31 | :param learning_sample: a sample to estimate prior parameters and compare likelihoods. 32 | :return: 33 | """ 34 | gaussian = GaussianConjugateWithPriorProbability() 35 | exponential = ExponentialConjugateWithPriorProbability() 36 | 37 | gaussian.learn(learning_sample) 38 | exponential.learn(learning_sample) 39 | 40 | gaussian_probability = gaussian.probability_of_learned_prior(learning_sample) 41 | exponential_probability = exponential.probability_of_learned_prior(learning_sample) 42 | 43 | self.__likelihood = gaussian if gaussian_probability >= exponential_probability else exponential 44 | 45 | def predict(self, observation: np.float64) -> npt.NDArray[np.float64]: 46 | """ 47 | Returns prediction from an underlying likelihood. 48 | :param observation: a new observation of time series. 49 | :return: an array of predictive posterior probabilities (densities). 50 | """ 51 | assert self.__likelihood is not None, "Underlying likelihood must not be None" 52 | 53 | return self.__likelihood.predict(observation) 54 | 55 | def update(self, observation: np.float64) -> None: 56 | """ 57 | Updates an underlying likelihood's state (calculates posterior parameters). 58 | :param observation: a new observation of time series. 59 | :return: 60 | """ 61 | assert self.__likelihood is not None, "Underlying likelihood must not be None" 62 | 63 | self.__likelihood.update(observation) 64 | 65 | def clear(self) -> None: 66 | """ 67 | Sets an underlying likelihood to None. 68 | :return: 69 | """ 70 | self.__likelihood = None 71 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/bayesian/localizers/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for implementations of Bayesian CPD algorithm localizers. 3 | """ 4 | 5 | __author__ = "Alexey Tatyanenko" 6 | __copyright__ = "Copyright (c) 2024 Alexey Tatyanenko" 7 | __license__ = "SPDX-License-Identifier: MIT" 8 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/bayesian/localizers/argmax.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for implementation of Bayesian CPD algorithm localizer selecting the most probable run length. 
3 | """ 4 | 5 | __author__ = "Alexey Tatyanenko" 6 | __copyright__ = "Copyright (c) 2024 Alexey Tatyanenko" 7 | __license__ = "SPDX-License-Identifier: MIT" 8 | 9 | import numpy as np 10 | import numpy.typing as npt 11 | 12 | from pysatl_cpd.core.algorithms.bayesian.abstracts.ilocalizer import ILocalizer 13 | 14 | 15 | class ArgmaxLocalizer(ILocalizer): 16 | """ 17 | A localizer that localizes a change point corresponding with the most probable non-max run length. 18 | """ 19 | 20 | def localize(self, growth_probs: npt.NDArray[np.float64]) -> int: 21 | """ 22 | Localizes a change point corresponding with the most probable non-max run length. 23 | :param growth_probs: growth probabilities for run lengths at the time. 24 | :return: the most probable non-max run length corresponding change point; 25 | in case of one-element array returns it. 26 | """ 27 | max_run_length = growth_probs.shape[0] 28 | assert max_run_length > 0, "Run length distribution should not be empty" 29 | 30 | return 0 if max_run_length == 1 else int(growth_probs[:-1].argmax()) 31 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/bayesian_linear_heuristic.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for chanhe point detection online algorithm, based on Bayesian online algorithm with heuristic, turning it into 3 | an algorithm with linear time complexity with a cost of some information loss. 4 | """ 5 | 6 | __author__ = "Alexey Tatyanenko" 7 | __copyright__ = "Copyright (c) 2025 PySATL project" 8 | __license__ = "SPDX-License-Identifier: MIT" 9 | 10 | import copy 11 | from typing import Optional 12 | 13 | import numpy as np 14 | from numpy import typing as npt 15 | 16 | from pysatl_cpd.core.algorithms.bayesian_online_algorithm import BayesianOnline 17 | from pysatl_cpd.core.algorithms.online_algorithm import OnlineAlgorithm 18 | 19 | 20 | class BayesianLinearHeuristic(OnlineAlgorithm): 21 | """An online change point detection algorithm, based on changing the main Bayesian online algorithm instance to the 22 | duplicating time after some time. Note: this heuristic, however makes an algorithm linear on big time series, leads 23 | to some information loss, which may lead to some unstability in output's correctness.""" 24 | 25 | def __init__(self, algorithm: BayesianOnline, time_before_duplicate_start: int, duplicate_preparation_time: int): 26 | """Initializes the Bayesian change point detection algorithm with linear time-complexity heuristc.. 27 | 28 | :param algorithm: The base algorithm instance to use for detection/localization. 29 | :param time_before_duplicate_start: Time steps before starting duplicate algorithm's preparation (training 30 | and Bayesian modeling). 31 | :param duplicate_preparation_time: Time steps required to prepare (train and perform Bayesian modeling) the 32 | duplicating algorithm. 33 | :raises ValueError: If time constraints are not satisfied. 
34 |         :return:
35 |         """
36 |         if not (time_before_duplicate_start > duplicate_preparation_time > 0):
37 |             raise ValueError(
38 |                 "time_before_duplicate_start must be greater than duplicate_preparation_time, which must be positive"
39 |             )
40 |
41 |         self.__original_algorithm = copy.deepcopy(algorithm)
42 |         self.__time_before_duplicate_start = time_before_duplicate_start
43 |         self.__duplicate_preparation_time = duplicate_preparation_time
44 |         self.__main_algorithm = copy.deepcopy(algorithm)
45 |         self.__duplicating_algorithm: Optional[BayesianOnline] = None
46 |         self.__time = 0
47 |         self.__last_algorithm_start_time = 0
48 |
49 |     @property
50 |     def __work_time(self) -> int:
51 |         """
52 |         Returns the number of steps since the last algorithm start.
53 |         :return: the number of steps since the last algorithm start.
54 |         """
55 |         return self.__time - self.__last_algorithm_start_time
56 |
57 |     def _handle_duplicate_preparation(
58 |         self, observation: np.float64 | npt.NDArray[np.float64], method_name: str
59 |     ) -> None:
60 |         """
61 |         Manages the creation, training, and Bayesian modeling of the duplicating algorithm.
62 |
63 |         :param observation: a new observation from a time series.
64 |         :param method_name: the method to call on the duplicating algorithm ('detect'/'localize').
65 |         :return:
66 |         """
67 |         work_time = self.__work_time
68 |         stage_end = self.__time_before_duplicate_start + self.__duplicate_preparation_time
69 |
70 |         # Start initializing the duplicating algorithm
71 |         if work_time == self.__time_before_duplicate_start:
72 |             self.__duplicating_algorithm = copy.deepcopy(self.__original_algorithm)
73 |
74 |         # Train the duplicating algorithm and perform Bayesian modeling during the preparation period
75 |         elif self.__time_before_duplicate_start < work_time < stage_end:
76 |             if self.__duplicating_algorithm is not None:
77 |                 getattr(self.__duplicating_algorithm, method_name)(observation)
78 |
79 |         # Switch to the prepared duplicating algorithm
80 |         elif work_time == stage_end:
81 |             assert self.__duplicating_algorithm is not None, "Duplicating algorithm must be initialized"
82 |             self.__main_algorithm = copy.deepcopy(self.__duplicating_algorithm)
83 |             self.__duplicating_algorithm = None
84 |             self.__last_algorithm_start_time = self.__time - self.__duplicate_preparation_time
85 |
86 |     def detect(self, observation: np.float64 | npt.NDArray[np.float64]) -> bool:
87 |         """
88 |         Processes an observation and returns whether a change point was detected by the main algorithm.
89 |         :param observation: a new observation from a time series. Note: only univariate data is supported for now.
90 |         :return: whether a change point was detected by the main algorithm.
91 |         """
92 |         if isinstance(observation, np.ndarray):
93 |             raise TypeError("Multivariate observations are not supported")
94 |         assert self.__main_algorithm is not None, "Main algorithm must be initialized"
95 |
96 |         # Run main detection
97 |         if self.__main_algorithm.detect(observation):
98 |             self.__last_algorithm_start_time = self.__time
99 |             self.__duplicating_algorithm = None
100 |             self.__time += 1
101 |             return True
102 |
103 |         # Manage duplicating algorithm training
104 |         self._handle_duplicate_preparation(observation, "detect")
105 |         self.__time += 1
106 |         return False
107 |
108 |     def localize(self, observation: np.float64 | npt.NDArray[np.float64]) -> Optional[int]:
109 |         """
110 |         Processes an observation and returns the change point if localized by the main algorithm.
111 |         :param observation: a new observation from a time series.
Note: only univariate data is supported for now. 112 | :return: a change point if it was localized, None otherwise. 113 | """ 114 | if isinstance(observation, np.ndarray) and observation.size > 1:  # only univariate data is supported 115 | raise TypeError("Multivariate observations are not supported") 116 | assert self.__main_algorithm is not None, "Main algorithm must be initialized" 117 | 118 | # Run main localization 119 | if (result := self.__main_algorithm.localize(observation)) is not None: 120 | change_point = self.__last_algorithm_start_time + result 121 | self.__last_algorithm_start_time = change_point 122 | self.__duplicating_algorithm = None 123 | self.__time += 1 124 | return change_point 125 | 126 | # Manage duplicating algorithm training 127 | self._handle_duplicate_preparation(observation, "localize") 128 | self.__time += 1 129 | return None 130 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/classification/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PySATL/pysatl-cpd/9f496f4cdf1401d3d405e28a86e82ab848bb6b52/pysatl_cpd/core/algorithms/classification/__init__.py -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/classification/abstracts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PySATL/pysatl-cpd/9f496f4cdf1401d3d405e28a86e82ab848bb6b52/pysatl_cpd/core/algorithms/classification/abstracts/__init__.py -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/classification/abstracts/iclassifier.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for Classification CPD algorithm's classifier abstract base class. 3 | """ 4 | 5 | __author__ = "Artemii Patov" 6 | __copyright__ = "Copyright (c) 2024 Artemii Patov" 7 | __license__ = "SPDX-License-Identifier: MIT" 8 | 9 | from abc import ABC, abstractmethod 10 | 11 | import numpy as np 12 | import numpy.typing as npt 13 | 14 | 15 | class Classifier(ABC): 16 | """Classifier's abstract base class.""" 17 | 18 | @abstractmethod 19 | def train(self, sample: npt.NDArray[np.float64], barrier: int) -> None: 20 | """Trains binary classifier on the given sample. 21 | The observations up to and including the barrier index belong to class 0, the rest --- to class 1. 22 | 23 | :param sample: sample for training classifier. 24 | :param barrier: index of observation that splits the given sample. 25 | """ 26 | raise NotImplementedError 27 | 28 | @abstractmethod 29 | def predict(self, sample: npt.NDArray[np.float64]) -> npt.NDArray[np.intp]: 30 | """Classifies the elements of a sample into one of two classes, based on training with the barrier. 31 | 32 | :param sample: sample to classify. 33 | """ 34 | raise NotImplementedError 35 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/classification/abstracts/iquality_metric.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for Classification CPD algorithm's quality metric abstract base class.
3 | """ 4 | 5 | __author__ = "Artemii Patov" 6 | __copyright__ = "Copyright (c) 2024 Artemii Patov" 7 | __license__ = "SPDX-License-Identifier: MIT" 8 | 9 | from abc import ABC, abstractmethod 10 | 11 | import numpy as np 12 | import numpy.typing as npt 13 | 14 | 15 | class QualityMetric(ABC): 16 | """Quality metric's abstract base class.""" 17 | 18 | @abstractmethod 19 | def assess_barrier(self, classes: npt.NDArray[np.intp], time: int) -> float: 20 | """Evaluates quality function based on classificator in the specified point. 21 | 22 | :param classes: Classes of observations, predicted by the classifier. 23 | :param time: Index of barrier in the given sample to calculate quality. 24 | :return: Quality assessment. 25 | """ 26 | raise NotImplementedError 27 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/classification/abstracts/istatistic_test.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for Classification CPD algorithm's test statistic abstract base class. 3 | """ 4 | 5 | __author__ = "Artemii Patov" 6 | __copyright__ = "Copyright (c) 2024 Artemii Patov" 7 | __license__ = "SPDX-License-Identifier: MIT" 8 | 9 | from abc import ABC, abstractmethod 10 | 11 | 12 | class TestStatistic(ABC): 13 | """Test statistic's abstract base class.""" 14 | 15 | @abstractmethod 16 | def get_change_points(self, classifier_assessments: list[float]) -> list[int]: 17 | """Separates change points from other points in sample based on some criterion. 18 | 19 | :param classifier_assessments: List of quality assessments evaluated in each point of the sample. 20 | :return: Change points in the current window. 21 | """ 22 | raise NotImplementedError 23 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/classification/classifiers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PySATL/pysatl-cpd/9f496f4cdf1401d3d405e28a86e82ab848bb6b52/pysatl_cpd/core/algorithms/classification/classifiers/__init__.py -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/classification/classifiers/decision_tree.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for implementation of decision tree classifier for cpd. 3 | """ 4 | 5 | __author__ = "Artemii Patov" 6 | __copyright__ = "Copyright (c) 2024 Artemii Patov" 7 | __license__ = "SPDX-License-Identifier: MIT" 8 | 9 | from typing import cast 10 | 11 | import numpy as np 12 | import numpy.typing as npt 13 | import sklearn.tree as sk 14 | 15 | from pysatl_cpd.core.algorithms.classification.abstracts.iclassifier import Classifier 16 | 17 | 18 | class DecisionTreeClassifier(Classifier): 19 | """ 20 | The class implementing decision tree classifier for cpd. 21 | """ 22 | 23 | def __init__(self) -> None: 24 | """ 25 | Initializes a new instance of decision tree classifier for cpd. 26 | """ 27 | self.__model: sk.DecisionTreeClassifier | None = None 28 | 29 | def train(self, sample: npt.NDArray[np.float64], barrier: int) -> None: 30 | """Trains classifier on the given sample. 31 | 32 | :param sample: sample for training classifier. 33 | :param barrier: index of observation that splits the given sample. 
34 | """ 35 | classes = np.array([0 if i <= barrier else 1 for i in range(len(sample))]) 36 | self.__model = sk.DecisionTreeClassifier() 37 | self.__model.fit(sample, classes) 38 | 39 | def predict(self, sample: npt.NDArray[np.float64]) -> npt.NDArray[np.intp]: 40 | """Classifies observations in the given sample based on training with barrier. 41 | 42 | :param sample: sample to classify. 43 | """ 44 | assert self.__model is not None 45 | return cast(npt.NDArray[np.intp], self.__model.predict(sample)) 46 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/classification/classifiers/knn.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for implementation of knn classifier for cpd. 3 | """ 4 | 5 | __author__ = "Artemii Patov" 6 | __copyright__ = "Copyright (c) 2024 Artemii Patov" 7 | __license__ = "SPDX-License-Identifier: MIT" 8 | 9 | import typing as tp 10 | 11 | import numpy as np 12 | import numpy.typing as npt 13 | from sklearn.neighbors import KNeighborsClassifier 14 | 15 | from pysatl_cpd.core.algorithms.classification.abstracts.iclassifier import Classifier 16 | 17 | 18 | class KNNClassifier(Classifier): 19 | """ 20 | The class implementing knn classifier for cpd. 21 | """ 22 | 23 | def __init__( 24 | self, k: int, distance: tp.Literal["manhattan", "euclidean", "minkowski", "hamming"] = "minkowski" 25 | ) -> None: 26 | """ 27 | Initializes a new instance of knn classifier for cpd. 28 | :param k: number of neighbours in the knn graph relative to each point. 29 | :param distance: Metric to use for distance computation. 30 | Default is "minkowski", which results in the standard Euclidean distance when p = 2. 31 | """ 32 | self.__k = k 33 | self.__distance: tp.Literal["manhattan", "euclidean", "minkowski", "hamming"] = distance 34 | self.__model: KNeighborsClassifier | None = None 35 | 36 | def train(self, sample: npt.NDArray[np.float64], barrier: int) -> None: 37 | """Trains classifier on the given sample. 38 | 39 | :param sample: sample for training classifier. 40 | :param barrier: index of observation that splits the given sample. 41 | """ 42 | classes = np.array([0 if i <= barrier else 1 for i in range(len(sample))]) 43 | self.__model = KNeighborsClassifier(n_neighbors=self.__k, metric=self.__distance) 44 | self.__model.fit(sample, classes) 45 | 46 | def predict(self, sample: npt.NDArray[np.float64]) -> npt.NDArray[np.intp]: 47 | """Classifies observations in the given sample based on training with barrier. 48 | 49 | :param sample: sample to classify. 50 | """ 51 | assert self.__model is not None 52 | return self.__model.predict(sample) 53 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/classification/classifiers/logistic_regression.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for implementation of classifier based on logistic regression for cpd. 
3 | """ 4 | 5 | __author__ = "Artemii Patov" 6 | __copyright__ = "Copyright (c) 2024 Artemii Patov" 7 | __license__ = "SPDX-License-Identifier: MIT" 8 | 9 | from typing import cast 10 | 11 | import numpy as np 12 | import numpy.typing as npt 13 | from sklearn.linear_model import LogisticRegression 14 | 15 | from pysatl_cpd.core.algorithms.classification.abstracts.iclassifier import Classifier 16 | 17 | 18 | class LogisticRegressionClassifier(Classifier): 19 | """ 20 | The class implementing classifier based on logistic regression for cpd. 21 | """ 22 | 23 | def __init__(self) -> None: 24 | """ 25 | Initializes a new instance of classifier based on logistic regression for cpd. 26 | """ 27 | self.__model: LogisticRegression | None = None 28 | 29 | def train(self, sample: npt.NDArray[np.float64], barrier: int) -> None: 30 | """Trains classifier on the given sample. 31 | 32 | :param sample: sample for training classifier. 33 | :param barrier: index of observation that splits the given sample. 34 | """ 35 | classes = np.array([0 if i <= barrier else 1 for i in range(len(sample))]) 36 | self.__model = LogisticRegression() 37 | self.__model.fit(sample, classes) 38 | 39 | def predict(self, sample: npt.NDArray[np.float64]) -> npt.NDArray[np.intp]: 40 | """Classifies observations in the given sample based on training with barrier. 41 | 42 | :param sample: sample to classify. 43 | """ 44 | assert self.__model is not None 45 | return cast(npt.NDArray[np.intp], self.__model.predict(sample)) 46 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/classification/classifiers/rf.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for implementation of random forest classifier for cpd. 3 | """ 4 | 5 | __author__ = "Artemii Patov" 6 | __copyright__ = "Copyright (c) 2024 Artemii Patov" 7 | __license__ = "SPDX-License-Identifier: MIT" 8 | 9 | from typing import cast 10 | 11 | import numpy as np 12 | import numpy.typing as npt 13 | from sklearn.ensemble import RandomForestClassifier 14 | 15 | from pysatl_cpd.core.algorithms.classification.abstracts.iclassifier import Classifier 16 | 17 | 18 | class RFClassifier(Classifier): 19 | """ 20 | The class implementing random forest classifier for cpd. 21 | """ 22 | 23 | def __init__(self) -> None: 24 | """ 25 | Initializes a new instance of RF classifier for cpd. 26 | """ 27 | self.__model: RandomForestClassifier | None = None 28 | 29 | def train(self, sample: npt.NDArray[np.float64], barrier: int) -> None: 30 | """Trains classifier on the given sample. 31 | 32 | :param sample: sample for training classifier. 33 | :param barrier: index of observation that splits the given sample. 34 | """ 35 | classes = np.array([0 if i <= barrier else 1 for i in range(len(sample))]) 36 | self.__model = RandomForestClassifier() 37 | self.__model.fit(sample, classes) 38 | 39 | def predict(self, sample: npt.NDArray[np.float64]) -> npt.NDArray[np.intp]: 40 | """Classifies observations in the given sample based on training with barrier. 41 | 42 | :param sample: sample to classify. 
43 | """ 44 | assert self.__model is not None 45 | return cast(npt.NDArray[np.intp], self.__model.predict(sample)) 46 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/classification/classifiers/svm.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for implementation of svm classifier for cpd. 3 | """ 4 | 5 | __author__ = "Artemii Patov" 6 | __copyright__ = "Copyright (c) 2024 Artemii Patov" 7 | __license__ = "SPDX-License-Identifier: MIT" 8 | 9 | import typing as tp 10 | 11 | import numpy as np 12 | import numpy.typing as npt 13 | from sklearn.svm import SVC 14 | 15 | from pysatl_cpd.core.algorithms.classification.abstracts.iclassifier import Classifier 16 | 17 | 18 | class SVMClassifier(Classifier): 19 | """ 20 | The class implementing svm classifier for cpd. 21 | """ 22 | 23 | def __init__(self, kernel: tp.Literal["linear", "poly", "rbf", "sigmoid", "precomputed"] = "rbf") -> None: 24 | """ 25 | Initializes a new instance of svm classifier for cpd. 26 | :param kernel: specifies the kernel type to be used in the algorithm. If none is given, 'rbf' will be used. 27 | """ 28 | self.__kernel: tp.Literal["linear", "poly", "rbf", "sigmoid", "precomputed"] = kernel 29 | self.__model: SVC | None = None 30 | 31 | def train(self, sample: npt.NDArray[np.float64], barrier: int) -> None: 32 | """Trains classifier on the given sample. 33 | 34 | :param sample: sample for training classifier. 35 | :param barrier: index of observation that splits the given sample. 36 | """ 37 | classes = np.array([0 if i <= barrier else 1 for i in range(len(sample))]) 38 | self.__model = SVC(kernel=self.__kernel) 39 | self.__model.fit(sample, classes) 40 | 41 | def predict(self, sample: npt.NDArray[np.float64]) -> npt.NDArray[np.intp]: 42 | """Classifies observations in the given sample based on training with barrier. 43 | 44 | :param sample: sample to classify. 45 | """ 46 | assert self.__model is not None 47 | return tp.cast(npt.NDArray[np.intp], self.__model.predict(sample)) 48 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/classification/quality_metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PySATL/pysatl-cpd/9f496f4cdf1401d3d405e28a86e82ab848bb6b52/pysatl_cpd/core/algorithms/classification/quality_metrics/__init__.py -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/classification/quality_metrics/classification/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PySATL/pysatl-cpd/9f496f4cdf1401d3d405e28a86e82ab848bb6b52/pysatl_cpd/core/algorithms/classification/quality_metrics/classification/__init__.py -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/classification/quality_metrics/classification/accuracy.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for implementation of classifier's quality metric based on accuracy. 
3 | """ 4 | 5 | __author__ = "Artemii Patov" 6 | __copyright__ = "Copyright (c) 2024 Artemii Patov" 7 | __license__ = "SPDX-License-Identifier: MIT" 8 | 9 | import numpy as np 10 | import numpy.typing as npt 11 | 12 | from pysatl_cpd.core.algorithms.classification.abstracts.iquality_metric import QualityMetric 13 | 14 | 15 | class Accuracy(QualityMetric): 16 | """ 17 | The class implementing quality metric based on accuracy. 18 | """ 19 | 20 | def assess_barrier(self, classes: npt.NDArray[np.intp], time: int) -> float: 21 | """Evaluates quality function based on classificator in the specified point. 22 | 23 | :param classes: Classes of observations, predicted by the classifier. 24 | :param time: Index of barrier in the given sample to calculate quality. 25 | :return: Quality assessment. 26 | """ 27 | before = classes[:time] 28 | after = classes[time:] 29 | before_length = time 30 | sample_length = len(classes) 31 | 32 | true_positive = float(after.sum()) 33 | true_negative = before_length - float(before.sum()) 34 | 35 | return (true_positive + true_negative) / sample_length 36 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/classification/quality_metrics/classification/f1.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for implementation of classifier's quality metric based on F1 score. 3 | """ 4 | 5 | __author__ = "Artemii Patov" 6 | __copyright__ = "Copyright (c) 2024 Artemii Patov" 7 | __license__ = "SPDX-License-Identifier: MIT" 8 | 9 | import numpy as np 10 | import numpy.typing as npt 11 | 12 | from pysatl_cpd.core.algorithms.classification.abstracts.iquality_metric import QualityMetric 13 | 14 | 15 | class F1(QualityMetric): 16 | """ 17 | The class implementing quality metric based on F1 score. 18 | """ 19 | 20 | def assess_barrier(self, classes: npt.NDArray[np.intp], time: int) -> float: 21 | """Evaluates quality function based on classificator in the specified point. 22 | 23 | :param classes: Classes of observations, predicted by the classifier. 24 | :param time: Index of barrier in the given sample to calculate quality. 25 | :return: Quality assessment. 26 | """ 27 | before = classes[:time] 28 | after = classes[time:] 29 | after_length = len(after) 30 | 31 | true_positive = float(after.sum()) 32 | false_positive = float(before.sum()) 33 | false_negative = after_length - true_positive 34 | 35 | return 2 * true_positive / (2 * true_positive + false_positive + false_negative) 36 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/classification/quality_metrics/classification/mcc.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for implementation of classifier's quality metric based on Matthews correlation coefficient. 3 | """ 4 | 5 | __author__ = "Artemii Patov" 6 | __copyright__ = "Copyright (c) 2024 Artemii Patov" 7 | __license__ = "SPDX-License-Identifier: MIT" 8 | 9 | from math import sqrt 10 | 11 | import numpy as np 12 | import numpy.typing as npt 13 | 14 | from pysatl_cpd.core.algorithms.classification.abstracts.iquality_metric import QualityMetric 15 | 16 | 17 | class MCC(QualityMetric): 18 | """ 19 | The class implementing quality metric based on Matthews correlation coefficient. 
20 | """ 21 | 22 | def assess_barrier(self, classes: npt.NDArray[np.intp], time: int) -> float: 23 | """Evaluates quality function based on classificator in the specified point. 24 | 25 | :param classes: Classes of observations, predicted by the classifier. 26 | :param time: Index of barrier in the given sample to calculate quality. 27 | :return: Quality assessment. 28 | """ 29 | before = classes[:time] 30 | after = classes[time:] 31 | after_length = len(after) 32 | before_length = time 33 | 34 | true_positive = after.sum() 35 | false_positive = before.sum() 36 | true_negative = before_length - false_positive 37 | false_negative = after_length - true_positive 38 | positive = true_positive + false_negative 39 | negative = false_positive + true_negative 40 | pp = true_positive + false_positive 41 | pn = false_negative + true_negative 42 | 43 | if pp == 0 or pn == 0: 44 | return -1.0 45 | 46 | tpr = true_positive / positive 47 | tnr = true_negative / negative 48 | ppv = true_positive / pp 49 | npv = true_negative / pn 50 | fnr = false_negative / positive 51 | fpr = false_positive / negative 52 | fo_rate = false_negative / pn 53 | fdr = false_positive / pp 54 | 55 | return sqrt(tpr * tnr * ppv * npv) - sqrt(fnr * fpr * fo_rate * fdr) 56 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/classification/quality_metrics/clustering/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PySATL/pysatl-cpd/9f496f4cdf1401d3d405e28a86e82ab848bb6b52/pysatl_cpd/core/algorithms/classification/quality_metrics/clustering/__init__.py -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/classification/test_statistics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PySATL/pysatl-cpd/9f496f4cdf1401d3d405e28a86e82ab848bb6b52/pysatl_cpd/core/algorithms/classification/test_statistics/__init__.py -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/classification/test_statistics/threshold_overcome.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for implementation of test statistic based on threshold overcome. 3 | """ 4 | 5 | __author__ = "Artemii Patov" 6 | __copyright__ = "Copyright (c) 2024 Artemii Patov" 7 | __license__ = "SPDX-License-Identifier: MIT" 8 | 9 | from pysatl_cpd.core.algorithms.classification.abstracts.istatistic_test import TestStatistic 10 | 11 | 12 | class ThresholdOvercome(TestStatistic): 13 | """ 14 | The class implementing test statistic based on threshold overcome. 15 | """ 16 | 17 | def __init__(self, threshold: float) -> None: 18 | """ 19 | Initializes a new instance of threshold overcome criterion. 20 | 21 | :param threshold: Threshold to overcome to detect the change point. 22 | """ 23 | self.__threshold = threshold 24 | 25 | def get_change_points(self, classifier_assessments: list[float]) -> list[int]: 26 | """Separates change points from other points in sample based on some criterion. 27 | 28 | :param classifier_assessments: List of quality assessments evaluated in each point of the sample. 29 | :return: Change points in the current window. 
30 | """ 31 | return [i for i, v in enumerate(classifier_assessments) if v > self.__threshold] 32 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/classification_algorithm.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for implementation of CPD algorithm based on classification. 3 | """ 4 | 5 | __author__ = "Artemii Patov" 6 | __copyright__ = "Copyright (c) 2024 Artemii Patov" 7 | __license__ = "SPDX-License-Identifier: MIT" 8 | 9 | 10 | import numpy as np 11 | import numpy.typing as npt 12 | 13 | from pysatl_cpd.core.algorithms.abstract_algorithm import Algorithm 14 | from pysatl_cpd.core.algorithms.classification.abstracts.iclassifier import Classifier 15 | from pysatl_cpd.core.algorithms.classification.abstracts.iquality_metric import QualityMetric 16 | from pysatl_cpd.core.algorithms.classification.abstracts.istatistic_test import TestStatistic 17 | 18 | 19 | class ClassificationAlgorithm(Algorithm): 20 | """ 21 | The class implementing change point detection algorithm based on classification. 22 | """ 23 | 24 | def __init__( 25 | self, classifier: Classifier, quality_metric: QualityMetric, test_statistic: TestStatistic, indent_coeff: float 26 | ) -> None: 27 | """ 28 | Initializes a new instance of classification based change point detection algorithm. 29 | 30 | :param classifier: Classifier for sample classification. 31 | :param quality_metric: Metric to assess independence of the two samples 32 | resulting from splitting the original sample. 33 | :param test_statistic: Criterion to separate change points from other points in sample. 34 | :param indent_coeff: Coefficient for evaluating indent from window borders. 35 | The indentation is calculated by multiplying the given coefficient by the size of window. 36 | """ 37 | self.__classifier = classifier 38 | self.__test_statistic = test_statistic 39 | self.__quality_metric = quality_metric 40 | 41 | self.__shift_coeff = indent_coeff 42 | 43 | self.__change_points: list[int] = [] 44 | self.__change_points_count = 0 45 | 46 | @property 47 | def test_statistic(self) -> TestStatistic: 48 | return self.__test_statistic 49 | 50 | @test_statistic.setter 51 | def test_statistic(self, test_statistic: TestStatistic) -> None: 52 | self.__test_statistic = test_statistic 53 | 54 | def detect(self, window: npt.NDArray[np.float64]) -> int: 55 | """Finds change points in window. 56 | 57 | :param window: part of global data for finding change points. 58 | :return: the number of change points in the window. 59 | """ 60 | self.__process_data(window) 61 | return self.__change_points_count 62 | 63 | def localize(self, window: npt.NDArray[np.float64]) -> list[int]: 64 | """Finds coordinates of change points (localizes them) in window. 65 | 66 | :param window: part of global data for finding change points. 67 | :return: list of window change points. 68 | """ 69 | self.__process_data(window) 70 | return self.__change_points.copy() 71 | 72 | def __process_data(self, window: npt.NDArray[np.float64]) -> None: 73 | """ 74 | Processes a window of data to detect/localize all change points depending on working mode. 75 | 76 | :param window: part of global data for change points analysis. 77 | """ 78 | sample_size = len(window) 79 | if sample_size == 0: 80 | return 81 | 82 | # Examining each point. 83 | # Boundaries are always change points. 
84 | first_point = int(sample_size * self.__shift_coeff) 85 | last_point = int(sample_size * (1 - self.__shift_coeff)) 86 | assessments = [] 87 | 88 | for time in range(first_point, last_point): 89 | train_sample, test_sample = ClassificationAlgorithm.__split_sample(window) 90 | self.__classifier.train(train_sample, int(time / 2)) 91 | classes = self.__classifier.predict(test_sample) 92 | 93 | quality = self.__quality_metric.assess_barrier(classes, int(time / 2)) 94 | assessments.append(quality) 95 | 96 | change_points = self.__test_statistic.get_change_points(assessments) 97 | 98 | # Shifting change point coordinates according to their positions in the window. 99 | self.__change_points = list(map(lambda x: x + first_point, change_points)) 100 | self.__change_points_count = len(change_points) 101 | 102 | # Splits the given sample into train and test samples. 103 | # Strategy: even elements go to the train sample; odd ones go to the test sample. 104 | # The classification algorithm will eventually be generalized: the split strategy will become one of the parameters. 105 | @staticmethod 106 | def __split_sample( 107 | sample: npt.NDArray[np.float64], 108 | ) -> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]: 109 | train_sample = [] 110 | test_sample = [] 111 | 112 | # Univariate distribution case. We need to make 2-dimensional array manually. 113 | if np.ndim(sample) == 1: 114 | sample = np.reshape(sample, (-1, 1)) 115 | 116 | for i, x in enumerate(sample): 117 | if i % 2 == 0: 118 | train_sample.append(x) 119 | else: 120 | test_sample.append(x) 121 | 122 | return np.array(train_sample), np.array(test_sample) 123 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/density/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PySATL/pysatl-cpd/9f496f4cdf1401d3d405e28a86e82ab848bb6b52/pysatl_cpd/core/algorithms/density/__init__.py -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/density/abstracts/density_based_algorithm.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | from collections.abc import Callable 3 | from typing import TypeAlias 4 | 5 | import numpy as np 6 | import numpy.typing as npt 7 | from scipy.optimize import minimize 8 | 9 | from pysatl_cpd.core.algorithms.abstract_algorithm import Algorithm 10 | 11 | _TObjFunc: TypeAlias = Callable[[npt.NDArray[np.float64], npt.NDArray[np.float64]], float] 12 | _TMetrics: TypeAlias = dict[str, int | float] 13 | 14 | 15 | class DensityBasedAlgorithm(Algorithm): 16 | @staticmethod 17 | def _kernel_density_estimation(observation: npt.NDArray[np.float64], bandwidth: float) -> npt.NDArray[np.float64]: 18 | """Perform kernel density estimation on the given observations without fitting a model. 19 | 20 | :param observation: the data points for which to estimate the density. 21 | :param bandwidth: the bandwidth parameter for the kernel density estimation. 22 | 23 | :return: estimated density values on a uniform grid spanning the observations.
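Each grid value is a sum of Gaussian kernels, kde(x) = (1 / (n * h * sqrt(2 * pi))) * sum_i exp(-0.5 * ((x - x_i) / h) ** 2),
evaluated on 1000 points covering [min(observation) - 3 * h, max(observation) + 3 * h], where h is the bandwidth.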
24 | """ 25 | n = len(observation) 26 | x_grid = np.linspace(np.min(observation) - 3 * bandwidth, np.max(observation) + 3 * bandwidth, 1000) 27 | kde_values = np.zeros_like(x_grid) 28 | for x in observation: 29 | kde_values += np.exp(-0.5 * ((x_grid - x) / bandwidth) ** 2) 30 | 31 | kde_values /= n * bandwidth * np.sqrt(2 * np.pi) 32 | return kde_values 33 | 34 | def _calculate_weights( 35 | self, 36 | test_value: npt.NDArray[np.float64], 37 | reference_value: npt.NDArray[np.float64], 38 | bandwidth: float, 39 | objective_function: _TObjFunc, 40 | ) -> npt.NDArray[np.float64]: 41 | """Calculate the weights based on the density ratio between test and reference values. 42 | 43 | :param test_value: the test data points. 44 | :param reference_value: the reference data points. 45 | :param bandwidth: the bandwidth parameter for the kernel density estimation. 46 | :param objective_function: the objective function to minimize. 47 | 48 | :return: the calculated density ratios normalized to their mean. 49 | """ 50 | test_density = self._kernel_density_estimation(test_value, bandwidth) 51 | reference_density = self._kernel_density_estimation(reference_value, bandwidth) 52 | 53 | def objective_function_wrapper(alpha: npt.NDArray[np.float64], /) -> float: 54 | """Wrapper for the objective function to calculate the density ratio. 55 | 56 | :param alpha: relative parameter that controls the weighting between the numerator distribution 57 | and the denominator distribution in the density ratio estimation. 58 | 59 | :return: the value of the objective function to minimize. 60 | """ 61 | objective_density_ratio = np.exp(test_density - reference_density - alpha) 62 | return objective_function(objective_density_ratio, alpha) 63 | 64 | res = minimize(objective_function_wrapper, np.zeros(len(test_value)), method="L-BFGS-B") 65 | optimized_alpha: npt.NDArray[np.float64] = res.x 66 | density_ratio: npt.NDArray[np.float64] = np.exp(test_density - reference_density - optimized_alpha) 67 | return density_ratio / np.mean(density_ratio) 68 | 69 | @abstractmethod 70 | def detect(self, window: npt.NDArray[np.float64]) -> int: 71 | # maybe rtype tuple[int] 72 | """Function for finding change points in window 73 | 74 | :param window: part of global data for finding change points 75 | :return: list of right borders of window change points 76 | """ 77 | raise NotImplementedError 78 | 79 | @abstractmethod 80 | def localize(self, window: npt.NDArray[np.float64]) -> list[int]: 81 | """Function for finding coordinates of change points in window 82 | 83 | :param window: part of global data for finding change points 84 | :return: list of window change points 85 | """ 86 | raise NotImplementedError 87 | 88 | @staticmethod 89 | def evaluate_detection_accuracy(true_change_points: list[int], detected_change_points: list[int]) -> _TMetrics: 90 | """Evaluate the accuracy of change point detection. 91 | 92 | :param true_change_points: list of true change point indices. 93 | :param detected_change_points: list of detected change point indices. 94 | 95 | :return: a dictionary with evaluation metrics (precision, recall, F1 score). 
96 | """ 97 | true_positive = len(set(true_change_points) & set(detected_change_points)) 98 | false_positive = len(set(detected_change_points) - set(true_change_points)) 99 | false_negative = len(set(true_change_points) - set(detected_change_points)) 100 | 101 | precision = true_positive / (true_positive + false_positive) if true_positive + false_positive > 0 else 0.0 102 | recall = true_positive / (true_positive + false_negative) if true_positive + false_negative > 0 else 0.0 103 | f1_score = (2 * precision * recall / (precision + recall)) if (precision + recall) > 0 else 0.0 104 | 105 | return { 106 | "precision": precision, 107 | "recall": recall, 108 | "f1_score": f1_score, 109 | "true_positive": true_positive, 110 | "false_positive": false_positive, 111 | "false_negative": false_negative, 112 | } 113 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/graph/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PySATL/pysatl-cpd/9f496f4cdf1401d3d405e28a86e82ab848bb6b52/pysatl_cpd/core/algorithms/graph/__init__.py -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/graph/abstracts/ibuilder.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from collections.abc import Callable, Iterable 3 | from typing import Any 4 | 5 | import numpy as np 6 | import numpy.typing as npt 7 | 8 | from pysatl_cpd.core.algorithms.graph.abstracts.igraph import IGraph 9 | 10 | 11 | class IBuilder(ABC): 12 | def __init__( 13 | self, data: Iterable[np.float64] | Iterable[npt.NDArray[np.float64]], compare: Callable[[Any, Any], bool] 14 | ): 15 | """ 16 | Initialize the builder with data and a comparison function. 17 | 18 | :param data: List of elements to be used in building the graph. 19 | :param compare: Callable that takes two elements and returns a boolean indicating 20 | if an edge should exist between them. 21 | """ 22 | self.data = list(data) 23 | self.compare = compare 24 | self.num_of_edges: int = 0 25 | 26 | @abstractmethod 27 | def build_graph(self) -> IGraph: 28 | """ 29 | Abstract method to build and return a graph representation. 30 | 31 | :return: An instance of IGraph representing the built graph. 32 | """ 33 | raise NotImplementedError 34 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/graph/abstracts/igraph.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | 4 | class IGraph(ABC): 5 | def __init__(self, num_of_edges: int, len_data: int) -> None: 6 | """ 7 | Initialize the IGraph with the number of edges and the length of data. 8 | 9 | :param num_of_edges: Number of edges in the graph. 10 | :param len_data: Number of nodes in the graph. 11 | """ 12 | self.num_of_edges: int = num_of_edges 13 | self.len: int = len_data 14 | 15 | @abstractmethod 16 | def check_edges_exist(self, thao: int) -> int: 17 | """ 18 | Calculate the number of edges that exist between nodes up to a certain index (thao) 19 | and nodes from that index to the end. 20 | 21 | :param thao: Index dividing the nodes into two sets. 22 | :return: Number of edges existing between the two sets of nodes. 
23 | """ 24 | raise NotImplementedError 25 | 26 | @abstractmethod 27 | def sum_of_squares_of_degrees_of_nodes(self) -> int: 28 | """ 29 | Calculate the sum of the squares of the degrees of all nodes. 30 | 31 | :return: Sum of the squares of the degrees of the nodes. 32 | """ 33 | raise NotImplementedError 34 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/graph/abstracts/igraph_cpd.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | from pysatl_cpd.core.algorithms.graph.abstracts.igraph import IGraph 4 | 5 | 6 | class IGraphCPD(ABC): 7 | def __init__(self, graph: IGraph): 8 | """ 9 | Initialize the IGraphCPD with the given graph. 10 | 11 | :param graph: An instance of IGraph representing the graph. 12 | """ 13 | self.graph = graph 14 | self.size = graph.len 15 | 16 | @abstractmethod 17 | def calculation_e(self, thao: int) -> float: 18 | """ 19 | Calculate the mathematical expectation (E) using the given formula. 20 | 21 | :param thao: Index dividing the nodes into two sets. 22 | :return: Calculated expectation value. 23 | """ 24 | raise NotImplementedError 25 | 26 | @abstractmethod 27 | def calculation_var(self, thao: int) -> float: 28 | """ 29 | Calculate the variance using the given formula. 30 | 31 | :param thao: Index dividing the nodes into two sets. 32 | :return: Calculated variance value. 33 | """ 34 | raise NotImplementedError 35 | 36 | @abstractmethod 37 | def calculation_z(self, thao: int) -> float: 38 | """ 39 | Calculate the Z statistic. 40 | 41 | :param thao: Index dividing the nodes into two sets. 42 | :return: Calculated Z statistic. 43 | """ 44 | raise NotImplementedError 45 | 46 | @abstractmethod 47 | def find_changepoint(self, border: float) -> list[int]: 48 | """ 49 | Find change points in the data based on the Z statistic. 50 | 51 | :param border: Threshold value for detecting change points. 52 | :return: List of detected change points. 53 | """ 54 | raise NotImplementedError 55 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/graph/builders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PySATL/pysatl-cpd/9f496f4cdf1401d3d405e28a86e82ab848bb6b52/pysatl_cpd/core/algorithms/graph/builders/__init__.py -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/graph/builders/list.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Callable 2 | from typing import Any 3 | 4 | from pysatl_cpd.core.algorithms.graph.abstracts.ibuilder import IBuilder 5 | from pysatl_cpd.core.algorithms.graph.abstracts.igraph import IGraph 6 | from pysatl_cpd.core.algorithms.graph.graph_list import GraphList 7 | 8 | 9 | class AdjacencyListBuilder(IBuilder): 10 | def __init__(self, data: list[Any], comparing_function: Callable[[Any, Any], bool]): 11 | super().__init__(data, comparing_function) 12 | 13 | def build(self) -> dict[int, list[Any]]: # Adjacency List 14 | """ 15 | Build the adjacency list from the provided data. 16 | 17 | :return: A dictionary representing the adjacency list where keys are node indices and values 18 | are lists of adjacent nodes. 
19 | """ 20 | unique_edges = set() 21 | count_nodes = len(self.data) 22 | adjacency_list: dict[int, list[Any]] = {index: [] for index in range(count_nodes)} 23 | for i in range(count_nodes): 24 | for j in range(count_nodes): 25 | if self.compare(self.data[i], self.data[j]) and (i != j): 26 | adjacency_list[i].append(self.data[j]) 27 | edge = tuple(sorted((i, j))) 28 | unique_edges.add(edge) 29 | self.num_of_edges = len(unique_edges) 30 | 31 | # for i in range(0, len(self.data)): 32 | # print(f"{self.data[i]}: {adjacency_list[i]}") 33 | 34 | return adjacency_list 35 | 36 | def build_graph(self) -> IGraph: 37 | graph = self.build() 38 | return GraphList(graph, self.data, self.num_of_edges) 39 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/graph/builders/matrix.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Callable, Iterable 2 | from typing import Any 3 | 4 | import numpy as np 5 | import numpy.typing as npt 6 | 7 | from pysatl_cpd.core.algorithms.graph.abstracts.ibuilder import IBuilder 8 | from pysatl_cpd.core.algorithms.graph.abstracts.igraph import IGraph 9 | from pysatl_cpd.core.algorithms.graph.graph_matrix import GraphMatrix 10 | 11 | 12 | class AdjacencyMatrixBuilder(IBuilder): 13 | def __init__( 14 | self, 15 | data: Iterable[np.float64] | Iterable[npt.NDArray[np.float64]], 16 | comparing_function: Callable[[Any, Any], bool], 17 | ): 18 | super().__init__(data, comparing_function) 19 | 20 | def build_matrix(self) -> npt.NDArray[np.int8]: # Adjacency Matrix 21 | """ 22 | Build the adjacency matrix from the provided data. 23 | 24 | :return: A NumPy ndarray representing the adjacency matrix where element [i, j] is 1 if 25 | there is an edge between node i and node j, otherwise 0. 
26 | """ 27 | count_edges = 0 28 | count_nodes = len(self.data) 29 | adjacency_matrix = np.zeros((count_nodes, count_nodes), dtype=np.int8) 30 | 31 | for i in range(count_nodes): 32 | for j in range(count_nodes): 33 | if self.compare(self.data[i], self.data[j]) and (i != j): 34 | adjacency_matrix[i, j] = 1 35 | count_edges += 1 36 | self.num_of_edges = count_edges // 2 37 | 38 | return adjacency_matrix 39 | 40 | def build_graph(self) -> IGraph: 41 | graph = self.build_matrix() 42 | return GraphMatrix(graph, self.num_of_edges) 43 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/graph/graph_cpd.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | from pysatl_cpd.core.algorithms.graph.abstracts.igraph import IGraph 4 | from pysatl_cpd.core.algorithms.graph.abstracts.igraph_cpd import IGraphCPD 5 | 6 | 7 | class GraphCPD(IGraphCPD): 8 | def __init__(self, graph: IGraph): 9 | super().__init__(graph) 10 | 11 | def calculation_e(self, thao: int) -> float: 12 | p1 = ((2 * thao) * (self.size - thao)) / (self.size * (self.size - 1)) 13 | return p1 * self.graph.num_of_edges 14 | 15 | def calculation_var(self, thao: int) -> float: 16 | p1 = ((2 * thao) * (self.size - thao)) / (self.size * (self.size - 1)) 17 | p2 = (4 * thao * (thao - 1) * (self.size - thao) * (self.size - thao - 1)) / ( 18 | self.size * (self.size - 1) * (self.size - 2) * (self.size - 3) 19 | ) 20 | var = ( 21 | p1 * self.graph.num_of_edges 22 | + (0.5 * p1 - p2) * self.graph.sum_of_squares_of_degrees_of_nodes() 23 | + (p2 - p1**2) * self.graph.num_of_edges**2 24 | ) 25 | return var 26 | 27 | def calculation_z(self, thao: int) -> float: 28 | zg = -((self.graph.check_edges_exist(thao) - self.calculation_e(thao)) / math.sqrt(self.calculation_var(thao))) 29 | return zg 30 | 31 | def find_changepoint(self, border: float) -> list[int]: 32 | change_point_list: list[int] = [] 33 | for t in range(1, self.size): 34 | if self.calculation_z(t) > border: 35 | change_point_list.append(t) 36 | return change_point_list 37 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/graph/graph_list.py: -------------------------------------------------------------------------------- 1 | from typing import Any, TypeAlias 2 | 3 | from pysatl_cpd.core.algorithms.graph.abstracts.igraph import IGraph 4 | 5 | _TAdjList: TypeAlias = dict[int, list[Any]] 6 | 7 | 8 | class GraphList(IGraph): 9 | def __init__(self, graph: _TAdjList, data: list[Any], num_of_edges: int) -> None: 10 | """ 11 | Initialize the GraphList with the adjacency list, data, and number of edges. 12 | 13 | :param graph: Adjacency list representing the graph. 14 | :param data: List of elements representing the nodes. 15 | :param num_of_edges: Number of edges in the graph. 16 | """ 17 | super().__init__(num_of_edges, len(data)) 18 | self.graph = graph 19 | self.data = data 20 | 21 | def __getitem__(self, item: int) -> Any: 22 | """ 23 | Get the list of adjacent nodes for a given node. 24 | 25 | :param item: Node index. 26 | :return: List of adjacent nodes. 
27 | """ 28 | return self.graph[item] 29 | 30 | def check_edges_exist(self, thao: int) -> int: 31 | count_edges = 0 32 | for node_1 in range(thao): 33 | for node_2 in range(thao, self.len): 34 | if self.data[node_2] in self.graph[node_1]: 35 | count_edges += 1 36 | return count_edges 37 | 38 | def sum_of_squares_of_degrees_of_nodes(self) -> int: 39 | sum_squares = 0 40 | for node in range(0, self.len): 41 | sum_squares += len(self.graph[node]) ** 2 42 | return sum_squares 43 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/graph/graph_matrix.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | import numpy.typing as npt 4 | 5 | from pysatl_cpd.core.algorithms.graph.abstracts.igraph import IGraph 6 | 7 | 8 | class GraphMatrix(IGraph): 9 | def __init__(self, graph: npt.NDArray[Any], num_of_edges: int): 10 | """ 11 | Initialize the GraphMatrix with the adjacency matrix and number of edges. 12 | 13 | :param graph: Adjacency matrix representing the graph. 14 | :param num_of_edges: Number of edges in the graph. 15 | """ 16 | super().__init__(num_of_edges, len(graph)) 17 | self.mtx = graph 18 | 19 | def __getitem__(self, item: int) -> Any: 20 | """ 21 | Get the row of the adjacency matrix for a given node. 22 | 23 | :param item: Node index. 24 | :return: Row of the adjacency matrix corresponding to the node. 25 | """ 26 | return self.mtx[item] 27 | 28 | def check_edges_exist(self, thao: int) -> int: 29 | count_edges = 0 30 | for node_before in range(thao): 31 | for node_after in range(thao, self.len): 32 | if self.mtx[node_before, node_after] == 1: 33 | count_edges += 1 34 | return count_edges 35 | 36 | def sum_of_squares_of_degrees_of_nodes(self) -> int: 37 | sum_squares = 0 38 | for node_1 in range(0, self.len): 39 | node_degree = 0 40 | for node_2 in range(0, self.len): 41 | if self.mtx[node_1, node_2] == 1: 42 | node_degree += 1 43 | node_degree = node_degree**2 44 | sum_squares += node_degree 45 | return sum_squares 46 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/graph_algorithm.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Callable 2 | from typing import Any 3 | 4 | import numpy as np 5 | import numpy.typing as npt 6 | 7 | from .abstract_algorithm import Algorithm 8 | from .graph.builders.matrix import AdjacencyMatrixBuilder 9 | from .graph.graph_cpd import GraphCPD 10 | 11 | 12 | class GraphAlgorithm(Algorithm): 13 | def __init__(self, compare_func: Callable[[Any, Any], bool], threshold: float): 14 | self.compare = compare_func 15 | self.threshold = threshold 16 | 17 | def localize(self, window: npt.NDArray[np.float64]) -> list[int]: 18 | builder = AdjacencyMatrixBuilder(window, self.compare) 19 | graph = builder.build_graph() 20 | cpd = GraphCPD(graph) 21 | num_cpd: list[int] = cpd.find_changepoint(self.threshold) 22 | return num_cpd 23 | 24 | def detect(self, window: npt.NDArray[np.float64]) -> int: 25 | builder = AdjacencyMatrixBuilder(window, self.compare) 26 | graph = builder.build_graph() 27 | cpd = GraphCPD(graph) 28 | num_cpd: list[int] = cpd.find_changepoint(self.threshold) 29 | return len(num_cpd) 30 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/kliep_algorithm.py: -------------------------------------------------------------------------------- 1 | from 
typing import cast 2 | 3 | import numpy as np 4 | import numpy.typing as npt 5 | from numpy import dtype, float64, ndarray 6 | 7 | from pysatl_cpd.core.algorithms.density.abstracts.density_based_algorithm import DensityBasedAlgorithm 8 | 9 | 10 | class KliepAlgorithm(DensityBasedAlgorithm): 11 | """Kullback-Leibler Importance Estimation Procedure (KLIEP) algorithm 12 | for change point detection. 13 | 14 | KLIEP estimates the density ratio between two distributions and uses 15 | the importance weights for detecting changes in the data distribution. 16 | """ 17 | 18 | def __init__(self, bandwidth: float, regularization_coef: float, threshold: float = 1.1): 19 | """Initialize the KLIEP algorithm. 20 | 21 | Args: 22 | bandwidth (float): bandwidth parameter for density estimation. 23 | regularization_coef (float): regularization parameter. 24 | threshold (float, optional): threshold for detecting change points. 25 | Defaults to 1.1. 26 | """ 27 | self.bandwidth = bandwidth 28 | self.regularization_coef = regularization_coef 29 | self.threshold = np.float64(threshold) 30 | 31 | def _loss_function(self, density_ratio: npt.NDArray[np.float64], alpha: npt.NDArray[np.float64]) -> float: 32 | """Loss function for KLIEP. 33 | 34 | Args: 35 | density_ratio (np.ndarray): estimated density ratio. 36 | alpha (np.ndarray): coefficients for the density ratio. 37 | 38 | Returns: 39 | float: the computed loss value. 40 | """ 41 | return -np.mean(density_ratio) + self.regularization_coef * np.sum(alpha**2) 42 | 43 | def detect(self, window: npt.NDArray[np.float64]) -> int: 44 | """Detect the number of change points in the given data window 45 | using KLIEP. 46 | 47 | Args: 48 | window (Iterable[float]): the data window to detect change points. 49 | 50 | Returns: 51 | int: the number of detected change points. 52 | """ 53 | 54 | window_sample = np.array(window) 55 | weights = self._calculate_weights( 56 | test_value=window_sample, 57 | reference_value=window_sample, 58 | bandwidth=self.bandwidth, 59 | objective_function=self._loss_function, 60 | ) 61 | 62 | return np.count_nonzero(weights > self.threshold) 63 | 64 | def localize(self, window: npt.NDArray[np.float64]) -> list[int]: 65 | """Localize the change points in the given data window using KLIEP. 66 | 67 | Args: 68 | window (Iterable[float]): the data window to localize 69 | change points. 70 | 71 | Returns: 72 | List[int]: the indices of the detected change points. 
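Example: weights ``[1.0, 1.3, 0.9]`` with the default threshold ``1.1`` yield ``[1]``.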
73 | """ 74 | window_sample = np.array(window) 75 | weights: ndarray[tuple[int, ...], dtype[float64]] = self._calculate_weights( 76 | test_value=window_sample, 77 | reference_value=window_sample, 78 | bandwidth=self.bandwidth, 79 | objective_function=self._loss_function, 80 | ) 81 | 82 | return cast(list[int], np.where(weights > self.threshold)[0].tolist()) 83 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/knn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PySATL/pysatl-cpd/9f496f4cdf1401d3d405e28a86e82ab848bb6b52/pysatl_cpd/core/algorithms/knn/__init__.py -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/knn/abstracts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PySATL/pysatl-cpd/9f496f4cdf1401d3d405e28a86e82ab848bb6b52/pysatl_cpd/core/algorithms/knn/abstracts/__init__.py -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/knn/abstracts/observation.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for abstractions used in heap, needed to clearly distinguish observations made at different times. 3 | """ 4 | 5 | __author__ = "Artemii Patov" 6 | __copyright__ = "Copyright (c) 2024 Artemii Patov" 7 | __license__ = "SPDX-License-Identifier: MIT" 8 | 9 | from dataclasses import dataclass, field 10 | 11 | import numpy as np 12 | import numpy.typing as npt 13 | 14 | 15 | @dataclass(order=True) 16 | class Observation: 17 | """ 18 | Abstraction over observation that consists of the time of the point in time series and the value of it. 19 | """ 20 | 21 | time: int 22 | value: np.float64 | npt.NDArray[np.float64] = field(compare=False) 23 | 24 | 25 | @dataclass(order=True) 26 | class Neighbour: 27 | """ 28 | Abstraction over neighbour that consists of the distance to the main point and the observation-neighbour itself. 29 | """ 30 | 31 | distance: float 32 | observation: Observation 33 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/knn/classifier.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for implementation of classifier based on nearest neighbours for cpd. 3 | """ 4 | 5 | __author__ = "Artemii Patov" 6 | __copyright__ = "Copyright (c) 2024 Artemii Patov" 7 | __license__ = "SPDX-License-Identifier: MIT" 8 | 9 | import typing as tp 10 | from math import sqrt 11 | 12 | import numpy as np 13 | import numpy.typing as npt 14 | 15 | from .graph import KNNGraph 16 | 17 | 18 | class KNNClassifier: 19 | """ 20 | The class implementing classifier based on nearest neighbours. 21 | """ 22 | 23 | def __init__( 24 | self, 25 | metric: tp.Callable[[np.float64 | npt.NDArray[np.float64], np.float64 | npt.NDArray[np.float64]], float], 26 | k: int = 7, 27 | delta: float = 1e-12, 28 | ) -> None: 29 | """ 30 | Initializes a new instance of KNN classifier for cpd. 31 | 32 | :param metric: function for calculating distance between points in time series. 33 | :param k: number of neighbours in the knn graph relative to each point. 34 | Default is 7, which is generally the most optimal value (based on the experiments results). 35 | :param delta: delta for comparing float values of the given observations. 
36 | """ 37 | self.__k = k 38 | self.__metric = metric 39 | self.__delta = delta 40 | 41 | self.__window: npt.NDArray[np.float64] | None = None 42 | self.__knn_graph: KNNGraph | None = None 43 | 44 | def classify(self, window: npt.NDArray[np.float64]) -> None: 45 | """Applies classificator to the given sample. 46 | 47 | :param window: part of global data for finding change points. 48 | """ 49 | self.__window = window 50 | self.__knn_graph = KNNGraph(window, self.__metric, self.__k, self.__delta) 51 | self.__knn_graph.build() 52 | 53 | def assess_barrier(self, time: int) -> float: 54 | """ 55 | Calculates quality function in specified point. 56 | 57 | :param time: index of point in the given sample to calculate statistics relative to it. 58 | """ 59 | assert self.__window is not None 60 | window_size = len(self.__window) 61 | 62 | assert self.__knn_graph is not None, "Graph should not be None." 63 | 64 | k = self.__k 65 | n = window_size 66 | n_1 = time 67 | n_2 = n - time 68 | 69 | if n <= k: 70 | # Unable to analyze sample due to its size. 71 | # Returns negative number that will be less than the statistics in this case, 72 | # but big enough not to spoil overall statistical picture. 73 | return -k 74 | 75 | h = 4 * (n_1 - 1) * (n_2 - 1) / ((n - 2) * (n - 3)) 76 | 77 | sum_1 = (1 / n) * sum( 78 | self.__knn_graph.check_for_neighbourhood(j, i) 79 | for i in range(window_size) 80 | for j in self.__knn_graph.get_neighbours(i) 81 | ) 82 | 83 | sum_2 = (1 / n) * ( 84 | 2 85 | * sum( 86 | self.__knn_graph.check_for_neighbourhood(m, i) 87 | for j in range(window_size) 88 | for i in self.__knn_graph.get_neighbours(j) 89 | for m in range(j + 1, window_size) 90 | ) 91 | + sum(len(self.__knn_graph.get_neighbours(i)) for i in range(window_size)) 92 | ) 93 | 94 | expectation = 4 * k * n_1 * n_2 / (n - 1) 95 | variance = (expectation / k) * (h * (sum_1 + k - (2 * k**2 / (n - 1))) + (1 - h) * (sum_2 - k**2)) 96 | deviation = sqrt(variance) 97 | 98 | permutation = np.arange(window_size) 99 | random_variable_value = self.__calculate_random_variable(permutation, time, window_size) 100 | 101 | if deviation == 0: 102 | # if the deviation is zero, it likely means that the time is 1 or the data is constant. 103 | # In this case we cannot detect any change-points. 104 | # Thus, we can return negative number that will be less than the statistics in this case. 105 | return -k 106 | 107 | statistics = -(random_variable_value - expectation) / deviation 108 | 109 | return statistics 110 | 111 | def __calculate_random_variable(self, permutation: npt.NDArray[np.intp], t: int, window_size: int) -> int: 112 | """ 113 | Calculates a random variable from a permutation and a fixed point. 114 | 115 | :param permutation: random permutation of observations. 116 | :param t: fixed point that splits the permutation. 117 | :return: value of the random variable. 
118 | """ 119 | 120 | def b(i: int, j: int) -> bool: 121 | pi = int(permutation[i]) 122 | pj = int(permutation[j]) 123 | return (pi <= t < pj) or (pj <= t < pi) 124 | 125 | assert self.__knn_graph is not None 126 | s = sum( 127 | (self.__knn_graph.check_for_neighbourhood(i, j) + self.__knn_graph.check_for_neighbourhood(j, i)) * b(i, j) 128 | for i in range(window_size) 129 | for j in range(window_size) 130 | ) 131 | 132 | return s 133 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/knn/graph.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for implementation of neareset neighbours graph. 3 | """ 4 | 5 | __author__ = "Artemii Patov" 6 | __copyright__ = "Copyright (c) 2024 Artemii Patov" 7 | __license__ = "SPDX-License-Identifier: MIT" 8 | 9 | import typing as tp 10 | from collections import deque 11 | 12 | import numpy as np 13 | import numpy.typing as npt 14 | 15 | from .abstracts.observation import Observation 16 | from .heap import NNHeap 17 | 18 | 19 | class KNNGraph: 20 | """ 21 | The class implementing nearest neighbours graph. 22 | """ 23 | 24 | def __init__( 25 | self, 26 | window: npt.NDArray[np.float64], 27 | metric: tp.Callable[[np.float64 | npt.NDArray[np.float64], np.float64 | npt.NDArray[np.float64]], float], 28 | k: int = 7, 29 | delta: float = 1e-12, 30 | ) -> None: 31 | """ 32 | Initializes a new instance of KNN graph. 33 | 34 | :param window: an overall sample the graph is based on. 35 | :param metric: function for calculating the distance between two points in time series. 36 | :param k: number of neighbours in the knn graph relative to each point. 37 | Default is 7, which is generally the most optimal value (based on the experiments results). 38 | :param delta: delta for comparing float values of the given observations. 39 | """ 40 | self.__window: list[Observation] = [Observation(t, v) for t, v in enumerate(window)] 41 | self.__metric: tp.Callable[[Observation, Observation], float] = lambda obs1, obs2: metric( 42 | obs1.value, obs2.value 43 | ) 44 | self.__k = k 45 | self.__delta = delta 46 | 47 | self.__graph: deque[NNHeap] = deque(maxlen=len(self.__window)) 48 | 49 | def build(self) -> None: 50 | """ 51 | Builds KNN graph according to the given parameters. 52 | """ 53 | for i in range(len(self.__window)): 54 | heap = NNHeap(self.__k, self.__metric, self.__window[-i - 1], self.__delta) 55 | heap.build(self.__window) 56 | self.__graph.appendleft(heap) 57 | 58 | def get_neighbours(self, obs_index: int) -> list[int]: 59 | return self.__graph[obs_index].get_neighbours_indices() 60 | 61 | def check_for_neighbourhood(self, first_index: int, second_index: int) -> bool: 62 | """ 63 | Checks if the second observation is among the k nearest neighbours of the first observation. 64 | 65 | :param first_index: index of main observation. 66 | :param second_index: index of possible neighbour. 67 | :return: true if the second point is the neighbour of the first one, false otherwise. 68 | """ 69 | neighbour = self.__window[second_index] 70 | return self.__graph[first_index].find_in_heap(neighbour) 71 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/knn/heap.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for implementation of nearest neighbours heap. 
3 | """ 4 | 5 | __author__ = "Artemii Patov" 6 | __copyright__ = "Copyright (c) 2024 Artemii Patov" 7 | __license__ = "SPDX-License-Identifier: MIT" 8 | 9 | import heapq 10 | import typing as tp 11 | from math import isclose 12 | 13 | from .abstracts.observation import Neighbour, Observation 14 | 15 | 16 | class NNHeap: 17 | """ 18 | The class implementing nearest neighbours heap --- helper abstraction for KNN graph. 19 | """ 20 | 21 | def __init__( 22 | self, 23 | size: int, 24 | metric: tp.Callable[[Observation, Observation], float], 25 | main_observation: Observation, 26 | delta: float, 27 | ) -> None: 28 | """ 29 | Initializes a new instance of NNHeap. 30 | 31 | :param size: size of the heap. 32 | :param metric: function for calculating distance between two observations. 33 | :param main_observation: the central point relative to which the nearest neighbours are sought. 34 | :param delta: delta for comparing float values of the given observations. 35 | """ 36 | self.__size = size 37 | self.__metric = metric 38 | self.__main_observation = main_observation 39 | 40 | self.__heap: list[Neighbour] = [] 41 | self.__delta = delta 42 | 43 | def build(self, neighbours: list[Observation]) -> None: 44 | """ 45 | Builds a nearest neighbour heap relative to the main observation with the given neighbours. 46 | 47 | :param neighbours: list of neighbours. 48 | """ 49 | for neighbour in neighbours: 50 | self.__add(neighbour) 51 | 52 | def find_in_heap(self, observation: Observation) -> bool: 53 | """ 54 | Checks if the given observation is among the nearest neighbours of the main observation. 55 | 56 | :param observation: observation to test. 57 | """ 58 | 59 | def predicate(x: Neighbour) -> bool: 60 | return isclose(self.__metric(x.observation, observation), 0.0, rel_tol=self.__delta) and ( 61 | x.observation.time == observation.time 62 | ) 63 | 64 | return any(predicate(i) for i in self.__heap) 65 | 66 | def get_neighbours_indices(self) -> list[int]: 67 | return [n.observation.time for n in self.__heap] 68 | 69 | def __add(self, observation: Observation) -> None: 70 | """ 71 | Adds observation to heap. 72 | 73 | :param observation: observation to add. 74 | """ 75 | if observation is self.__main_observation: 76 | return 77 | 78 | # Sign conversion is needed to convert the smallest element heap to the greatest element heap. 79 | neg_distance = -self.__metric(self.__main_observation, observation) 80 | neighbour = Neighbour(neg_distance, observation) 81 | 82 | if len(self.__heap) == self.__size and neighbour.distance > self.__heap[0].distance: 83 | heapq.heapreplace(self.__heap, neighbour) 84 | elif len(self.__heap) < self.__size: 85 | heapq.heappush(self.__heap, neighbour) 86 | -------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/knn_algorithm.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for implementation of CPD algorithm based on knn classification. 
3 | """ 4 | 5 | __author__ = "Artemii Patov" 6 | __copyright__ = "Copyright (c) 2024 Artemii Patov" 7 | __license__ = "SPDX-License-Identifier: MIT" 8 | 9 | import typing as tp 10 | 11 | import numpy as np 12 | import numpy.typing as npt 13 | 14 | from pysatl_cpd.core.algorithms.abstract_algorithm import Algorithm 15 | from pysatl_cpd.core.algorithms.classification.abstracts.istatistic_test import TestStatistic 16 | from pysatl_cpd.core.algorithms.knn.classifier import KNNClassifier 17 | 18 | 19 | class KNNAlgorithm(Algorithm): 20 | """ 21 | The class implementing change point detection algorithm based on k-NN classifier. Works only with non-constant data. 22 | """ 23 | 24 | def __init__( 25 | self, 26 | distance_func: tp.Callable[[np.float64 | npt.NDArray[np.float64], np.float64 | npt.NDArray[np.float64]], float], 27 | test_statistic: TestStatistic, 28 | indent_coeff: float, 29 | k: int = 7, 30 | delta: float = 1e-12, 31 | ) -> None: 32 | """ 33 | Initializes a new instance of k-NN based change point detection algorithm. 34 | 35 | :param distance_func: function for calculating the distance between two points in time series. 36 | :param test_statistic: Criterion to separate change points from other points in sample. 37 | :param indent_coeff: Coefficient for evaluating indent from window borders. 38 | The indentation is calculated by multiplying the given coefficient by the size of window. 39 | :param k: number of neighbours in the knn graph relative to each point. 40 | Default is 7, which is generally the most optimal value (based on the experiments results). 41 | :param delta: delta for comparing float values of the given observations. 42 | """ 43 | self.__test_statistic = test_statistic 44 | 45 | self.__shift_coeff = indent_coeff 46 | self.__classifier = KNNClassifier(distance_func, k, delta) 47 | 48 | self.__change_points: list[int] = [] 49 | self.__change_points_count = 0 50 | 51 | @property 52 | def test_statistic(self) -> TestStatistic: 53 | return self.__test_statistic 54 | 55 | @test_statistic.setter 56 | def test_statistic(self, test_statistic: TestStatistic) -> None: 57 | self.__test_statistic = test_statistic 58 | 59 | def detect(self, window: npt.NDArray[np.float64]) -> int: 60 | """Finds change points in window. 61 | 62 | :param window: part of global data for finding change points. 63 | :return: the number of change points in the window. 64 | """ 65 | self.__process_data(window) 66 | return self.__change_points_count 67 | 68 | def localize(self, window: npt.NDArray[np.float64]) -> list[int]: 69 | """Finds coordinates of change points (localizes them) in window. 70 | 71 | :param window: part of global data for finding change points. 72 | :return: list of window change points. 73 | """ 74 | self.__process_data(window) 75 | return self.__change_points.copy() 76 | 77 | def __process_data(self, window: npt.NDArray[np.float64]) -> None: 78 | """ 79 | Processes a window of data to detect/localize all change points depending on working mode. 80 | 81 | :param window: part of global data for change points analysis. 82 | """ 83 | sample_size = len(window) 84 | if sample_size == 0: 85 | return 86 | 87 | self.__classifier.classify(window) 88 | 89 | # Examining each point. 90 | # Boundaries are always change points. 
91 |         first_point = int(sample_size * self.__shift_coeff)
92 |         last_point = int(sample_size * (1 - self.__shift_coeff))
93 |         assessments = []
94 | 
95 |         for time in range(first_point, last_point):
96 |             quality = self.__classifier.assess_barrier(time)
97 |             assessments.append(quality)
98 | 
99 |         change_points = self.__test_statistic.get_change_points(assessments)
100 | 
101 |         # Shifting change point coordinates according to their place in the window.
102 |         self.__change_points = list(map(lambda x: x + first_point, change_points))
103 |         self.__change_points_count = len(change_points)
104 | 
-------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/online_algorithm.py: --------------------------------------------------------------------------------
1 | """
2 | Module for online change point detection algorithm's interface.
3 | """
4 | 
5 | __author__ = "Alexey Tatyanenko"
6 | __copyright__ = "Copyright (c) 2025 PySATL project"
7 | __license__ = "SPDX-License-Identifier: MIT"
8 | 
9 | from typing import Optional, Protocol
10 | 
11 | import numpy as np
12 | import numpy.typing as npt
13 | 
14 | 
15 | class OnlineAlgorithm(Protocol):
16 |     """
17 |     Protocol for online change point detection algorithm's interface.
18 |     """
19 | 
20 |     def detect(self, observation: np.float64 | npt.NDArray[np.float64]) -> bool:
21 |         """
22 |         Method for a step of detection of a change point.
23 |         :param observation: new observation of a time series.
24 |         :return: boolean flag indicating whether a change point was detected after processing the new observation.
25 |         """
26 |         ...
27 | 
28 |     def localize(self, observation: np.float64 | npt.NDArray[np.float64]) -> Optional[int]:
29 |         """
30 |         Method for a step of localization of a change point.
31 |         :param observation: new observation of a time series.
32 |         :return: absolute location of a change point, acquired after processing the new observation,
33 |         or None if there wasn't any.
34 |         """
35 |         ...
36 | 
-------------------------------------------------------------------------------- /pysatl_cpd/core/algorithms/rulsif_algorithm.py: --------------------------------------------------------------------------------
1 | from typing import cast
2 | 
3 | import numpy as np
4 | import numpy.typing as npt
5 | 
6 | from pysatl_cpd.core.algorithms.density.abstracts.density_based_algorithm import DensityBasedAlgorithm
7 | 
8 | 
9 | class RulsifAlgorithm(DensityBasedAlgorithm):
10 |     """Relative Unconstrained Least-Squares Importance Fitting (RULSIF)
11 |     algorithm for change point detection.
12 | 
13 |     RULSIF estimates the density ratio between two distributions and uses
14 |     the importance weights for detecting changes in the data distribution.
15 |     """
16 | 
17 |     def __init__(self, bandwidth: float, regularization_coef: float, threshold: float = 1.1):
18 |         """Initialize the RULSIF algorithm.
19 | 
20 |         Args:
21 |             bandwidth (float): bandwidth parameter for density estimation.
22 |             regularization_coef (float): regularization parameter.
23 |             threshold (float, optional): threshold for detecting change points.
24 |                 Defaults to 1.1.
25 |         """
26 |         self.bandwidth = bandwidth
27 |         self.regularization_coef = regularization_coef
28 |         self.threshold = threshold
29 | 
30 |     def _loss_function(self, density_ratio: npt.NDArray[np.float64], alpha: npt.NDArray[np.float64]) -> float:
31 |         """Loss function for RULSIF.
32 | 
33 |         Args:
34 |             density_ratio (np.ndarray): estimated density ratio.
35 |             alpha (np.ndarray): coefficients for the density ratio.
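As a reference for implementers, a toy online detector satisfying the OnlineAlgorithm protocol above; the protocol is structural, so no subclassing is needed. The jump heuristic and all names here are hypothetical:

from typing import Optional

import numpy as np
import numpy.typing as npt


class JumpOnlineAlgorithm:
    """Toy online detector: flags a change point when an observation jumps
    further than `threshold` away from the previous one."""

    def __init__(self, threshold: float = 3.0) -> None:
        self._threshold = threshold
        self._previous: np.float64 | npt.NDArray[np.float64] | None = None
        self._time = 0

    def detect(self, observation: np.float64 | npt.NDArray[np.float64]) -> bool:
        return self.localize(observation) is not None

    def localize(self, observation: np.float64 | npt.NDArray[np.float64]) -> Optional[int]:
        jumped = self._previous is not None and float(np.linalg.norm(observation - self._previous)) > self._threshold
        self._previous = observation
        self._time += 1
        return self._time - 1 if jumped else None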
36 | 
37 |         Returns:
38 |             float: the computed loss value.
39 |         """
40 |         return float(np.mean((density_ratio - 1) ** 2) + self.regularization_coef * np.sum(alpha**2))
41 | 
42 |     def detect(self, window: npt.NDArray[np.float64]) -> int:
43 |         """Detect the number of change points in the given data window
44 |         using RULSIF.
45 | 
46 |         Args:
47 |             window (npt.NDArray[np.float64]): the data window to detect change points in.
48 | 
49 |         Returns:
50 |             int: the number of detected change points.
51 |         """
52 |         window_sample = np.array(window)
53 |         weights = self._calculate_weights(
54 |             test_value=window_sample,
55 |             reference_value=window_sample,
56 |             bandwidth=self.bandwidth,
57 |             objective_function=self._loss_function,
58 |         )
59 | 
60 |         return np.count_nonzero(weights > self.threshold)
61 | 
62 |     def localize(self, window: npt.NDArray[np.float64]) -> list[int]:
63 |         """Localize the change points in the given data window using RULSIF.
64 | 
65 |         Args:
66 |             window (npt.NDArray[np.float64]): the data window to localize change points in.
67 | 
68 |         Returns:
69 |             list[int]: the indices of the detected change points.
70 |         """
71 |         window_sample = np.array(window)
72 |         weights = self._calculate_weights(
73 |             test_value=window_sample,
74 |             reference_value=window_sample,
75 |             bandwidth=self.bandwidth,
76 |             objective_function=self._loss_function,
77 |         )
78 | 
79 |         return cast(list[int], np.where(weights > self.threshold)[0].tolist())
80 | 
-------------------------------------------------------------------------------- /pysatl_cpd/core/cpd_core.py: --------------------------------------------------------------------------------
1 | __author__ = "Artem Romanyuk, Vladimir Kutuev"
2 | __copyright__ = "Copyright (c) 2025 PySATL project"
3 | __license__ = "SPDX-License-Identifier: MIT"
4 | 
5 | from .algorithms.abstract_algorithm import Algorithm
6 | from .scrubber.abstract import Scrubber
7 | 
8 | 
9 | class CpdCore:
10 |     """Change Point Detection core"""
11 | 
12 |     def __init__(
13 |         self,
14 |         scrubber: Scrubber,
15 |         algorithm: Algorithm,
16 |     ) -> None:
17 |         """Change Point Detection core algorithm
18 | 
19 |         :param scrubber: scrubber for dividing data into windows
20 |         and subsequent processing of data windows
21 |         by change point detection algorithms
22 |         :param algorithm: change point detection algorithm
23 |         """
24 |         self.scrubber = scrubber
25 |         self.algorithm = algorithm
26 | 
27 |     def localize(self) -> list[int]:
28 |         """Find change points
29 | 
30 |         :return: list of change points
31 |         """
32 |         change_points: list[int] = []
33 |         for window in self.scrubber:
34 |             window_change_points = self.algorithm.localize(window.values)
35 |             change_points.extend(map(lambda i: window.indices[i], window_change_points))
36 |         return change_points
37 | 
38 |     def detect(self) -> int:
39 |         """Count change points
40 | 
41 |         :return: number of change points
42 |         """
43 |         change_points_count = 0
44 |         for window in self.scrubber:
45 |             change_points_count += self.algorithm.detect(window.values)
46 |         return change_points_count
47 | 
-------------------------------------------------------------------------------- /pysatl_cpd/core/online_cpd_core.py: --------------------------------------------------------------------------------
1 | """
2 | Module for online-CPD core, which presents access to algorithms as iterators over provided data.
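A minimal end-to-end sketch of CpdCore (illustrative; MidpointAlgorithm is a hypothetical toy, and it assumes the Algorithm interface exposes the detect/localize pair used by the core):

import numpy as np
import numpy.typing as npt

from pysatl_cpd.core.algorithms.abstract_algorithm import Algorithm
from pysatl_cpd.core.cpd_core import CpdCore
from pysatl_cpd.core.scrubber.data_providers import ListUnivariateProvider
from pysatl_cpd.core.scrubber.linear import LinearScrubber


class MidpointAlgorithm(Algorithm):
    """Toy algorithm: always reports the middle of the window."""

    def detect(self, window: npt.NDArray[np.float64]) -> int:
        return 1

    def localize(self, window: npt.NDArray[np.float64]) -> list[int]:
        return [len(window) // 2]


data = [0.0] * 100 + [10.0] * 100
scrubber = LinearScrubber(ListUnivariateProvider(data), window_length=50)
core = CpdCore(scrubber, MidpointAlgorithm())
print(core.localize())  # window-local indices mapped back to global positions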
3 | """ 4 | 5 | __author__ = "Alexey Tatyanenko" 6 | __copyright__ = "Copyright (c) 2025 PySATL project" 7 | __license__ = "SPDX-License-Identifier: MIT" 8 | 9 | from collections.abc import Iterator 10 | 11 | from pysatl_cpd.core.algorithms.online_algorithm import OnlineAlgorithm 12 | from pysatl_cpd.core.scrubber.data_providers import DataProvider 13 | 14 | 15 | class OnlineCpdCore: 16 | """ 17 | Class that presents online CPD-algorithm as detection or localization iterator over the provided data. 18 | """ 19 | 20 | def __init__(self, algorithm: OnlineAlgorithm, data_provider: DataProvider) -> None: 21 | self.algorithm = algorithm 22 | self.data_provider = data_provider 23 | 24 | def detect(self) -> Iterator[bool]: 25 | """ 26 | Iteratively tries to detect a change point in the provided data. 27 | :return: whether a change point after processed observation was detected. 28 | """ 29 | for observation in self.data_provider: 30 | yield self.algorithm.detect(observation) 31 | 32 | def localize(self) -> Iterator[int | None]: 33 | """ 34 | Iteratively tries to localize a change point in the provided data. 35 | :return: change point location, if it was successfully localized, or None, otherwise. 36 | """ 37 | for observation in self.data_provider: 38 | yield self.algorithm.localize(observation) 39 | -------------------------------------------------------------------------------- /pysatl_cpd/core/problem.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | 4 | @dataclass 5 | class CpdProblem: 6 | """Specification of the solving problem 7 | 8 | :param to_localize: is it necessary to localize change points, defaults to False 9 | """ 10 | 11 | to_localize: bool = True 12 | -------------------------------------------------------------------------------- /pysatl_cpd/core/scrubber/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PySATL/pysatl-cpd/9f496f4cdf1401d3d405e28a86e82ab848bb6b52/pysatl_cpd/core/scrubber/__init__.py -------------------------------------------------------------------------------- /pysatl_cpd/core/scrubber/abstract.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for Abstract Scrubber description. 
3 | """ 4 | 5 | __author__ = "Romanyuk Artem, Vladimir Kutuev" 6 | __copyright__ = "Copyright (c) 2025 PySATL project" 7 | __license__ = "SPDX-License-Identifier: MIT" 8 | 9 | 10 | from abc import ABC, abstractmethod 11 | from collections.abc import Iterator 12 | from dataclasses import dataclass 13 | 14 | import numpy as np 15 | import numpy.typing as npt 16 | 17 | from pysatl_cpd.core.scrubber.data_providers import DataProvider 18 | 19 | 20 | @dataclass 21 | class ScrubberWindow: 22 | values: npt.NDArray[np.float64] 23 | indices: list[int] 24 | 25 | 26 | class Scrubber(ABC): 27 | """A scrubber for dividing data into windows 28 | and subsequent processing of data windows 29 | by change point detection algorithms 30 | """ 31 | 32 | def __init__(self, data_provider: DataProvider) -> None: 33 | """A scrubber for dividing data into windows 34 | and subsequent processing of data windows 35 | by change point detection algorithms 36 | 37 | """ 38 | self._data_provider = data_provider 39 | 40 | @abstractmethod 41 | def __iter__(self) -> Iterator[ScrubberWindow]: 42 | """Function for dividing data into parts to feed into the change point detection algorithm 43 | 44 | :return: Iterator of data windows for change point detection algorithm 45 | """ 46 | ... 47 | 48 | @property 49 | def data(self) -> Iterator[np.float64] | Iterator[npt.NDArray[np.float64]]: 50 | return iter(self._data_provider) 51 | -------------------------------------------------------------------------------- /pysatl_cpd/core/scrubber/data_providers.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module contains classes providing data from different sources to scrubbers. 3 | """ 4 | 5 | __author__ = "Vladimir Kutuev" 6 | __copyright__ = "Copyright (c) 2025 PySATL project" 7 | __license__ = "SPDX-License-Identifier: MIT" 8 | 9 | from collections.abc import Iterator 10 | from typing import Protocol, runtime_checkable 11 | 12 | import numpy as np 13 | import numpy.typing as npt 14 | 15 | from pysatl_cpd.labeled_data import LabeledCpdData 16 | 17 | 18 | @runtime_checkable 19 | class DataProvider(Protocol): 20 | """Interface for abstracting the scrubber from the data source and its format""" 21 | 22 | def __iter__(self) -> Iterator[np.float64] | Iterator[npt.NDArray[np.float64]]: 23 | """ 24 | :return: an iterator over the data 25 | """ 26 | ... 
27 | 28 | 29 | class ListUnivariateProvider(DataProvider): 30 | """Provides data from list of floats""" 31 | 32 | def __init__(self, data: list[float]) -> None: 33 | self._data = data 34 | 35 | def __iter__(self) -> Iterator[np.float64] | Iterator[npt.NDArray[np.float64]]: 36 | return map(np.float64, self._data) 37 | 38 | 39 | class ListMultivariateProvider(DataProvider): 40 | """Provides data from list of NumPy ndarrays""" 41 | 42 | def __init__(self, data: list[npt.NDArray[np.float64]]) -> None: 43 | self._data = data 44 | 45 | def __iter__(self) -> Iterator[np.float64] | Iterator[npt.NDArray[np.float64]]: 46 | return iter(self._data) 47 | 48 | 49 | class LabeledDataProvider(DataProvider): 50 | """Provides data from LabeledData instance""" 51 | 52 | def __init__(self, data: LabeledCpdData) -> None: 53 | self._data = data.raw_data 54 | 55 | def __iter__(self) -> Iterator[np.float64] | Iterator[npt.NDArray[np.float64]]: 56 | return iter(self._data) 57 | -------------------------------------------------------------------------------- /pysatl_cpd/core/scrubber/linear.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for implementation of Linear Scrubber. 3 | """ 4 | 5 | __author__ = "Vladimir Kutuev, Artemii Patov" 6 | __copyright__ = "Copyright (c) 2025 PySATL project" 7 | __license__ = "SPDX-License-Identifier: MIT" 8 | 9 | from collections.abc import Iterator 10 | from itertools import islice 11 | 12 | import numpy as np 13 | import numpy.typing as npt 14 | 15 | from pysatl_cpd.core.scrubber.data_providers import DataProvider 16 | 17 | from .abstract import Scrubber, ScrubberWindow 18 | 19 | 20 | class LinearScrubber(Scrubber): 21 | """A linear scrubber for dividing data into windows by moving them through data""" 22 | 23 | def __init__( 24 | self, 25 | data_provider: DataProvider, 26 | window_length: int = 100, 27 | shift_factor: float = 1.0 / 3.0, 28 | ): 29 | """A linear scrubber for dividing data into windows by moving them through data 30 | 31 | :param window_length: length of data window 32 | :param shift_factor: how far will the window move relative to the length 33 | """ 34 | super().__init__(data_provider) 35 | self._window_length = window_length 36 | self._shift_factor = shift_factor 37 | 38 | def __iter__(self) -> Iterator[ScrubberWindow]: 39 | window_start = 0 40 | shift = max(1, int(self._window_length * self._shift_factor)) 41 | provided_data_it = iter(self._data_provider) 42 | next_slice = np.array(list(islice(provided_data_it, self._window_length))) 43 | window_data: npt.NDArray[np.float64] = np.array([]) 44 | while next_slice.size > 0: 45 | window_data = ( 46 | np.concat((np.delete(window_data, np.s_[:shift], 0), next_slice), axis=0) 47 | if len(window_data) > 0 48 | else next_slice 49 | ) 50 | window_end = window_start + min(self._window_length, len(window_data)) 51 | yield ScrubberWindow(window_data, list(range(window_start, window_end))) 52 | window_start += shift 53 | window_end += shift 54 | next_slice = np.array(list(islice(provided_data_it, shift))) 55 | -------------------------------------------------------------------------------- /pysatl_cpd/cpd_solver.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module contains class for solving change point detection problem. 
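A sketch of the window arithmetic (illustrative): with window_length=6 and shift_factor=0.5 the shift is max(1, int(6 * 0.5)) = 3, so consecutive windows overlap by half:

from pysatl_cpd.core.scrubber.data_providers import ListUnivariateProvider
from pysatl_cpd.core.scrubber.linear import LinearScrubber

scrubber = LinearScrubber(ListUnivariateProvider(list(range(10))), window_length=6, shift_factor=0.5)
for w in scrubber:
    # Prints [0, 1, 2, 3, 4, 5], then [3, 4, 5, 6, 7, 8], then [6, 7, 8, 9]:
    # the final window is shorter because the data runs out.
    print(w.indices)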
3 | """ 4 | 5 | __author__ = "Aleksei Ivanov, Artem Romanyuk, Vladimir Kutuev" 6 | __copyright__ = "Copyright (c) 2025 PySATL project" 7 | __license__ = "SPDX-License-Identifier: MIT" 8 | 9 | import time 10 | 11 | from .core.algorithms.abstract_algorithm import Algorithm 12 | from .core.cpd_core import CpdCore 13 | from .core.problem import CpdProblem 14 | from .core.scrubber.abstract import Scrubber 15 | from .core.scrubber.data_providers import LabeledDataProvider 16 | from .icpd_solver import CpdLocalizationResults, ICpdSolver 17 | from .labeled_data import LabeledCpdData 18 | 19 | 20 | class CpdSolver(ICpdSolver): 21 | """Class, that grants a convenient interface to 22 | work with CPD algorithms""" 23 | 24 | def __init__( 25 | self, 26 | scenario: CpdProblem, 27 | algorithm: Algorithm, 28 | algorithm_input: Scrubber | tuple[LabeledCpdData, type[Scrubber]], 29 | ) -> None: 30 | """pysatl_cpd object constructor 31 | 32 | :param: scenario: scenario specify 33 | :param: algorithm: CPD algorithm, that will search for change points 34 | :param: scrubber: scrubber object for splitting data into parts 35 | """ 36 | self._labeled_data: LabeledCpdData | None = None 37 | self._cpd_core: CpdCore 38 | match algorithm_input: 39 | case Scrubber() as scrubber: 40 | self._cpd_core = CpdCore(scrubber, algorithm) 41 | case (data, scrubber_type): 42 | self._labeled_data = data 43 | self._cpd_core = CpdCore(scrubber_type(LabeledDataProvider(data)), algorithm) 44 | 45 | self._scenario = scenario 46 | 47 | def run(self) -> CpdLocalizationResults | int: 48 | """Execute CPD algorithm and return container with its results 49 | 50 | :return: CpdLocalizationResults object, containing algo result CP and expected CP if needed, 51 | or number of detected change points. 52 | """ 53 | time_start = time.perf_counter() 54 | if not self._scenario.to_localize: 55 | return self._cpd_core.detect() 56 | algo_results = self._cpd_core.localize() 57 | time_end = time.perf_counter() 58 | expected_change_points: list[int] | None = None 59 | if isinstance(self._labeled_data, LabeledCpdData): 60 | expected_change_points = self._labeled_data.change_points 61 | data = self._cpd_core.scrubber.data 62 | return CpdLocalizationResults(data, algo_results, expected_change_points, time_end - time_start) 63 | -------------------------------------------------------------------------------- /pysatl_cpd/generator/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PySATL/pysatl-cpd/9f496f4cdf1401d3d405e28a86e82ab848bb6b52/pysatl_cpd/generator/__init__.py -------------------------------------------------------------------------------- /pysatl_cpd/generator/dataset_description.py: -------------------------------------------------------------------------------- 1 | from io import StringIO 2 | from itertools import accumulate 3 | 4 | from .distributions import Distribution 5 | 6 | 7 | class SampleDescription: 8 | """Contains dataset description: 9 | 10 | * sub-samples lengths; 11 | * sub-samples distributions. 12 | 13 | Also can represent it in AsciiDoc format. 14 | """ 15 | 16 | _name: str 17 | _samples_length: list[int] 18 | _samples_distributions: list[Distribution] 19 | 20 | def __init__( 21 | self, 22 | name: str, 23 | samples_length: list[int], 24 | samples_distributions: list[Distribution], 25 | ) -> None: 26 | """ 27 | Creates new DatasetDescription instance. 28 | 29 | :param name: Name for the sample. 30 | :param samples_length: List of sub-samples length. 
31 |         :param samples_distributions: List of sub-sample distributions.
32 |         """
33 |         self._name = name
34 |         self._samples_length = samples_length
35 |         self._samples_distributions = samples_distributions
36 |         assert len(self._samples_length) == len(self._samples_distributions)
37 | 
38 |     @property
39 |     def name(self) -> str:
40 |         return self._name
41 | 
42 |     @property
43 |     def changepoints(self) -> list[int]:
44 |         return list(accumulate(self._samples_length))[:-1]
45 | 
46 |     @property
47 |     def length(self) -> list[int]:
48 |         return self._samples_length
49 | 
50 |     @property
51 |     def distributions(self) -> list[Distribution]:
52 |         return self._samples_distributions
53 | 
54 |     def to_asciidoc(self, image_path: str | None = None) -> str:
55 |         """
56 |         Converts a `SampleDescription` instance to a string in AsciiDoc format.
57 |         The description contains information about the sample length, sub-sample lengths and distributions,
58 |         and change point indices in the sample.
59 | 
60 |         Example
61 |         -------
62 |         .. code-block::
63 | 
64 |             = Sample 20-normal-0-1-20-normal-10-1
65 | 
66 |             [horizontal]
67 |             Sample length:: 40
68 |             Subsamples lengths:: [20, 20]
69 |             Change points:: [20]
70 | 
71 |             == Distributions
72 | 
73 |             . normal
74 |             [horizontal]
75 |             mean:: 0.0
76 |             variance:: 1.0
77 |             . normal
78 |             [horizontal]
79 |             mean:: 10.0
80 |             variance:: 1.0
81 | 
82 |         :return: Dataset description string in AsciiDoc format.
83 |         """
84 |         description = StringIO()
85 |         description.write(f"= Sample {self._name}\n\n")
86 |         description.write("[horizontal]\n")
87 |         description.write(f"Sample length:: {sum(self._samples_length)}\n")
88 |         description.write(f"Subsamples lengths:: {self._samples_length}\n")
89 |         description.write(f"Change points:: {self.changepoints}\n\n")
90 |         description.write("== Distributions\n\n")
91 |         for i in range(len(self._samples_length)):
92 |             distr = self._samples_distributions[i]
93 |             description.write(f". {distr.name}\n")
94 |             description.write("[horizontal]\n")
95 |             for k, v in distr.params.items():
96 |                 description.write(f"{k}:: {v}\n")
97 | 
98 |         if image_path:
99 |             description.write("\n")
100 |             description.write(f"image::{image_path}[Sample]\n")
101 | 
102 |         return description.getvalue()
103 | 
104 | 
105 | class DatasetDescriptionBuilder:
106 |     """Builder for `SampleDescription` instances."""
107 | 
108 |     def __init__(self) -> None:
109 |         """Creates a new empty DatasetDescriptionBuilder instance."""
110 |         self._distributions: dict[int, tuple[int, Distribution]] = dict()
111 |         self._name: str | None = None
112 | 
113 |     def set_name(self, name: str) -> None:
114 |         """Set the name for the dataset
115 | 
116 |         :param name: name for the dataset"""
117 |         self._name = name
118 | 
119 |     def add_distribution(
120 |         self, distribution_type: str, distribution_length: int, distribution_parameters: dict[str, str]
121 |     ) -> None:
122 |         """Add a new distribution to the dataset
123 | 
124 |         :param distribution_type: type of the distribution
125 |         :param distribution_length: length of the distribution in the dataset
126 |         :param distribution_parameters: special distribution parameters"""
127 |         distribution_index = len(self._distributions)
128 |         distribution = Distribution.from_str(distribution_type, distribution_parameters)
129 |         self._distributions[distribution_index] = (distribution_length, distribution)
130 | 
131 |     def build(self) -> SampleDescription:
132 |         """
133 |         Validate the parameters and create a `SampleDescription` instance.
134 | 
135 |         :return: New `SampleDescription` instance.
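A minimal builder usage sketch (illustrative; the parameter names mean/variance follow the normal-distribution entries in the repository's test configs, and all values are passed as strings, matching Distribution.from_str):

from pysatl_cpd.generator.dataset_description import DatasetDescriptionBuilder

builder = DatasetDescriptionBuilder()
builder.set_name("20-normal-0-1-20-normal-10-1")
builder.add_distribution("normal", 20, {"mean": "0", "variance": "1"})
builder.add_distribution("normal", 20, {"mean": "10", "variance": "1"})

description = builder.build()
print(description.changepoints)   # [20]: cumulative lengths, last one dropped
print(description.to_asciidoc())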
136 | """ 137 | assert self._name 138 | assert len(self._distributions) 139 | lengths, distributions = zip(*self._distributions.values()) 140 | return SampleDescription(self._name, list(lengths), list(distributions)) 141 | -------------------------------------------------------------------------------- /pysatl_cpd/generator/generator.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from enum import Enum 3 | from pathlib import Path 4 | 5 | import numpy as np 6 | import numpy.typing as npt 7 | 8 | from .config_parser import ConfigParser 9 | from .distributions import Distribution 10 | from .saver import DatasetSaver 11 | 12 | 13 | class Generators(Enum): 14 | SCIPY = "scipy" 15 | 16 | def __str__(self) -> str: 17 | return self.value 18 | 19 | 20 | class DatasetGenerator(ABC): 21 | """ 22 | An interface for dataset generators using different backends (e.g. SciPy or Numpy) 23 | to create a sample with a given distributions and lengths. 24 | """ 25 | 26 | @abstractmethod 27 | def generate_sample(self, distributions: list[Distribution], lengths: list[int]) -> npt.NDArray[np.float64]: 28 | """ 29 | Creates a sample consists of subsamples with given `distributions` and `lengths`. 30 | 31 | :param distributions: List of distributions for subsamples. 32 | :param lengths: List of subsamples lengths. 33 | :return: Created sample. 34 | """ 35 | raise NotImplementedError() 36 | 37 | @staticmethod 38 | def get_generator(generator_backend: Generators) -> "DatasetGenerator": 39 | match generator_backend: 40 | case Generators.SCIPY: 41 | return ScipyDatasetGenerator() 42 | case _: 43 | raise ValueError("Unknown generator") 44 | 45 | def generate_datasets( 46 | self, config_path: Path, saver: DatasetSaver | None = None 47 | ) -> dict[str, tuple[npt.NDArray[np.float64], list[int]]]: 48 | """Generate pairs of dataset and change points by config file 49 | 50 | :param config_path: path to config file 51 | :param saver: saver of saving files (if saver is None, then the data does not need to be saved), 52 | defaults to None 53 | 54 | :return: dictionary with names and pairs of dataset and change points 55 | """ 56 | config_parser: ConfigParser = ConfigParser(config_path) 57 | 58 | datasets = dict() 59 | 60 | for descr in config_parser: 61 | sample = self.generate_sample(descr.distributions, descr.length) 62 | current_point = 0 63 | change_points = [] 64 | for length in descr.length[:-1]: 65 | current_point += length 66 | change_points.append(current_point) 67 | datasets[descr.name] = (sample, change_points) 68 | if saver: 69 | saver.save_sample(sample, descr) 70 | return datasets 71 | 72 | 73 | class ScipyDatasetGenerator(DatasetGenerator): 74 | """ 75 | Dataset generator using SciPy to create samples. 
76 | """ 77 | 78 | def generate_sample(self, distributions: list[Distribution], lengths: list[int]) -> npt.NDArray[np.float64]: 79 | return np.concatenate( 80 | [distribution.scipy_sample(length) for distribution, length in zip(distributions, lengths)] 81 | ) 82 | -------------------------------------------------------------------------------- /pysatl_cpd/generator/saver.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Final 3 | 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | import numpy.typing as npt 7 | 8 | from pysatl_cpd.generator.dataset_description import SampleDescription 9 | 10 | 11 | class DatasetSaver: 12 | """ 13 | Saves samples and descriptions to specified directory. 14 | """ 15 | 16 | SAMPLE_DATA: Final[str] = "sample.csv" 17 | DESCRIPTION: Final[str] = "sample.adoc" 18 | SAMPLE_IMAGE: Final[str] = "sample.png" 19 | CHANGEPOINTS_DATA: Final[str] = "changepoints.csv" 20 | 21 | _out_dir: Path 22 | _replace: bool 23 | 24 | def __init__(self, out_dir: Path, replace: bool): 25 | """ 26 | :param out_dir: Directory to save samples and descriptions. 27 | :param replace: Whether sample should be saved if it already exists. 28 | """ 29 | if not out_dir.exists(): 30 | out_dir.mkdir() 31 | self._replace = replace 32 | self._out_dir = out_dir 33 | 34 | def save_sample(self, sample: npt.NDArray[np.float64], description: SampleDescription) -> bool: 35 | """ 36 | Save sample, list of changepoints, sample plot and AsciiDoc description. 37 | 38 | :param sample: Sample to save. 39 | :param description: Description of the saving `sample`. 40 | :return: Whether sample and description have been saved to output directory. 41 | """ 42 | sample_dir: Path = self._out_dir.joinpath(description.name) 43 | if sample_dir.exists() and not self._replace: 44 | return False 45 | if not sample_dir.exists(): 46 | sample_dir.mkdir() 47 | # Save generated sample 48 | sample_file: Path = sample_dir.joinpath(DatasetSaver.SAMPLE_DATA) 49 | np.savetxt(sample_file, sample, delimiter=",") 50 | # Save changepoints list 51 | changepoints_file: Path = sample_dir.joinpath(DatasetSaver.CHANGEPOINTS_DATA) 52 | changepoints: list[int] = description.changepoints 53 | with open(changepoints_file, "w") as cf: 54 | for cp in changepoints: 55 | cf.write(f"{cp}\n") 56 | # Save sample plot 57 | image_file: Path = sample_dir.joinpath(DatasetSaver.SAMPLE_IMAGE) 58 | plt.plot(sample) 59 | plt.vlines(x=changepoints, ymin=sample.min(), ymax=sample.max(), colors="orange", ls="--") 60 | plt.savefig(image_file) 61 | plt.close() 62 | # Save description 63 | description_file: Path = sample_dir.joinpath(DatasetSaver.DESCRIPTION) 64 | with open(description_file, "w") as df: 65 | df.write(description.to_asciidoc(DatasetSaver.SAMPLE_IMAGE)) 66 | 67 | return True 68 | -------------------------------------------------------------------------------- /pysatl_cpd/labeled_data.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module contains wrapper for generated or labeled dataset. 
3 | """ 4 | 5 | __author__ = "Artem Romanyuk, Vladimir Kutuev" 6 | __copyright__ = "Copyright (c) 2025 PySATL project" 7 | __license__ = "SPDX-License-Identifier: MIT" 8 | 9 | import os 10 | from collections.abc import Iterator 11 | from pathlib import Path 12 | 13 | import numpy as np 14 | import numpy.typing as npt 15 | 16 | from pysatl_cpd.generator.generator import DatasetGenerator, ScipyDatasetGenerator 17 | from pysatl_cpd.generator.saver import DatasetSaver 18 | 19 | 20 | class LabeledCpdData: 21 | """Class for generating and storing labeled data, 22 | needed in pysatl_cpd""" 23 | 24 | def __init__( 25 | self, 26 | raw_data: npt.NDArray[np.float64], 27 | change_points: list[int], 28 | ) -> None: 29 | """LabeledCPData object constructor 30 | 31 | :param: raw_data: data, that will be passed into CPD algo 32 | :param: change_points: expected results after passing raw_data into CPD algo 33 | """ 34 | self.raw_data = raw_data 35 | self.change_points = change_points 36 | 37 | def __iter__(self) -> Iterator[npt.NDArray[np.float64]]: 38 | """labeledCPData iterator""" 39 | return self.raw_data.__iter__() 40 | 41 | def __str__(self) -> str: 42 | """Shows main info about LabeledCPData object""" 43 | return f"data={self.raw_data}, change_points={self.change_points}" 44 | 45 | def __len__(self) -> int: 46 | return len(self.raw_data) 47 | 48 | @staticmethod 49 | def generate_cp_datasets( 50 | config_path: Path, 51 | generator: DatasetGenerator = ScipyDatasetGenerator(), 52 | to_save: bool = False, 53 | output_directory: Path = Path(), 54 | to_replace: bool = True, 55 | ) -> dict[str, "LabeledCpdData"]: 56 | """Method for generating labeled data, that contains CP with specific 57 | distribution 58 | 59 | :param config_path: path to config file 60 | :param generator: DataGenerator object, defaults to ScipyDatasetGenerator() 61 | :param to_save: is it necessary to save the data, defaults to False 62 | :param output_directory: directory to save data, defaults to Path() 63 | :param to_replace: is it necessary to replace the files in directory 64 | 65 | :return: dict of pairs: name, LabeledCPData (pairs of data and change points)""" 66 | # maybe create default config 67 | if not os.path.exists(config_path): 68 | raise ValueError("Incorrect config path") 69 | if to_save: 70 | datasets = generator.generate_datasets(config_path, DatasetSaver(output_directory, to_replace)) 71 | else: 72 | datasets = generator.generate_datasets(config_path) 73 | labeled_data_dict = dict() 74 | for name in datasets: 75 | data, change_points = datasets[name] 76 | labeled_data_dict[name] = LabeledCpdData(data, change_points) 77 | return labeled_data_dict 78 | 79 | @staticmethod 80 | def read_generated_datasets(datasets_directory: Path) -> dict[str, "LabeledCpdData"]: 81 | """Read already generated datasets from directory 82 | 83 | :param datasets_directory: directory with datasets 84 | :return: dict of pairs: name, LabeledCPData (pairs of data and change points)""" 85 | datasets = dict() 86 | for dataset_directory in os.scandir(datasets_directory): 87 | dataset_files = dict() 88 | with os.scandir(dataset_directory) as entries: 89 | for file in entries: 90 | dataset_files[file.name] = file 91 | if "changepoints.csv" not in dataset_files or "sample.csv" not in dataset_files: 92 | raise ValueError(f"{datasets_directory} is not datasets directory") 93 | with open(dataset_files["sample.csv"]) as sample: 94 | raw_data = sample.readlines() 95 | data: list[npt.NDArray[np.float64]] | npt.NDArray[np.float64] 96 | try: 97 | data = 
np.array(list(map(np.float64, raw_data)))
98 |             except ValueError:
99 |                 data = np.array([list(map(np.float64, vals.split(","))) for vals in raw_data])
100 |             with open(dataset_files["changepoints.csv"]) as changepoints:
101 |                 change_points = list(map(int, changepoints.readlines()))
102 |             datasets[dataset_directory.name] = LabeledCpdData(data, change_points)
103 |         return datasets
104 | 
-------------------------------------------------------------------------------- /pysatl_cpd/online_cpd_solver.py: --------------------------------------------------------------------------------
1 | """
2 | Module contains class for solving change point detection problem with an online CPD algorithm.
3 | """
4 | 
5 | __author__ = "Alexey Tatyanenko"
6 | __copyright__ = "Copyright (c) 2025 PySATL project"
7 | __license__ = "SPDX-License-Identifier: MIT"
8 | 
9 | import time
10 | 
11 | from pysatl_cpd.core.algorithms.online_algorithm import OnlineAlgorithm
12 | from pysatl_cpd.core.online_cpd_core import OnlineCpdCore
13 | from pysatl_cpd.core.problem import CpdProblem
14 | from pysatl_cpd.core.scrubber.data_providers import DataProvider, LabeledDataProvider
15 | from pysatl_cpd.icpd_solver import CpdLocalizationResults, ICpdSolver
16 | from pysatl_cpd.labeled_data import LabeledCpdData
17 | 
18 | 
19 | class OnlineCpdSolver(ICpdSolver):
20 |     """Class that provides a convenient interface for
21 |     working with online CPD algorithms"""
22 | 
23 |     def __init__(
24 |         self,
25 |         scenario: CpdProblem,
26 |         algorithm: OnlineAlgorithm,
27 |         algorithm_input: DataProvider | LabeledCpdData,
28 |     ) -> None:
29 |         """OnlineCpdSolver object constructor
30 | 
31 |         :param scenario: specification of the problem to solve
32 |         :param algorithm: online CPD algorithm that will search for change points
33 |         :param algorithm_input: data provider, or labeled data from which to construct the corresponding data provider.
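A minimal solver sketch (illustrative; it reuses the hypothetical JumpOnlineAlgorithm from the OnlineAlgorithm sketch, and the labeled data here is synthetic):

import numpy as np

from pysatl_cpd.core.problem import CpdProblem
from pysatl_cpd.labeled_data import LabeledCpdData
from pysatl_cpd.online_cpd_solver import OnlineCpdSolver

solver = OnlineCpdSolver(
    scenario=CpdProblem(to_localize=True),
    algorithm=JumpOnlineAlgorithm(threshold=3.0),
    algorithm_input=LabeledCpdData(np.array([0.0] * 50 + [10.0] * 50), change_points=[50]),
)
result = solver.run()  # CpdLocalizationResults with detected and expected change points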
34 | """ 35 | self._labeled_data: LabeledCpdData | None = None 36 | self._cpd_core: OnlineCpdCore 37 | match algorithm_input: 38 | case LabeledCpdData() as data: 39 | self._labeled_data = data 40 | self._cpd_core = OnlineCpdCore( 41 | data_provider=LabeledDataProvider(data), 42 | algorithm=algorithm, 43 | ) 44 | case DataProvider() as data_provider: 45 | self._cpd_core = OnlineCpdCore( 46 | data_provider=data_provider, 47 | algorithm=algorithm, 48 | ) 49 | 50 | self._scenario = scenario 51 | 52 | def run(self) -> CpdLocalizationResults | int: 53 | """Execute online-CPD algorithm and return container with its results 54 | 55 | :return: CpdLocalizationResults object, containing algo result CP and expected CP if needed 56 | """ 57 | time_start = time.perf_counter() 58 | if not self._scenario.to_localize: 59 | return sum(self._cpd_core.detect()) 60 | 61 | algo_results = [cp for cp in self._cpd_core.localize() if cp is not None] 62 | 63 | time_end = time.perf_counter() 64 | expected_change_points: list[int] | None = None 65 | if isinstance(self._labeled_data, LabeledCpdData): 66 | expected_change_points = self._labeled_data.change_points 67 | data = iter(self._cpd_core.data_provider) 68 | return CpdLocalizationResults(data, algo_results, expected_change_points, time_end - time_start) 69 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PySATL/pysatl-cpd/9f496f4cdf1401d3d405e28a86e82ab848bb6b52/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_configs/test_config_1.yml: -------------------------------------------------------------------------------- 1 | - name: 20-normal-0-1-no-change-point 2 | distributions: 3 | - type: normal 4 | length: 20 5 | parameters: 6 | mean: 0 7 | variance: 1 8 | - name: 20-exponential-1-no-change-point 9 | distributions: 10 | - type: exponential 11 | length: 20 12 | parameters: 13 | rate: 1 14 | - name: 20-weibull-1-1-no-change-point 15 | distributions: 16 | - type: weibull 17 | length: 20 18 | parameters: 19 | shape: 1 20 | scale: 1 21 | - name: 20-uniform-0-1-no-change-point 22 | distributions: 23 | - type: uniform 24 | length: 20 25 | parameters: 26 | min: 0 27 | max: 1 28 | - name: 20-beta-1-1-no-change-point 29 | distributions: 30 | - type: beta 31 | length: 20 32 | parameters: 33 | alpha: 1 34 | beta: 1 35 | - name: 20-gamma-1-1-no-change-point 36 | distributions: 37 | - type: gamma 38 | length: 20 39 | parameters: 40 | alpha: 1 41 | beta: 1 42 | - name: 20-t-2-no-change-point 43 | distributions: 44 | - type: t 45 | length: 20 46 | parameters: 47 | n: 2 48 | - name: 20-lognorm-1-no-change-point 49 | distributions: 50 | - type: lognorm 51 | length: 20 52 | parameters: 53 | s: 1 54 | - name: 20-multivariate_normal-0-1-no-change-point 55 | distributions: 56 | - type: multivariate_normal 57 | length: 20 58 | parameters: 59 | mean: '["0.0", "1.0"]' 60 | - name: 100-normal-0-1-no-change-point 61 | distributions: 62 | - type: normal 63 | length: 100 64 | parameters: 65 | mean: 0 66 | variance: 1 67 | - name: 20-normal-0-1-20-normal-10-1 68 | distributions: 69 | - type: normal 70 | length: 20 71 | parameters: 72 | mean: 0 73 | variance: 1 74 | - type: normal 75 | length: 20 76 | parameters: 77 | mean: 10 78 | variance: 1 79 | - name: 20-multivariate_normal-0-0-20-multivariate_normal-10-10 80 | distributions: 81 | - type: multivariate_normal 82 | 
length: 20 83 | parameters: 84 | mean: '["0", "0"]' 85 | - type: multivariate_normal 86 | length: 20 87 | parameters: 88 | mean: '["10", "10"]' 89 | -------------------------------------------------------------------------------- /tests/test_configs/test_config_exp.yml: -------------------------------------------------------------------------------- 1 | - name: exp 2 | distributions: 3 | - type: exponential 4 | length: 1000 5 | parameters: 6 | rate: 2.0 7 | - type: beta 8 | length: 500 9 | parameters: 10 | alpha: 1.0 11 | beta: 5.0 12 | -------------------------------------------------------------------------------- /tests/test_core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PySATL/pysatl-cpd/9f496f4cdf1401d3d405e28a86e82ab848bb6b52/tests/test_core/__init__.py -------------------------------------------------------------------------------- /tests/test_core/test_algorithms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PySATL/pysatl-cpd/9f496f4cdf1401d3d405e28a86e82ab848bb6b52/tests/test_core/test_algorithms/__init__.py -------------------------------------------------------------------------------- /tests/test_core/test_algorithms/test_algorithms_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PySATL/pysatl-cpd/9f496f4cdf1401d3d405e28a86e82ab848bb6b52/tests/test_core/test_algorithms/test_algorithms_utils/__init__.py -------------------------------------------------------------------------------- /tests/test_core/test_algorithms/test_algorithms_utils/bayesian/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PySATL/pysatl-cpd/9f496f4cdf1401d3d405e28a86e82ab848bb6b52/tests/test_core/test_algorithms/test_algorithms_utils/bayesian/__init__.py -------------------------------------------------------------------------------- /tests/test_core/test_algorithms/test_algorithms_utils/bayesian/test_detectors_and_localizers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from pysatl_cpd.core.algorithms.bayesian.detectors.drop import DropDetector 5 | from pysatl_cpd.core.algorithms.bayesian.detectors.threshold import ThresholdDetector 6 | from pysatl_cpd.core.algorithms.bayesian.localizers.argmax import ArgmaxLocalizer 7 | 8 | 9 | @pytest.fixture( 10 | params=[pytest.param((ThresholdDetector, 0.8), id="Threshold"), pytest.param((DropDetector, 0.1), id="Drop")], 11 | scope="function", 12 | ) 13 | def detector(request): 14 | cls, threshold = request.param 15 | return cls(threshold) 16 | 17 | 18 | def generate_test_data(has_cp=True): 19 | run_length_probs = np.full(100, 1.0) 20 | run_length_probs[-1] = 50 if has_cp else 500 21 | 22 | return run_length_probs / run_length_probs.sum() 23 | 24 | 25 | class TestDetectors: 26 | def test_detection(self, detector): 27 | before_cp = generate_test_data(has_cp=False) 28 | after_cp = generate_test_data(has_cp=True) 29 | print(before_cp[-1], after_cp[-1]) 30 | assert not detector.detect(before_cp), ( 31 | "Detector should not react in case of stable high probability of max run length" 32 | ) 33 | assert detector.detect(after_cp), ( 34 | "Detector should react in case of significant abrupt drop of probability of max run length" 35 | ) 36 | 37 | def 
test_clear(self, detector): 38 | cp_data = generate_test_data(has_cp=True) 39 | 40 | first_result = detector.detect(cp_data) 41 | detector.clear() 42 | 43 | second_result = detector.detect(cp_data) 44 | 45 | assert first_result == second_result, "A state was not cleared correctly" 46 | 47 | 48 | class TestDropDetectorSpecific: 49 | def test_gradual_change(self): 50 | detector = DropDetector(0.1) 51 | run_lengths = np.full(100, 1.0) 52 | 53 | for value in range(500, 490, -1): 54 | run_lengths[-1] = value 55 | data = run_lengths / run_lengths.sum() 56 | assert not detector.detect(data), "Drop detector should not react on a gradual probability decrease" 57 | 58 | 59 | class TestArgmaxLocalizer: 60 | def test_localization(self): 61 | change_point = 5 62 | run_lengths = np.full(11, 0.05) 63 | run_lengths[change_point] = 0.5 64 | localizer = ArgmaxLocalizer() 65 | result = localizer.localize(run_lengths) 66 | assert result == change_point, f"Expected change at {change_point}, got {result}" 67 | -------------------------------------------------------------------------------- /tests/test_core/test_algorithms/test_algorithms_utils/bayesian/test_hazards.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from pysatl_cpd.core.algorithms.bayesian.hazards.constant import ConstantHazard 5 | 6 | 7 | class TestConstantHazard: 8 | @pytest.mark.parametrize("hazard_rate,max_run_length", [(1.1, 50), (10, 100), (200, 250), (500.325251, 500)]) 9 | def test_constant_hazard_for_constants(self, hazard_rate, max_run_length): 10 | constant_hazard = ConstantHazard(hazard_rate) 11 | run_lengths = np.arange(max_run_length, dtype=np.intp) 12 | hazard_probs = constant_hazard.hazard(run_lengths) 13 | assert hazard_probs.shape[0] == max_run_length, ( 14 | f"Expected {max_run_length} probabilities, got {hazard_probs.shape[0]}" 15 | ) 16 | assert np.all(hazard_probs == 1 / hazard_rate), f"Hazard probabilities must be {1 / hazard_rate}" 17 | -------------------------------------------------------------------------------- /tests/test_core/test_algorithms/test_algorithms_utils/bayesian/test_likelihoods.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from pysatl_cpd.core.algorithms.bayesian.likelihoods.exponential_conjugate import ( 5 | ExponentialConjugate, 6 | ExponentialConjugateWithPriorProbability, 7 | ) 8 | from pysatl_cpd.core.algorithms.bayesian.likelihoods.gaussian_conjugate import ( 9 | GaussianConjugate, 10 | GaussianConjugateWithPriorProbability, 11 | ) 12 | from pysatl_cpd.core.algorithms.bayesian.likelihoods.heuristic_gaussian_vs_exponential import ( 13 | HeuristicGaussianVsExponential, 14 | ) 15 | 16 | 17 | @pytest.fixture(scope="module") 18 | def set_seed(): 19 | np.random.seed(42) 20 | 21 | 22 | @pytest.fixture( 23 | params=[ 24 | (GaussianConjugate, {"pre_loc": 0, "pre_scale": 1, "post_loc": 5, "post_scale": 2}), 25 | (ExponentialConjugate, {"pre_scale": 1 / 0.5, "post_scale": 1 / 2}), 26 | (HeuristicGaussianVsExponential, {"pre_scale": 1 / 0.5, "post_loc": 5, "post_scale": 2}), 27 | ], 28 | ids=["Gaussian", "Exponential", "HeuristicGaussianVsExponential"], 29 | ) 30 | def likelihood_config(request): 31 | return request.param 32 | 33 | 34 | @pytest.fixture 35 | def test_data(likelihood_config, set_seed): 36 | likelihood_cls, params = likelihood_config 37 | size = 500 38 | change_point = 250 39 | 40 | match likelihood_cls(): 41 | case 
GaussianConjugate(): 42 | data = np.concatenate( 43 | [ 44 | np.random.normal(params["pre_loc"], params["pre_scale"], change_point), 45 | np.random.normal(params["post_loc"], params["post_scale"], size - change_point), 46 | ] 47 | ) 48 | case ExponentialConjugate(): 49 | data = np.concatenate( 50 | [ 51 | np.random.exponential(params["pre_scale"], change_point), 52 | np.random.exponential(params["post_scale"], size - change_point), 53 | ] 54 | ) 55 | case HeuristicGaussianVsExponential(): 56 | data = np.concatenate( 57 | [ 58 | np.random.exponential(params["pre_scale"], change_point), 59 | np.random.normal(params["post_loc"], params["post_scale"], size - change_point), 60 | ] 61 | ) 62 | case _: 63 | raise ValueError("Unsupported likelihood") 64 | 65 | return data 66 | 67 | 68 | class TestConjugateLikelihood: 69 | @pytest.fixture(autouse=True) 70 | def setup(self, test_data, likelihood_config): 71 | self.likelihood_cls = likelihood_config[0] 72 | self.data = test_data 73 | self.size = 500 74 | self.change_point = 250 75 | self.learning_steps = 50 76 | 77 | def test_learning_and_update(self): 78 | likelihood = self.likelihood_cls() 79 | likelihood.learn(self.data[: self.learning_steps]) 80 | 81 | metrics = {"after_learn": None, "before_cp": None, "after_cp": None} 82 | 83 | for time in range(self.learning_steps, self.size): 84 | observation = np.float64(self.data[time]) 85 | pred_probs = likelihood.predict(observation) 86 | 87 | assert len(pred_probs) == time - self.learning_steps + 1 88 | 89 | current_mean = np.mean(pred_probs) 90 | if time == self.learning_steps + 1: 91 | metrics["after_learn"] = current_mean 92 | elif time == self.change_point - 1: 93 | metrics["before_cp"] = current_mean 94 | elif time == self.change_point + 1: 95 | metrics["after_cp"] = current_mean 96 | 97 | likelihood.update(observation) 98 | 99 | assert not np.isclose(metrics["after_learn"], metrics["before_cp"], atol=0.05) 100 | assert not np.isclose(metrics["before_cp"], metrics["after_cp"], atol=0.05) 101 | 102 | @pytest.mark.parametrize("data_size", [51, 100], ids=["small", "medium"]) 103 | def test_clear(self, data_size): 104 | likelihood = self.likelihood_cls() 105 | test_data = self.data[:data_size] 106 | 107 | likelihood.learn(test_data[:-2]) 108 | first = likelihood.predict(np.float64(test_data[-1])) 109 | 110 | likelihood.clear() 111 | likelihood.learn(test_data[:-2]) 112 | second = likelihood.predict(np.float64(test_data[-1])) 113 | 114 | np.testing.assert_array_equal(first, second) 115 | 116 | 117 | class TestPriorProbabilityOfSample: 118 | @pytest.fixture(autouse=True) 119 | def setup_teardown(self): 120 | np.random.seed(42) 121 | self.data_size = 20 122 | 123 | @pytest.fixture( 124 | params=[("exponential", "normal"), ("normal", "exponential")], 125 | ids=[ 126 | "exponential data", 127 | "normal_data", 128 | ], 129 | ) 130 | def test_scenario(self, request): 131 | return request.param 132 | 133 | @pytest.fixture 134 | def datasets(self): 135 | return { 136 | "exponential": np.random.exponential(size=self.data_size), 137 | "normal": np.random.normal(size=self.data_size), 138 | } 139 | 140 | def test_probabilities_of_samples(self, test_scenario, datasets): 141 | target_likelihood, compared_likelihood = test_scenario 142 | target_data = datasets[target_likelihood] 143 | 144 | target_likelihood = ( 145 | ExponentialConjugateWithPriorProbability() 146 | if target_likelihood == "exponential" 147 | else GaussianConjugateWithPriorProbability() 148 | ) 149 | compare_likelihood = ( 150 | 
ExponentialConjugateWithPriorProbability() 151 | if compared_name == "exponential" 152 | else GaussianConjugateWithPriorProbability() 153 | ) 154 | 155 | target_likelihood.learn(target_data) 156 | compared_likelihood.learn(target_data) 157 | 158 | target_prob = target_likelihood.probability_of_learned_prior(target_data) 159 | compare_prob = compared_likelihood.probability_of_learned_prior(target_data) 160 | 161 | assert target_prob > compare_prob, ( 162 | f"{target_name} likelihood should have higher probability " 163 | f"for {target_name} data than {compared_name} likelihood" 164 | ) 165 | -------------------------------------------------------------------------------- /tests/test_core/test_algorithms/test_bayesian_algorithm.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from pysatl_cpd.core.algorithms.bayesian.detectors.threshold import ThresholdDetector 5 | from pysatl_cpd.core.algorithms.bayesian.hazards.constant import ConstantHazard 6 | from pysatl_cpd.core.algorithms.bayesian.likelihoods.gaussian_conjugate import ( 7 | GaussianConjugate, 8 | ) 9 | from pysatl_cpd.core.algorithms.bayesian.localizers.argmax import ArgmaxLocalizer 10 | from pysatl_cpd.core.algorithms.bayesian_algorithm import BayesianAlgorithm 11 | 12 | 13 | def set_seed(): 14 | np.random.seed(1) 15 | 16 | 17 | def construct_bayesian_algorithm(): 18 | return BayesianAlgorithm( 19 | learning_steps=50, 20 | likelihood=GaussianConjugate(), 21 | hazard=ConstantHazard(rate=1.0 / (1.0 - 0.5 ** (1.0 / 500))), 22 | detector=ThresholdDetector(threshold=0.04), 23 | localizer=ArgmaxLocalizer(), 24 | ) 25 | 26 | 27 | @pytest.fixture(scope="function") 28 | def data_params(): 29 | return { 30 | "num_of_tests": 10, 31 | "size": 500, 32 | "change_point": 250, 33 | "tolerable_deviation": 25, 34 | } 35 | 36 | 37 | @pytest.fixture 38 | def generate_data(data_params): 39 | def _generate_data(): 40 | set_seed() 41 | return np.concatenate( 42 | [ 43 | np.random.normal(loc=0, scale=1, size=data_params["change_point"]), 44 | np.random.normal(loc=5, scale=2, size=data_params["size"] - data_params["change_point"]), 45 | ] 46 | ) 47 | 48 | return _generate_data 49 | 50 | 51 | @pytest.fixture(scope="function") 52 | def outer_bayesian_algorithm(): 53 | return construct_bayesian_algorithm() 54 | 55 | 56 | @pytest.fixture 57 | def inner_algorithm_factory(): 58 | def _factory(): 59 | return construct_bayesian_algorithm() 60 | 61 | return _factory 62 | 63 | 64 | class TestBayesianAlgorithm: 65 | def test_consecutive_detection(self, outer_bayesian_algorithm, generate_data, data_params): 66 | for _ in range(data_params["num_of_tests"]): 67 | data = generate_data() 68 | result = outer_bayesian_algorithm.detect(data) 69 | assert result, "There was an undetected change point in the data" 70 | 71 | def test_correctness_of_consecutive_detection( 72 | self, outer_bayesian_algorithm, inner_algorithm_factory, generate_data, data_params 73 | ): 74 | for _ in range(data_params["num_of_tests"]): 75 | data = generate_data() 76 | inner_algorithm = inner_algorithm_factory() 77 | outer_result = outer_bayesian_algorithm.detect(data) 78 | inner_result = inner_algorithm.detect(data) 79 | assert outer_result == inner_result, "Consecutive and independent detection should give same results" 80 | 81 | def test_consecutive_localization(self, outer_bayesian_algorithm, generate_data, data_params): 82 | for _ in range(data_params["num_of_tests"]): 83 | data = generate_data() 84 | 
result = outer_bayesian_algorithm.localize(data) 85 | assert ( 86 | len(result) > 0 87 | and data_params["change_point"] - data_params["tolerable_deviation"] 88 | <= result[0] 89 | <= data_params["change_point"] + data_params["tolerable_deviation"] 90 | ), "Incorrect change point localization" 91 | 92 | def test_correctness_of_consecutive_localization( 93 | self, outer_bayesian_algorithm, inner_algorithm_factory, generate_data, data_params 94 | ): 95 | for _ in range(data_params["num_of_tests"]): 96 | data = generate_data() 97 | inner_algorithm = inner_algorithm_factory() 98 | outer_result = outer_bayesian_algorithm.localize(data) 99 | inner_result = inner_algorithm.localize(data) 100 | assert outer_result == inner_result, "Consecutive and independent localization should give same results" 101 | -------------------------------------------------------------------------------- /tests/test_core/test_algorithms/test_bayesian_linear_heuristic.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from pysatl_cpd.core.algorithms.bayesian.detectors.threshold import ThresholdDetector 5 | from pysatl_cpd.core.algorithms.bayesian.hazards.constant import ConstantHazard 6 | from pysatl_cpd.core.algorithms.bayesian.likelihoods.heuristic_gaussian_vs_exponential import ( 7 | HeuristicGaussianVsExponential, 8 | ) 9 | from pysatl_cpd.core.algorithms.bayesian.localizers.argmax import ArgmaxLocalizer 10 | from pysatl_cpd.core.algorithms.bayesian_linear_heuristic import BayesianLinearHeuristic 11 | from pysatl_cpd.core.algorithms.bayesian_online_algorithm import BayesianOnline 12 | from pysatl_cpd.core.problem import CpdProblem 13 | from pysatl_cpd.core.scrubber.data_providers import ListUnivariateProvider 14 | from pysatl_cpd.online_cpd_solver import OnlineCpdSolver 15 | 16 | 17 | def generate_no_change_exponential(rate, n=40000, seed=None): 18 | np.random.seed(seed) 19 | return np.random.exponential(scale=1 / rate, size=n) 20 | 21 | 22 | def generate_no_change_normal(mean, std, n=40000, seed=None): 23 | np.random.seed(seed) 24 | return np.random.normal(loc=mean, scale=std, size=n) 25 | 26 | 27 | def generate_change_exp_to_exp(rate1, rate2, change_point, n=40000, seed=None): 28 | np.random.seed(seed) 29 | part1 = np.random.exponential(scale=1 / rate1, size=change_point) 30 | part2 = np.random.exponential(scale=1 / rate2, size=n - change_point) 31 | return np.concatenate([part1, part2]) 32 | 33 | 34 | def generate_change_norm_to_norm(mean1, std1, mean2, std2, change_point, n=40000, seed=None): 35 | np.random.seed(seed) 36 | part1 = np.random.normal(loc=mean1, scale=std1, size=change_point) 37 | part2 = np.random.normal(loc=mean2, scale=std2, size=n - change_point) 38 | return np.concatenate([part1, part2]) 39 | 40 | 41 | def generate_change_exp_to_norm(rate, mean, std, change_point, n=40000, seed=None): 42 | np.random.seed(seed) 43 | part1 = np.random.exponential(scale=1 / rate, size=change_point) 44 | part2 = np.random.normal(loc=mean, scale=std, size=n - change_point) 45 | return np.concatenate([part1, part2]) 46 | 47 | 48 | def generate_change_norm_to_exp(mean, std, rate, change_point, n=40000, seed=None): 49 | np.random.seed(seed) 50 | part1 = np.random.normal(loc=mean, scale=std, size=change_point) 51 | part2 = np.random.exponential(scale=1 / rate, size=n - change_point) 52 | return np.concatenate([part1, part2]) 53 | 54 | 55 | @pytest.fixture 56 | def setup_algorithm(): 57 | base_algorithm = BayesianOnline( 58 | 
learning_sample_size=20, 59 | likelihood=HeuristicGaussianVsExponential(), 60 | hazard=ConstantHazard(rate=1.0 / (1.0 - 0.5 ** (1.0 / 500))), 61 | detector=ThresholdDetector(threshold=0.04), 62 | localizer=ArgmaxLocalizer(), 63 | ) 64 | heuristic_algorithm = BayesianLinearHeuristic( 65 | algorithm=base_algorithm, time_before_duplicate_start=275, duplicate_preparation_time=225 66 | ) 67 | return base_algorithm, heuristic_algorithm 68 | 69 | 70 | @pytest.mark.parametrize( 71 | "data_generator, params, true_cp", 72 | [ 73 | (generate_no_change_exponential, {"rate": 2.0}, None), 74 | (generate_no_change_normal, {"mean": 0.0, "std": 1.0}, None), 75 | (generate_change_exp_to_exp, {"rate1": 2.0, "rate2": 0.5, "change_point": 10000}, 10000), 76 | ( 77 | generate_change_norm_to_norm, 78 | {"mean1": 0.0, "std1": 1.0, "mean2": 5.0, "std2": 1.0, "change_point": 15000}, 79 | 15000, 80 | ), 81 | (generate_change_exp_to_norm, {"rate": 2.0, "mean": 5.0, "std": 1.0, "change_point": 20000}, 20000), 82 | (generate_change_norm_to_exp, {"mean": 0.0, "std": 1.0, "rate": 0.5, "change_point": 25000}, 25000), 83 | ], 84 | ) 85 | def test_cpd_detection(setup_algorithm, data_generator, params, true_cp): 86 | _, heuristic_algorithm = setup_algorithm 87 | 88 | data = data_generator(**params, n=40000, seed=42) 89 | data_provider = ListUnivariateProvider(list(data)) 90 | 91 | solver_heuristic = OnlineCpdSolver( 92 | scenario=CpdProblem(True), algorithm=heuristic_algorithm, algorithm_input=data_provider 93 | ) 94 | result_heuristic = solver_heuristic.run() 95 | 96 | if true_cp is None: 97 | print(result_heuristic.result) 98 | assert len(result_heuristic.result) < len(data) / 500, "There should not be too many change points" 99 | else: 100 | assert any(true_cp - 25 <= cp <= true_cp + 25 for cp in result_heuristic.result), ( 101 | f"No detected change point near {true_cp} in heuristic result" 102 | ) 103 | 104 | 105 | def test_time_comparison(setup_algorithm): 106 | base_algorithm, heuristic_algorithm = setup_algorithm 107 | 108 | data = generate_change_exp_to_exp(rate1=2.0, rate2=0.5, change_point=10000, n=40000, seed=42) 109 | data_provider = ListUnivariateProvider(list(data)) 110 | 111 | solver_heuristic = OnlineCpdSolver( 112 | scenario=CpdProblem(True), algorithm=heuristic_algorithm, algorithm_input=data_provider 113 | ) 114 | time_heuristic = solver_heuristic.run().time_sec 115 | 116 | solver_base = OnlineCpdSolver(scenario=CpdProblem(True), algorithm=base_algorithm, algorithm_input=data_provider) 117 | time_base = solver_base.run().time_sec 118 | 119 | print(time_heuristic, time_base) 120 | assert time_heuristic < time_base, f"Heuristic time ({time_heuristic}) >= base time ({time_base})" 121 | -------------------------------------------------------------------------------- /tests/test_core/test_algorithms/test_classification_algorithms.py: -------------------------------------------------------------------------------- 1 | from itertools import product 2 | 3 | import numpy as np 4 | import numpy.typing as npt 5 | import pytest 6 | 7 | import pysatl_cpd.generator.distributions as dstr 8 | from pysatl_cpd.core.algorithms.classification.classifiers.decision_tree import DecisionTreeClassifier 9 | from pysatl_cpd.core.algorithms.classification.classifiers.knn import KNNClassifier 10 | from pysatl_cpd.core.algorithms.classification.classifiers.rf import RFClassifier 11 | from pysatl_cpd.core.algorithms.classification.classifiers.svm import SVMClassifier 12 | from 
pysatl_cpd.core.algorithms.classification.quality_metrics.classification.f1 import F1 13 | from pysatl_cpd.core.algorithms.classification.quality_metrics.classification.mcc import MCC 14 | from pysatl_cpd.core.algorithms.classification.test_statistics.threshold_overcome import ThresholdOvercome 15 | from pysatl_cpd.core.algorithms.classification_algorithm import ClassificationAlgorithm 16 | from pysatl_cpd.core.algorithms.knn_algorithm import KNNAlgorithm 17 | from pysatl_cpd.core.scrubber.data_providers import LabeledDataProvider 18 | from pysatl_cpd.core.scrubber.linear import LinearScrubber 19 | from pysatl_cpd.cpd_solver import CpdProblem, CpdSolver 20 | from pysatl_cpd.labeled_data import LabeledCpdData 21 | 22 | K = 7 23 | CM_THRESHOLD = 4.5 24 | INDENT_COEFF = 0.25 25 | SHIFT_FACTOR = 0.5 26 | WINDOW_SIZE = 48 27 | SIZE = 200 28 | CP_N = 100 29 | TOLERABLE_DEVIATION = WINDOW_SIZE / 2 30 | EXPECTED_CP = 100 31 | CLASSIFIERS = ["knn", "svm", "rf", "dt"] 32 | METRICS = ["mcc"] 33 | 34 | 35 | def assert_result(actual): 36 | def in_interval(cp): 37 | return EXPECTED_CP - TOLERABLE_DEVIATION <= cp <= EXPECTED_CP + TOLERABLE_DEVIATION 38 | 39 | assert (len(actual) > 0 and all(in_interval(cp) for cp in actual)), "Incorrect change point localization" 40 | 41 | 42 | def build_classification_alg(classifier_name, metric_name): 43 | match metric_name: 44 | case "f1": 45 | quality_metric = F1() 46 | threshold = 0.85 47 | case "mcc": 48 | quality_metric = MCC() 49 | threshold = 0.85 50 | case _: 51 | raise NotImplementedError("No such metric yet.") 52 | 53 | match classifier_name: 54 | case "knn": 55 | classifier = KNNClassifier(K) 56 | case "svm": 57 | classifier = SVMClassifier() 58 | case "dt": 59 | classifier = DecisionTreeClassifier() 60 | case "rf": 61 | classifier = RFClassifier() 62 | case _: 63 | raise NotImplementedError("No such classifier yet.") 64 | 65 | return ClassificationAlgorithm(classifier=classifier, 66 | quality_metric=quality_metric, 67 | test_statistic=ThresholdOvercome(threshold), 68 | indent_coeff=INDENT_COEFF) 69 | 70 | 71 | def build_solver(alg, data): 72 | data_provider = LabeledDataProvider(LabeledCpdData(data, [EXPECTED_CP])) 73 | scrubber = LinearScrubber(data_provider, WINDOW_SIZE, SHIFT_FACTOR) 74 | return CpdSolver(CpdProblem(to_localize=True), algorithm=alg, algorithm_input=scrubber) 75 | 76 | 77 | @pytest.fixture(scope="session") 78 | def univariate_data(): 79 | np.random.seed(1) 80 | left_distr = dstr.Distribution.from_str( 81 | str(dstr.Distributions.UNIFORM), 82 | {"min": "2.0", "max": "2.1"}) 83 | right_distr = dstr.Distribution.from_str( 84 | str(dstr.Distributions.UNIFORM), 85 | {"min": "0.0", "max": "0.1"}) 86 | return np.concatenate( 87 | [ 88 | left_distr.scipy_sample(EXPECTED_CP), 89 | right_distr.scipy_sample(SIZE - EXPECTED_CP), 90 | ] 91 | ) 92 | 93 | 94 | @pytest.fixture(scope="session") 95 | def multivariate_data(): 96 | np.random.seed(1) 97 | left_distr = dstr.Distribution.from_str( 98 | str(dstr.Distributions.MULTIVARIATIVE_NORMAL), 99 | {"mean": str([0.0] * 10)}) 100 | right_distr = dstr.Distribution.from_str( 101 | str(dstr.Distributions.MULTIVARIATIVE_NORMAL), 102 | {"mean": str([5.0] * 10)}) 103 | return np.concatenate( 104 | [ 105 | left_distr.scipy_sample(EXPECTED_CP), 106 | right_distr.scipy_sample(SIZE - EXPECTED_CP) 107 | ] 108 | ) 109 | 110 | 111 | class TestClassificationCpd: 112 | @pytest.mark.parametrize( 113 | "classifier_name, metric", 114 | list(product(CLASSIFIERS, METRICS)), 115 | ) 116 | def 
test_classification_cpd_univariate(self, classifier_name, metric, univariate_data): 117 | alg = build_classification_alg(classifier_name, metric) 118 | solver = build_solver(alg, univariate_data) 119 | actual = solver.run().result 120 | assert_result(actual) 121 | 122 | @pytest.mark.parametrize( 123 | "classifier_name, metric", 124 | list(product(CLASSIFIERS, METRICS)), 125 | ) 126 | def test_classification_cpd_multivariate(self, classifier_name, metric, multivariate_data): 127 | alg = build_classification_alg(classifier_name, metric) 128 | solver = build_solver(alg, multivariate_data) 129 | actual = solver.run().result 130 | assert_result(actual) 131 | 132 | 133 | class TestKnnCpd: 134 | @pytest.fixture(scope="function") 135 | def knn_cpd_univariate(self): 136 | def metric(obs1: float, obs2: float) -> float: 137 | return abs(obs1 - obs2) 138 | 139 | return KNNAlgorithm(distance_func=metric, 140 | test_statistic=ThresholdOvercome(CM_THRESHOLD), 141 | indent_coeff=INDENT_COEFF, 142 | k=K) 143 | 144 | @pytest.fixture(scope="function") 145 | def knn_cpd_multivariate(self): 146 | def metric(obs1: npt.NDArray[np.float64], obs2: npt.NDArray[np.float64]) -> float: 147 | return float(np.linalg.norm(obs1 - obs2)) 148 | 149 | return KNNAlgorithm(distance_func=metric, 150 | test_statistic=ThresholdOvercome(CM_THRESHOLD), 151 | indent_coeff=INDENT_COEFF, 152 | k=K) 153 | 154 | def test_knn_cpd_univariate(self, knn_cpd_univariate, univariate_data): 155 | solver = build_solver(knn_cpd_univariate, univariate_data) 156 | actual = solver.run().result 157 | assert_result(actual) 158 | 159 | def test_knn_cpd_multivariate(self, knn_cpd_multivariate, multivariate_data): 160 | solver = build_solver(knn_cpd_multivariate, multivariate_data) 161 | actual = solver.run().result 162 | assert_result(actual) 163 | -------------------------------------------------------------------------------- /tests/test_core/test_algorithms/test_graph_algorithm.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from pysatl_cpd.core.algorithms.graph_algorithm import GraphAlgorithm 4 | 5 | 6 | def custom_comparison(node1, node2): 7 | arg = 5 8 | return abs(node1 - node2) <= arg 9 | 10 | 11 | class TestGraphAlgorithm: 12 | @pytest.mark.parametrize( 13 | "alg_param,data,expected", 14 | (((custom_comparison, 1.5), (50, 55, 60, 48, 52, 70, 75, 80, 90, 85, 95, 100, 50), [5]),), 15 | ) 16 | def test_localize(self, alg_param, data, expected): 17 | algorithm = GraphAlgorithm(*alg_param) 18 | assert algorithm.localize(data) == expected 19 | 20 | @pytest.mark.parametrize( 21 | "alg_param,data,expected", 22 | (((custom_comparison, 1.5), (50, 55, 60, 48, 52, 70, 75, 80, 90, 85, 95, 100, 50), 1),), 23 | ) 24 | def test_detect(self, alg_param, data, expected): 25 | algorithm = GraphAlgorithm(*alg_param) 26 | assert algorithm.detect(data) == expected 27 | -------------------------------------------------------------------------------- /tests/test_core/test_cpd_core.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from pysatl_cpd.core.algorithms.graph_algorithm import GraphAlgorithm 4 | from pysatl_cpd.core.cpd_core import CpdCore 5 | from pysatl_cpd.core.scrubber.data_providers import ListUnivariateProvider 6 | from pysatl_cpd.core.scrubber.linear import LinearScrubber 7 | 8 | 9 | def custom_comparison(node1, node2): 10 | arg = 5 11 | return abs(node1 - node2) <= arg 12 | 13 | 14 | class TestCPDCore: 15 | @pytest.mark.parametrize( 
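# The single scenario below encodes a level shift: the series stays near 50 for the
# first five observations and jumps to the 70-100 range at index 5, so localize()
# is expected to return [5].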
16 | "data,alg_class,alg_param,expected", 17 | ( 18 | ( 19 | [50, 55, 60, 48, 52, 70, 75, 80, 90, 85, 95, 100, 50], 20 | GraphAlgorithm, 21 | (custom_comparison, 1.5), 22 | [5], 23 | ), 24 | ), 25 | ) 26 | def test_run(self, data, alg_class, alg_param, expected): 27 | scrubber = LinearScrubber(ListUnivariateProvider(data)) 28 | algorithm = alg_class(*alg_param) 29 | 30 | core = CpdCore(scrubber, algorithm) 31 | assert core.localize() == expected 32 | -------------------------------------------------------------------------------- /tests/test_core/test_online_cpd_core.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from pysatl_cpd.core.algorithms.bayesian.detectors.threshold import ThresholdDetector 5 | from pysatl_cpd.core.algorithms.bayesian.hazards.constant import ConstantHazard 6 | from pysatl_cpd.core.algorithms.bayesian.likelihoods.gaussian_conjugate import GaussianConjugate 7 | from pysatl_cpd.core.algorithms.bayesian.localizers.argmax import ArgmaxLocalizer 8 | from pysatl_cpd.core.algorithms.bayesian_online_algorithm import BayesianOnline 9 | from pysatl_cpd.core.online_cpd_core import OnlineCpdCore 10 | from pysatl_cpd.core.scrubber.data_providers import ListUnivariateProvider 11 | 12 | DATA_PARAMS = { 13 | "num_of_tests": 10, 14 | "size": 500, 15 | "change_point": 250, 16 | "tolerable_deviation": 25, 17 | } 18 | 19 | 20 | @pytest.fixture(scope="session") 21 | def data_params(): 22 | return DATA_PARAMS 23 | 24 | 25 | def construct_bayesian_online_algorithm(): 26 | return BayesianOnline( 27 | learning_sample_size=50, 28 | likelihood=GaussianConjugate(), 29 | hazard=ConstantHazard(rate=1.0 / (1.0 - 0.5 ** (1.0 / 500))), 30 | detector=ThresholdDetector(threshold=0.04), 31 | localizer=ArgmaxLocalizer(), 32 | ) 33 | 34 | 35 | @pytest.fixture 36 | def algorithm(): 37 | return construct_bayesian_online_algorithm() 38 | 39 | 40 | @pytest.fixture(params=[True, False], ids=["with_cp", "without_cp"]) 41 | def dataset(request, data_params): 42 | np.random.seed(42 + request.param_index) 43 | if request.param: 44 | return np.concatenate( 45 | [ 46 | np.random.normal(0, 1, data_params["change_point"]), 47 | np.random.normal(5, 2, data_params["size"] - data_params["change_point"]), 48 | ] 49 | ) 50 | return np.random.normal(0, 1, data_params["size"]) 51 | 52 | 53 | @pytest.fixture 54 | def online_core(dataset): 55 | return OnlineCpdCore( 56 | algorithm=construct_bayesian_online_algorithm(), data_provider=ListUnivariateProvider(list(dataset)) 57 | ) 58 | 59 | 60 | class TestOnlineCpdCore: 61 | @pytest.mark.parametrize("test_iteration", range(DATA_PARAMS["num_of_tests"])) 62 | @pytest.mark.parametrize("mode", ["detect", "localize"]) 63 | def test_core_functionality(self, algorithm, online_core, dataset, data_params, mode, test_iteration): 64 | core_iterator = getattr(online_core, mode)() 65 | algo_method = getattr(algorithm, mode) 66 | 67 | for time_point in range(data_params["size"]): 68 | observation = dataset[time_point] 69 | algo_result = algo_method(observation) 70 | core_result = next(core_iterator) 71 | 72 | assert algo_result == core_result, ( 73 | f"Different results at {time_point} between manual {mode} and core {mode} iteration" 74 | ) 75 | -------------------------------------------------------------------------------- /tests/test_core/test_scrubber/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PySATL/pysatl-cpd/9f496f4cdf1401d3d405e28a86e82ab848bb6b52/tests/test_core/test_scrubber/__init__.py -------------------------------------------------------------------------------- /tests/test_core/test_scrubber/test_dataproviders.py: -------------------------------------------------------------------------------- 1 | __author__ = "Vladimir Kutuev" 2 | __copyright__ = "Copyright (c) 2025 PySATL project" 3 | __license__ = "SPDX-License-Identifier: MIT" 4 | 5 | from hypothesis import given, strategies 6 | 7 | from pysatl_cpd.core.scrubber.data_providers import ListUnivariateProvider 8 | 9 | 10 | class TestDataProviders: 11 | @given(strategies.lists(strategies.floats(allow_nan=False), min_size=0, max_size=100)) 12 | def test_list_univariate(self, data: list[float]): 13 | provider = ListUnivariateProvider(data) 14 | provided_data = list(provider.__iter__()) 15 | assert len(data) == len(provided_data) 16 | assert all(map(lambda t: t[0] == t[1], zip(data, provided_data))) 17 | -------------------------------------------------------------------------------- /tests/test_core/test_scrubber/test_linear_scrubber.py: -------------------------------------------------------------------------------- 1 | import hypothesis.strategies as st 2 | import numpy as np 3 | from hypothesis import given, settings 4 | 5 | from pysatl_cpd.core.scrubber.data_providers import ListUnivariateProvider 6 | from pysatl_cpd.core.scrubber.linear import LinearScrubber 7 | 8 | 9 | class TestLinearScrubber: 10 | @settings(max_examples=1000) 11 | @given(st.integers(0, 100), st.integers(1, 100), st.floats(0.01, 1)) 12 | def test_get_windows(self, data_length, window_length, shift_factor): 13 | data = [float(i) for i in range(data_length)] 14 | scrubber = LinearScrubber(ListUnivariateProvider(data), window_length, shift_factor) 15 | cur_index = 0 16 | for window in iter(scrubber): 17 | assert len(window.values) == len(window.indices) 18 | assert np.array_equal(window.values, np.fromiter(data[cur_index : cur_index + window_length], np.float64)) 19 | cur_index += max(1, int(window_length * shift_factor)) 20 | 21 | @settings(max_examples=1000) 22 | @given(st.integers(0, 100), st.integers(1, 100), st.floats(0.01, 1)) 23 | def test_restart(self, data_length, window_length, shift_factor): 24 | data = [float(i) for i in range(data_length)] 25 | scrubber = LinearScrubber(ListUnivariateProvider(data), window_length, shift_factor) 26 | fst = list(scrubber) 27 | snd = list(scrubber) 28 | assert len(fst) == len(snd) 29 | assert all( 30 | map(lambda w: w[0].indices == w[1].indices and np.array_equal(w[0].values, w[1].values), zip(fst, snd)) 31 | ) 32 | -------------------------------------------------------------------------------- /tests/test_generator/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PySATL/pysatl-cpd/9f496f4cdf1401d3d405e28a86e82ab848bb6b52/tests/test_generator/__init__.py -------------------------------------------------------------------------------- /tests/test_generator/test_distributions.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import pysatl_cpd.generator.distributions as dstr 4 | 5 | 6 | class TestDistributions: 7 | @pytest.mark.parametrize( 8 | "distribution, params, error", 9 | [ 10 | (dstr.Distributions.NORMAL, {"mean": "0"}, ValueError), 11 | (dstr.Distributions.NORMAL, {"mean": "0", "var": "1"}, KeyError), 
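# The invalid configurations below follow a common pattern per distribution: a missing
# required parameter, a misnamed parameter (expected KeyError), an extra unexpected
# parameter, and invalid parameter values (expected ValueError).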
12 | (dstr.Distributions.NORMAL, {"mean": "0", "variance": "1", "x": "5"}, ValueError), 13 | (dstr.Distributions.NORMAL, {"mean": "0", "variance": "-1"}, ValueError), 14 | (dstr.Distributions.EXPONENTIAL, {}, ValueError), 15 | (dstr.Distributions.EXPONENTIAL, {"rt": "1"}, KeyError), 16 | (dstr.Distributions.EXPONENTIAL, {"rate": "1", "x": "5"}, ValueError), 17 | (dstr.Distributions.EXPONENTIAL, {"rate": "-1"}, ValueError), 18 | (dstr.Distributions.WEIBULL, {"shape": "0"}, ValueError), 19 | (dstr.Distributions.WEIBULL, {"shape": "0", "var": "1"}, KeyError), 20 | (dstr.Distributions.WEIBULL, {"shape": "1", "scale": "1", "x": "5"}, ValueError), 21 | (dstr.Distributions.WEIBULL, {"shape": "-1", "scale": "1"}, ValueError), 22 | (dstr.Distributions.WEIBULL, {"shape": "1", "scale": "-1"}, ValueError), 23 | (dstr.Distributions.UNIFORM, {"min": "0"}, ValueError), 24 | (dstr.Distributions.UNIFORM, {"min": "-1", "MAX": "1"}, KeyError), 25 | (dstr.Distributions.UNIFORM, {"min": "-1", "max": "1", "x": "5"}, ValueError), 26 | (dstr.Distributions.UNIFORM, {"min": "1", "max": "-1"}, ValueError), 27 | (dstr.Distributions.BETA, {"alpha": "1"}, ValueError), 28 | (dstr.Distributions.BETA, {"alpha": "1", "x": "1"}, KeyError), 29 | (dstr.Distributions.BETA, {"alpha": "1", "beta": "1", "x": "5"}, ValueError), 30 | (dstr.Distributions.BETA, {"alpha": "-1", "beta": "1"}, ValueError), 31 | (dstr.Distributions.BETA, {"alpha": "1", "beta": "-1"}, ValueError), 32 | (dstr.Distributions.GAMMA, {"alpha": "1"}, ValueError), 33 | (dstr.Distributions.GAMMA, {"alpha": "1", "x": "1"}, KeyError), 34 | (dstr.Distributions.GAMMA, {"alpha": "1", "beta": "1", "x": "5"}, ValueError), 35 | (dstr.Distributions.GAMMA, {"alpha": "-1", "beta": "1"}, ValueError), 36 | (dstr.Distributions.GAMMA, {"alpha": "1", "beta": "-1"}, ValueError), 37 | (dstr.Distributions.T, {}, ValueError), 38 | (dstr.Distributions.T, {"N": "1"}, KeyError), 39 | (dstr.Distributions.T, {"n": "1", "x": "5"}, ValueError), 40 | (dstr.Distributions.T, {"n": "-1"}, ValueError), 41 | (dstr.Distributions.LOGNORM, {}, ValueError), 42 | (dstr.Distributions.LOGNORM, {"S": "1"}, KeyError), 43 | (dstr.Distributions.LOGNORM, {"s": "1", "x": "5"}, ValueError), 44 | (dstr.Distributions.LOGNORM, {"s": "-1"}, ValueError), 45 | (dstr.Distributions.MULTIVARIATIVE_NORMAL, {}, ValueError), 46 | (dstr.Distributions.MULTIVARIATIVE_NORMAL, {"Mean": "[0.0, 0.0]"}, KeyError), 47 | (dstr.Distributions.MULTIVARIATIVE_NORMAL, {"mean": "[0.0, 0.0]", "x": "5"}, ValueError), 48 | (dstr.Distributions.MULTIVARIATIVE_NORMAL, {"mean": "[]"}, ValueError), 49 | ], 50 | ) 51 | def test_distribution_params_validation_fail(self, distribution, params, error): 52 | sample_len = 100 53 | with pytest.raises(error): 54 | d = dstr.Distribution.from_str(str(distribution), params) 55 | assert len(d.scipy_sample(sample_len)) == sample_len 56 | 57 | @pytest.mark.parametrize( 58 | "distribution, params", 59 | [ 60 | (dstr.Distributions.NORMAL, {"mean": "0", "variance": "1"}), 61 | (dstr.Distributions.EXPONENTIAL, {"rate": "1"}), 62 | (dstr.Distributions.WEIBULL, {"shape": "1", "scale": "1"}), 63 | (dstr.Distributions.UNIFORM, {"min": "0", "max": "1"}), 64 | (dstr.Distributions.BETA, {"alpha": "1", "beta": "1"}), 65 | (dstr.Distributions.GAMMA, {"alpha": "1", "beta": "1"}), 66 | (dstr.Distributions.T, {"n": "1"}), 67 | (dstr.Distributions.LOGNORM, {"s": "1"}), 68 | (dstr.Distributions.MULTIVARIATIVE_NORMAL, {"mean": "[0.0, 0.1]"}), 69 | ], 70 | ) 71 | def test_distribution_generate(self, distribution, params): 72 | 
sample_len = 100 73 | d = dstr.Distribution.from_str(str(distribution), params) 74 | assert len(d.scipy_sample(sample_len)) == sample_len 75 | -------------------------------------------------------------------------------- /tests/test_generator/test_generator.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | from os import walk 3 | from pathlib import Path 4 | 5 | import pytest 6 | 7 | from pysatl_cpd.generator.generator import ScipyDatasetGenerator 8 | from pysatl_cpd.generator.saver import DatasetSaver 9 | 10 | 11 | class TestGenerator: 12 | config_path = "tests/test_configs/test_config_1.yml" 13 | 14 | @pytest.mark.parametrize( 15 | "config_path_str,generator,configurations", 16 | ( 17 | ( 18 | config_path, 19 | ScipyDatasetGenerator(), 20 | { 21 | "20-normal-0-1-20-normal-10-1": [40, [20]], 22 | "20-multivariate_normal-0-0-20-multivariate_normal-10-10": [40, [20]], 23 | "20-normal-0-1-no-change-point": [20, []], 24 | "20-exponential-1-no-change-point": [20, []], 25 | "20-weibull-1-1-no-change-point": [20, []], 26 | "20-uniform-0-1-no-change-point": [20, []], 27 | "20-beta-1-1-no-change-point": [20, []], 28 | "20-gamma-1-1-no-change-point": [20, []], 29 | "20-t-2-no-change-point": [20, []], 30 | "20-lognorm-1-no-change-point": [20, []], 31 | "20-multivariate_normal-0-1-no-change-point": [20, []], 32 | "100-normal-0-1-no-change-point": [100, []], 33 | }, 34 | ), 35 | ), 36 | ) 37 | def test_generate_datasets(self, config_path_str, generator, configurations) -> None: 38 | generated = generator.generate_datasets(Path(config_path_str)) 39 | for name in configurations: 40 | data_length = len(generated[name][0]) 41 | assert data_length == configurations[name][0] 42 | assert generated[name][1] == configurations[name][1] 43 | 44 | @pytest.mark.parametrize( 45 | "config_path_str,generator,configurations", 46 | ( 47 | ( 48 | config_path, 49 | ScipyDatasetGenerator(), 50 | { 51 | "20-normal-0-1-20-normal-10-1": [40, [20]], 52 | "20-normal-0-1-no-change-point": [20, []], 53 | "100-normal-0-1-no-change-point": [100, []], 54 | }, 55 | ), 56 | ), 57 | ) 58 | def test_generate_datasets_save(self, config_path_str, generator, configurations) -> None: 59 | with tempfile.TemporaryDirectory() as tempdir: 60 | saver = DatasetSaver(Path(tempdir), True) 61 | generated = generator.generate_datasets(Path(config_path_str), saver) 62 | for name in configurations: 63 | data_length = sum(1 for _ in generated[name][0]) 64 | assert data_length == configurations[name][0] 65 | assert generated[name][1] == configurations[name][1] 66 | 67 | directory = [file_names for (_, _, file_names) in walk(tempdir)] 68 | for file_names in directory[1:]: 69 | assert sorted(file_names) == sorted(["changepoints.csv", "sample.adoc", "sample.png", "sample.csv"]) 70 | -------------------------------------------------------------------------------- /tests/test_labeled_data.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | from os import walk 3 | from pathlib import Path 4 | 5 | import numpy as np 6 | import pytest 7 | 8 | from pysatl_cpd.labeled_data import LabeledCpdData 9 | 10 | 11 | class TestLabeledCPData: 12 | config_path = "tests/test_configs/test_config_1.yml" 13 | data = LabeledCpdData([1, 2, 3], [4, 5, 6]) 14 | 15 | def test_init(self) -> None: 16 | assert self.data.raw_data == [1, 2, 3] 17 | assert self.data.change_points == [4, 5, 6] 18 | 19 | def test_iter(self) -> None: 20 | assert list(self.data.__iter__()) == 
[1, 2, 3] 21 | 22 | @pytest.mark.parametrize( 23 | "config_path_str,expected_change_points_list,expected_lengths", 24 | ( 25 | ( 26 | config_path, 27 | { 28 | "20-normal-0-1-20-normal-10-1": [20], 29 | "20-normal-0-1-no-change-point": [], 30 | "100-normal-0-1-no-change-point": [], 31 | }, 32 | { 33 | "20-normal-0-1-20-normal-10-1": 40, 34 | "20-normal-0-1-no-change-point": 20, 35 | "100-normal-0-1-no-change-point": 100, 36 | }, 37 | ), 38 | ), 39 | ) 40 | def test_generate_datasets(self, config_path_str, expected_change_points_list, expected_lengths) -> None: 41 | generated = LabeledCpdData.generate_cp_datasets(Path(config_path_str)) 42 | for name in expected_lengths: 43 | data_length = len(generated[name].raw_data) 44 | assert data_length == expected_lengths[name] 45 | assert generated[name].change_points == expected_change_points_list[name] 46 | 47 | @pytest.mark.parametrize( 48 | "config_path_str,expected_change_points_list,expected_lengths", 49 | ( 50 | ( 51 | config_path, 52 | { 53 | "20-normal-0-1-20-normal-10-1": [20], 54 | "20-normal-0-1-no-change-point": [], 55 | "100-normal-0-1-no-change-point": [], 56 | }, 57 | { 58 | "20-normal-0-1-20-normal-10-1": 40, 59 | "20-normal-0-1-no-change-point": 20, 60 | "100-normal-0-1-no-change-point": 100, 61 | }, 62 | ), 63 | ), 64 | ) 65 | def test_generate_datasets_save(self, config_path_str, expected_change_points_list, expected_lengths) -> None: 66 | with tempfile.TemporaryDirectory() as tempdir: 67 | generated = LabeledCpdData.generate_cp_datasets( 68 | Path(config_path_str), to_save=True, output_directory=Path(tempdir) 69 | ) 70 | for name in expected_lengths: 71 | data_length = len(generated[name].raw_data) 72 | assert data_length == expected_lengths[name] 73 | assert generated[name].change_points == expected_change_points_list[name] 74 | 75 | directory = [file_names for (_, _, file_names) in walk(tempdir)] 76 | for file_names in directory[1:]: 77 | assert sorted(file_names) == sorted(["changepoints.csv", "sample.adoc", "sample.png", "sample.csv"]) 78 | 79 | @pytest.mark.parametrize( 80 | "config_path_str", 81 | (config_path,), 82 | ) 83 | def test_read_generated_datasets(self, config_path_str): 84 | with tempfile.TemporaryDirectory() as tempdir: 85 | generated = LabeledCpdData.generate_cp_datasets( 86 | Path(config_path_str), to_save=True, output_directory=Path(tempdir) 87 | ) 88 | read = LabeledCpdData.read_generated_datasets(Path(tempdir)) 89 | for name in generated: 90 | assert read[name].raw_data.shape == generated[name].raw_data.shape 91 | assert np.array_equal(read[name].raw_data, generated[name].raw_data) 92 | assert read[name].change_points == generated[name].change_points 93 | -------------------------------------------------------------------------------- /tests/test_online_solver.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from pysatl_cpd.core.algorithms.bayesian.detectors.threshold import ThresholdDetector 5 | from pysatl_cpd.core.algorithms.bayesian.hazards.constant import ConstantHazard 6 | from pysatl_cpd.core.algorithms.bayesian.likelihoods.exponential_conjugate import ExponentialConjugate 7 | from pysatl_cpd.core.algorithms.bayesian.likelihoods.gaussian_conjugate import GaussianConjugate 8 | from pysatl_cpd.core.algorithms.bayesian.localizers.argmax import ArgmaxLocalizer 9 | from pysatl_cpd.core.algorithms.bayesian_online_algorithm import BayesianOnline 10 | from pysatl_cpd.core.problem import CpdProblem 11 | from 
pysatl_cpd.core.scrubber.data_providers import ListUnivariateProvider 12 | from pysatl_cpd.icpd_solver import CpdLocalizationResults 13 | from pysatl_cpd.labeled_data import LabeledCpdData 14 | from pysatl_cpd.online_cpd_solver import OnlineCpdSolver 15 | 16 | DATA_PARAMS = { 17 | "num_tests": 10, 18 | "size": 500, 19 | "change_point": 250, 20 | "tolerable_deviation": 25, 21 | } 22 | 23 | 24 | @pytest.fixture(scope="session") 25 | def data_params(): 26 | return DATA_PARAMS 27 | 28 | 29 | @pytest.fixture 30 | def data_generator(data_params): 31 | def _generate(has_cp, test_iteration): 32 | seed = 42 + test_iteration 33 | np.random.seed(seed) 34 | if has_cp: 35 | return np.concatenate( 36 | [ 37 | np.random.normal(0, 1, data_params["change_point"]), 38 | np.random.normal(5, 2, data_params["size"] - data_params["change_point"]), 39 | ] 40 | ) 41 | return np.random.normal(0, 1, data_params["size"]) 42 | 43 | return _generate 44 | 45 | 46 | @pytest.fixture 47 | def labeled_data_factory(data_params): 48 | def _factory(data, has_cp): 49 | return LabeledCpdData(raw_data=data, change_points=[data_params["change_point"]] if has_cp else None) 50 | 51 | return _factory 52 | 53 | 54 | @pytest.fixture 55 | def solver_factory(): 56 | def _factory(data_input, with_localization): 57 | return OnlineCpdSolver( 58 | algorithm=BayesianOnline( 59 | learning_sample_size=50, 60 | likelihood=GaussianConjugate(), 61 | hazard=ConstantHazard(rate=1.0 / (1.0 - 0.5 ** (1.0 / 500))), 62 | detector=ThresholdDetector(threshold=0.04), 63 | localizer=ArgmaxLocalizer(), 64 | ), 65 | algorithm_input=data_input, 66 | scenario=CpdProblem(with_localization), 67 | ) 68 | 69 | return _factory 70 | 71 | 72 | def pytest_generate_tests(metafunc): 73 | if "test_iteration" in metafunc.fixturenames: 74 | metafunc.parametrize("test_iteration", range(DATA_PARAMS["num_tests"])) 75 | 76 | 77 | class TestOnlineCpdSolver: 78 | @pytest.mark.parametrize( 79 | "has_cp,with_localization,is_labeled", 80 | [ 81 | (False, True, True), 82 | (True, True, True), 83 | (False, True, False), 84 | (True, True, False), 85 | (False, False, True), 86 | (True, False, True), 87 | (False, False, False), 88 | (True, False, False), 89 | ], 90 | ) 91 | def test_all_scenarios( 92 | self, 93 | data_generator, 94 | labeled_data_factory, 95 | solver_factory, 96 | has_cp, 97 | with_localization, 98 | is_labeled, 99 | test_iteration, 100 | data_params, 101 | ): 102 | raw_data = data_generator(has_cp, test_iteration) 103 | 104 | data_input = labeled_data_factory(raw_data, has_cp) if is_labeled else ListUnivariateProvider(raw_data.tolist()) 105 | 106 | solver = solver_factory(data_input, with_localization) 107 | result = solver.run() 108 | 109 | if with_localization: 110 | assert isinstance(result, CpdLocalizationResults), "Localization result must be CpdLocalizationResults" 111 | if has_cp: 112 | assert len(result.result) == 1, "There must be only one change point" 113 | assert abs(result.result[0] - data_params["change_point"]) <= data_params["tolerable_deviation"], ( 114 | "Change point must lie in the tolerable interval" 115 | ) 116 | if is_labeled: 117 | assert result.expected_result == [data_params["change_point"]], ( 118 | "Labeled change point must be equal to the generated one" 119 | ) 120 | else: 121 | assert result.expected_result is None, "Expected result must be None for unlabeled data" 122 | else: 123 | assert result.result == [], "There must be no change points" 124 | else: 125 | assert isinstance(result, int), "Detection result must be the number of detected change 
points" 126 | assert result == (1 if has_cp else 0), ( 127 | "Number of change points must be equal to expected in the generated data" 128 | ) 129 | 130 | def test_exponential_with_negatives(self, data_params): 131 | np.random.seed(42) 132 | data = np.concatenate( 133 | [ 134 | np.random.exponential(1 / 2, data_params["change_point"]), 135 | np.random.normal(0, 1, data_params["size"] - data_params["change_point"]), 136 | ] 137 | ) 138 | 139 | algorithm = BayesianOnline( 140 | learning_sample_size=20, 141 | likelihood=ExponentialConjugate(), 142 | hazard=ConstantHazard(rate=1.0 / (1.0 - 0.5 ** (1.0 / 500))), 143 | detector=ThresholdDetector(threshold=0.04), 144 | localizer=ArgmaxLocalizer(), 145 | ) 146 | 147 | data_provider = ListUnivariateProvider(list(data)) 148 | 149 | cpd = OnlineCpdSolver( 150 | scenario=CpdProblem(True), 151 | algorithm=algorithm, 152 | algorithm_input=data_provider, 153 | ) 154 | 155 | cpd.run() 156 | -------------------------------------------------------------------------------- /tests/test_solver.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | from os import walk 3 | from pathlib import Path 4 | 5 | import numpy as np 6 | import pytest 7 | 8 | from pysatl_cpd.analysis.results_analyzer import CpdResultsAnalyzer 9 | from pysatl_cpd.core.algorithms.graph_algorithm import GraphAlgorithm 10 | from pysatl_cpd.core.problem import CpdProblem 11 | from pysatl_cpd.core.scrubber.data_providers import ListUnivariateProvider 12 | from pysatl_cpd.core.scrubber.linear import LinearScrubber 13 | from pysatl_cpd.cpd_solver import CpdLocalizationResults, CpdSolver, LabeledCpdData 14 | 15 | 16 | def custom_comparison(node1, node2): # TODO: Remove it everywhere 17 | arg = 1 18 | return abs(node1 - node2) <= arg 19 | 20 | 21 | class TestCpdSolver: 22 | def test_cpd_localization_no_changepoint(self) -> None: 23 | data = [1, 2, 3, 4] 24 | problem = CpdProblem(True) 25 | algorithm = GraphAlgorithm(custom_comparison, 4) 26 | scrubber = LinearScrubber(ListUnivariateProvider(data)) 27 | solver = CpdSolver(problem, algorithm, scrubber) 28 | cpd_result = solver.run() 29 | assert isinstance(cpd_result, CpdLocalizationResults) 30 | assert cpd_result.result == [] 31 | assert cpd_result.expected_result is None 32 | 33 | def test_cpd_localization_labeled_data(self) -> None: 34 | data = LabeledCpdData(np.array([1, 2, 3, 4], dtype=np.float64), [4, 5, 6, 7]) 35 | problem = CpdProblem(True) 36 | algorithm = GraphAlgorithm(custom_comparison, 4) 37 | solver = CpdSolver(problem, algorithm, (data, LinearScrubber)) 38 | cpd_result = solver.run() 39 | assert isinstance(cpd_result, CpdLocalizationResults) 40 | assert cpd_result.result == [] 41 | assert cpd_result.expected_result == [4, 5, 6, 7] 42 | assert cpd_result.result_diff == [4, 5, 6, 7] 43 | 44 | 45 | class TestCPDResultsAnalyzer: 46 | @pytest.mark.parametrize( 47 | "result1, result2, window, expected", 48 | [ 49 | ([4, 5, 6, 7], [3, 5, 6], None, (2, 1, 1, 1)), 50 | ([4, 5, 6, 7], [3, 5, 6], (5, 6), (1, 0, 0, 0)), 51 | ([4, 5, 6, 7], [3, 5, 6], (0, 100), (2, 97, 2, 1)), 52 | ([4, 5, 6, 7], [3, 5, 6], (6, 6), (0, 0, 0, 0)), 53 | ([3, 5, 6, 7], [4, 5, 6], None, (2, 1, 1, 1)), 54 | ([], [4, 5, 6], None, (0, 0, 0, 2)), 55 | ([3, 5, 6, 7], [], None, (0, 4, 3, 0)), 56 | ], 57 | ) 58 | def test_count_confusion_matrix(self, result1, result2, window, expected): 59 | assert CpdResultsAnalyzer.count_confusion_matrix(result1, result2, window) == expected 60 | 61 | def 
test_count_confusion_matrix_exception_case(self): 62 | with pytest.raises(ValueError): 63 | CpdResultsAnalyzer.count_confusion_matrix([], []) 64 | 65 | @pytest.mark.parametrize( 66 | "result1, result2, window, expected", 67 | [ 68 | ([4, 5, 6, 7], [3, 5, 6], None, 0.6), 69 | ([4, 5, 6, 7], [3, 5, 6], (5, 6), 1.0), 70 | ([4, 5, 6, 7], [3, 5, 6], (6, 6), 0.0), 71 | ], 72 | ) 73 | def test_count_accuracy(self, result1, result2, window, expected): 74 | assert CpdResultsAnalyzer.count_accuracy(result1, result2, window) == expected 75 | 76 | @pytest.mark.parametrize( 77 | "result1, result2, window, expected", 78 | [ 79 | ([4, 5, 6, 7], [3, 5, 6], None, 2 / 3), 80 | ([4, 5, 6, 7], [3, 5, 6], (5, 6), 1.0), 81 | ([4, 5, 6, 7], [3, 5, 6], (6, 6), 0.0), 82 | ], 83 | ) 84 | def test_count_precision(self, result1, result2, window, expected): 85 | assert CpdResultsAnalyzer.count_precision(result1, result2, window) == expected 86 | 87 | @pytest.mark.parametrize( 88 | "result1, result2, window, expected", 89 | [ 90 | ([4, 5, 6, 7], [3, 5, 6], None, 2 / 3), 91 | ([4, 5, 6, 7], [3, 5, 6], (5, 6), 1.0), 92 | ([4, 5, 6, 7], [3, 5, 6], (6, 6), 0.0), 93 | ], 94 | ) 95 | def test_count_recall(self, result1, result2, window, expected): 96 | assert CpdResultsAnalyzer.count_recall(result1, result2, window) == expected 97 | 98 | 99 | class TestCpdLocalizationResults: 100 | data = [np.float64(1)] * 15 101 | cont_default1 = CpdLocalizationResults(iter(data), [1, 2, 3], [2, 3, 4], 10) 102 | cont_default2 = CpdLocalizationResults(iter(data), [1, 2, 3, 6, 8], [2, 3, 4, 6], 20) 103 | cont_no_expected = CpdLocalizationResults(iter(data), [1, 2, 3], None, 5) 104 | 105 | def test_result_diff(self) -> None: 106 | assert self.cont_default1.result_diff == [1, 4] 107 | assert self.cont_default2.result_diff == [1, 4, 8] 108 | 109 | def test_result_diff_exception_case(self) -> None: 110 | with pytest.raises(ValueError): 111 | print(self.cont_no_expected.result_diff) 112 | 113 | def test_str_cp_container(self) -> None: 114 | assert ( 115 | str(self.cont_default1) 116 | == """Located change points: (1;2;3) 117 | Expected change point: (2;3;4) 118 | Difference: (1;4) 119 | Computation time (sec): 10""" 120 | ) 121 | 122 | assert ( 123 | str(self.cont_default2) 124 | == """Located change points: (1;2;3;6;8) 125 | Expected change point: (2;3;4;6) 126 | Difference: (1;4;8) 127 | Computation time (sec): 20""" 128 | ) 129 | 130 | assert ( 131 | str(self.cont_no_expected) 132 | == """Located change points: (1;2;3) 133 | Computation time (sec): 5""" 134 | ) 135 | 136 | @pytest.mark.parametrize( 137 | "data,name", 138 | ( 139 | (cont_default1, "d_1"), 140 | (cont_default2, "d_2"), 141 | (cont_no_expected, "cne"), 142 | ), 143 | ) 144 | def test_visualize(self, data, name) -> None: 145 | with tempfile.TemporaryDirectory() as tempdir: 146 | data.visualize(False, Path(tempdir), name) 147 | assert [f"{name}.png"] in [file_names for (_, _, file_names) in walk(tempdir)] 148 | 149 | def test_metric_exception_case(self): 150 | with pytest.raises(ValueError): 151 | self.cont_no_expected.count_confusion_matrix() 152 | --------------------------------------------------------------------------------
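A minimal end-to-end usage sketch assembled from the components exercised by the tests above: online Bayesian change point detection on a synthetic univariate series. The imports and constructor arguments mirror tests/test_online_solver.py; the hazard rate and detection threshold are the values used there, not tuned recommendations.

import numpy as np

from pysatl_cpd.core.algorithms.bayesian.detectors.threshold import ThresholdDetector
from pysatl_cpd.core.algorithms.bayesian.hazards.constant import ConstantHazard
from pysatl_cpd.core.algorithms.bayesian.likelihoods.gaussian_conjugate import GaussianConjugate
from pysatl_cpd.core.algorithms.bayesian.localizers.argmax import ArgmaxLocalizer
from pysatl_cpd.core.algorithms.bayesian_online_algorithm import BayesianOnline
from pysatl_cpd.core.problem import CpdProblem
from pysatl_cpd.core.scrubber.data_providers import ListUnivariateProvider
from pysatl_cpd.online_cpd_solver import OnlineCpdSolver

# Synthetic univariate series with a single change at index 250:
# N(0, 1) before the change point, N(5, 2) after it.
np.random.seed(42)
data = np.concatenate([np.random.normal(0, 1, 250), np.random.normal(5, 2, 250)])

algorithm = BayesianOnline(
    learning_sample_size=50,
    likelihood=GaussianConjugate(),
    hazard=ConstantHazard(rate=1.0 / (1.0 - 0.5 ** (1.0 / 500))),
    detector=ThresholdDetector(threshold=0.04),
    localizer=ArgmaxLocalizer(),
)
solver = OnlineCpdSolver(
    scenario=CpdProblem(True),  # True requests localization, not just detection
    algorithm=algorithm,
    algorithm_input=ListUnivariateProvider(list(data)),
)
result = solver.run()  # CpdLocalizationResults; result.result holds the located change points
print(result)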