├── MANIFEST.in
├── frouros
├── tests
│ ├── __init__.py
│ ├── unit
│ │ ├── __init__.py
│ │ ├── metrics
│ │ │ ├── __init__.py
│ │ │ └── test_prequential_error.py
│ │ ├── detectors
│ │ │ ├── __init__.py
│ │ │ └── data_drift
│ │ │ │ ├── __init__.py
│ │ │ │ └── batch
│ │ │ │ ├── __init__.py
│ │ │ │ └── distance_based
│ │ │ │ └── __init__.py
│ │ ├── callbacks
│ │ │ └── batch
│ │ │ │ ├── __init__.py
│ │ │ │ └── test_permutation.py
│ │ └── utils
│ │ │ ├── test_stats.py
│ │ │ ├── test_checks.py
│ │ │ ├── test_kernels.py
│ │ │ └── test_persistence.py
│ └── integration
│ │ ├── __init__.py
│ │ ├── test_synthetic.py
│ │ └── test_real.py
├── datasets
│ ├── __init__.py
│ ├── exceptions.py
│ ├── real.py
│ └── synthetic.py
├── detectors
│ ├── __init__.py
│ ├── data_drift
│ │ ├── streaming
│ │ │ ├── distance_based
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base.py
│ │ │ │ └── mmd.py
│ │ │ ├── statistical_test
│ │ │ │ ├── __init__.py
│ │ │ │ └── base.py
│ │ │ ├── __init__.py
│ │ │ └── base.py
│ │ ├── exceptions.py
│ │ ├── batch
│ │ │ ├── statistical_test
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base.py
│ │ │ │ ├── ks.py
│ │ │ │ ├── bws.py
│ │ │ │ ├── welch_t_test.py
│ │ │ │ ├── mann_whitney_u.py
│ │ │ │ ├── anderson_darling.py
│ │ │ │ ├── cvm.py
│ │ │ │ └── chisquare.py
│ │ │ ├── distance_based
│ │ │ │ ├── __init__.py
│ │ │ │ ├── emd.py
│ │ │ │ ├── bhattacharyya_distance.py
│ │ │ │ ├── energy_distance.py
│ │ │ │ ├── hellinger_distance.py
│ │ │ │ ├── kl.py
│ │ │ │ ├── js.py
│ │ │ │ ├── hi_normalized_complement.py
│ │ │ │ └── psi.py
│ │ │ ├── __init__.py
│ │ │ └── base.py
│ │ └── __init__.py
│ ├── concept_drift
│ │ ├── exceptions.py
│ │ ├── streaming
│ │ │ ├── window_based
│ │ │ │ ├── __init__.py
│ │ │ │ └── base.py
│ │ │ ├── statistical_process_control
│ │ │ │ ├── __init__.py
│ │ │ │ └── ddm.py
│ │ │ ├── base.py
│ │ │ ├── change_detection
│ │ │ │ ├── __init__.py
│ │ │ │ ├── cusum.py
│ │ │ │ ├── page_hinkley.py
│ │ │ │ └── geometric_moving_average.py
│ │ │ └── __init__.py
│ │ └── __init__.py
│ └── base.py
├── __init__.py
├── utils
│ ├── __init__.py
│ ├── logger.py
│ ├── kernels.py
│ ├── checks.py
│ ├── decorators.py
│ └── persistence.py
├── callbacks
│ ├── streaming
│ │ ├── __init__.py
│ │ ├── base.py
│ │ └── history.py
│ ├── batch
│ │ ├── __init__.py
│ │ ├── base.py
│ │ └── reset.py
│ ├── __init__.py
│ └── base.py
└── metrics
│ ├── __init__.py
│ ├── base.py
│ └── prequential_error.py
├── CODEOWNERS
├── docs
├── source
│ ├── contribute.md
│ ├── examples
│ │ ├── utils.md
│ │ ├── concept_drift.md
│ │ └── data_drift.md
│ ├── examples.md
│ ├── _templates
│ │ ├── class.md
│ │ └── function.md
│ ├── api_reference
│ │ ├── datasets.md
│ │ ├── callbacks.md
│ │ ├── detectors.md
│ │ ├── detectors
│ │ │ ├── concept_drift.md
│ │ │ ├── data_drift.md
│ │ │ ├── data_drift
│ │ │ │ ├── streaming.md
│ │ │ │ └── batch.md
│ │ │ └── concept_drift
│ │ │ │ └── streaming.md
│ │ ├── utils
│ │ │ ├── checks.md
│ │ │ ├── kernels.md
│ │ │ ├── stats.md
│ │ │ ├── persistence.md
│ │ │ └── data_structures.md
│ │ ├── utils.md
│ │ ├── callbacks
│ │ │ ├── batch.md
│ │ │ └── streaming.md
│ │ ├── datasets
│ │ │ ├── real.md
│ │ │ └── synthetic.md
│ │ └── metrics.md
│ ├── api_reference.md
│ ├── installation.md
│ ├── index.md
│ ├── conf.py
│ ├── references.bib
│ └── concepts.md
├── make.bat
└── Makefile
├── images
└── logo.png
├── .coveragerc
├── .github
├── workflows
│ ├── documentation.yml
│ ├── code_coverage.yml
│ ├── ci.yml
│ └── publish.yml
├── dependabot.yml
├── PULL_REQUEST_TEMPLATE.md
└── ISSUE_TEMPLATE
│ ├── feature_request.yml
│ └── bug_report.yml
├── .readthedocs.yaml
├── .codecov.yml
├── .pre-commit-config.yaml
├── LICENSE
├── CONTRIBUTING.md
├── CITATION.cff
├── setup.py
├── tox.ini
├── .gitignore
├── pyproject.toml
└── CODE_OF_CONDUCT.md
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE
2 |
--------------------------------------------------------------------------------
/frouros/tests/__init__.py:
--------------------------------------------------------------------------------
1 | """Tests init."""
2 |
--------------------------------------------------------------------------------
/frouros/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | """Datasets init."""
2 |
--------------------------------------------------------------------------------
/frouros/tests/unit/__init__.py:
--------------------------------------------------------------------------------
1 | """Unit tests init."""
2 |
--------------------------------------------------------------------------------
/frouros/detectors/__init__.py:
--------------------------------------------------------------------------------
1 | """Detection methods init."""
2 |
--------------------------------------------------------------------------------
/CODEOWNERS:
--------------------------------------------------------------------------------
1 | # CODEOWNERS file
2 |
3 | * @jaime-cespedes-sisniega
4 |
--------------------------------------------------------------------------------
/frouros/tests/unit/metrics/__init__.py:
--------------------------------------------------------------------------------
1 | """Metrics test init."""
2 |
--------------------------------------------------------------------------------
/docs/source/contribute.md:
--------------------------------------------------------------------------------
1 | ```{include} ../../CONTRIBUTING.md
2 | ```
3 |
--------------------------------------------------------------------------------
/frouros/__init__.py:
--------------------------------------------------------------------------------
1 | """Frouros."""
2 |
3 | __version__ = "0.9.0"
4 |
--------------------------------------------------------------------------------
/frouros/tests/integration/__init__.py:
--------------------------------------------------------------------------------
1 | """Integration tests init."""
2 |
--------------------------------------------------------------------------------
/frouros/tests/unit/detectors/__init__.py:
--------------------------------------------------------------------------------
1 | """Detectors test init."""
2 |
--------------------------------------------------------------------------------
/frouros/tests/unit/callbacks/batch/__init__.py:
--------------------------------------------------------------------------------
1 | """Batch callbacks test init."""
2 |
--------------------------------------------------------------------------------
/frouros/tests/unit/detectors/data_drift/__init__.py:
--------------------------------------------------------------------------------
1 | """Data drift detectors test init."""
2 |
--------------------------------------------------------------------------------
/images/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IFCA-Advanced-Computing/frouros/HEAD/images/logo.png
--------------------------------------------------------------------------------
/frouros/tests/unit/detectors/data_drift/batch/__init__.py:
--------------------------------------------------------------------------------
1 | """Batch data drift detectors test init."""
2 |
--------------------------------------------------------------------------------
/docs/source/examples/utils.md:
--------------------------------------------------------------------------------
1 | # Utils
2 |
3 | ```{toctree}
4 | :maxdepth: 1
5 |
6 | utils/save_load
7 | ```
8 |
--------------------------------------------------------------------------------
/frouros/tests/unit/detectors/data_drift/batch/distance_based/__init__.py:
--------------------------------------------------------------------------------
1 | """Distance based batch data drift detectors test init."""
2 |
--------------------------------------------------------------------------------
/frouros/utils/__init__.py:
--------------------------------------------------------------------------------
1 | """Utils init."""
2 |
3 | from .persistence import load, save
4 |
5 | __all__ = [
6 | "load",
7 | "save",
8 | ]
9 |
--------------------------------------------------------------------------------
/docs/source/examples/concept_drift.md:
--------------------------------------------------------------------------------
1 | # Concept drift
2 |
3 | ```{toctree}
4 | :maxdepth: 1
5 |
6 | concept_drift/DDM_simple
7 | concept_drift/DDM_advance
8 | ```
9 |
--------------------------------------------------------------------------------
/docs/source/examples.md:
--------------------------------------------------------------------------------
1 | # Examples
2 |
3 | ```{toctree}
4 | :maxdepth: 2
5 |
6 | examples/concept_drift
7 | examples/data_drift
8 | examples/utils
9 | ```
10 |
--------------------------------------------------------------------------------
/frouros/callbacks/streaming/__init__.py:
--------------------------------------------------------------------------------
1 | """Streaming callbacks init."""
2 |
3 | from .history import HistoryConceptDrift
4 |
5 | __all__ = [
6 | "HistoryConceptDrift",
7 | ]
8 |
--------------------------------------------------------------------------------
/frouros/utils/logger.py:
--------------------------------------------------------------------------------
1 | """Logging setup for the frouros package."""
2 |
3 | import logging
4 |
5 | logging.basicConfig(level=logging.INFO)
6 | logger = logging.getLogger("frouros")
7 |
--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
1 | [report]
2 | show_missing = True
3 |
4 | [run]
5 | source = frouros
6 | omit =
7 | */setup.py
8 | */__init__.py
9 | */frouros/setup.py
10 | */frouros/tests/*
11 |
--------------------------------------------------------------------------------
/frouros/detectors/data_drift/streaming/distance_based/__init__.py:
--------------------------------------------------------------------------------
1 | """Data drift streaming distance based detection methods' init."""
2 |
3 | from .mmd import MMD
4 |
5 | __all__ = [
6 | "MMD",
7 | ]
8 |
--------------------------------------------------------------------------------
/frouros/metrics/__init__.py:
--------------------------------------------------------------------------------
1 | """Prequential error metrics init."""
2 |
3 | from .prequential_error import (
4 | PrequentialError,
5 | )
6 |
7 | __all__ = [
8 | "PrequentialError",
9 | ]
10 |
--------------------------------------------------------------------------------
/docs/source/_templates/class.md:
--------------------------------------------------------------------------------
1 | {{objname}}
2 | {{ underline }}==============
3 | ```{eval-rst}
4 | .. currentmodule:: {{ module }}
5 | ```
6 |
7 | ```{eval-rst}
8 | .. autoclass:: {{ objname }}
9 | ```
10 |
--------------------------------------------------------------------------------
/docs/source/_templates/function.md:
--------------------------------------------------------------------------------
1 | {{objname}}
2 | {{ underline }}==============
3 | ```{eval-rst}
4 | .. currentmodule:: {{ module }}
5 | ```
6 |
7 | ```{eval-rst}
8 | .. autofunction:: {{ objname }}
9 | ```
10 |
--------------------------------------------------------------------------------
/docs/source/api_reference/datasets.md:
--------------------------------------------------------------------------------
1 | # Datasets
2 |
3 | The {mod}`frouros.datasets` module contains datasets.
4 |
5 | ```{toctree}
6 | :maxdepth: 2
7 |
8 | datasets/real
9 | datasets/synthetic
10 | ```
11 |
--------------------------------------------------------------------------------
/docs/source/api_reference/callbacks.md:
--------------------------------------------------------------------------------
1 | # Callbacks
2 |
3 | The {mod}`frouros.callbacks` module contains callbacks.
4 |
5 | ```{toctree}
6 | :maxdepth: 2
7 |
8 | callbacks/batch
9 | callbacks/streaming
10 | ```
11 |
--------------------------------------------------------------------------------
/docs/source/examples/data_drift.md:
--------------------------------------------------------------------------------
1 | # Data drift
2 |
3 | ```{toctree}
4 | :maxdepth: 1
5 |
6 | data_drift/MMD_simple
7 | data_drift/MMD_advance
8 | data_drift/univariate_detector
9 | data_drift/multivariate_detector
10 | ```
11 |
--------------------------------------------------------------------------------
/frouros/detectors/data_drift/streaming/statistical_test/__init__.py:
--------------------------------------------------------------------------------
1 | """Data drift streaming statistical test detection methods' init."""
2 |
3 | from .ks import IncrementalKSTest
4 |
5 | __all__ = [
6 | "IncrementalKSTest",
7 | ]
8 |
--------------------------------------------------------------------------------
/docs/source/api_reference/detectors.md:
--------------------------------------------------------------------------------
1 | # Detectors
2 |
3 | The {mod}`frouros.detectors` module contains detection algorithms.
4 |
5 | ```{toctree}
6 | :maxdepth: 2
7 |
8 | detectors/concept_drift
9 | detectors/data_drift
10 | ```
11 |
--------------------------------------------------------------------------------
/docs/source/api_reference/detectors/concept_drift.md:
--------------------------------------------------------------------------------
1 | # Concept drift
2 |
3 | The {mod}`frouros.detectors.concept_drift` module contains concept drift detection algorithms.
4 |
5 | ```{toctree}
6 | :maxdepth: 2
7 |
8 | concept_drift/streaming
9 | ```
10 |
--------------------------------------------------------------------------------
/docs/source/api_reference/detectors/data_drift.md:
--------------------------------------------------------------------------------
1 | # Data drift
2 |
3 | The {mod}`frouros.detectors.data_drift` module contains data drift detection algorithms.
4 |
5 | ```{toctree}
6 | :maxdepth: 2
7 |
8 | data_drift/batch
9 | data_drift/streaming
10 | ```
11 |
--------------------------------------------------------------------------------
/docs/source/api_reference/utils/checks.md:
--------------------------------------------------------------------------------
1 | # Checks
2 |
3 | The {mod}`frouros.utils.checks` module contains auxiliary checks functions.
4 |
5 | ```{eval-rst}
6 | .. automodule:: frouros.utils.checks
7 | :members:
8 | :no-inherited-members:
9 | ```
10 |
--------------------------------------------------------------------------------
/docs/source/api_reference/utils/kernels.md:
--------------------------------------------------------------------------------
1 | # Kernels
2 |
3 | The {mod}`frouros.utils.kernels` module contains auxiliary kernel functions.
4 |
5 | ```{eval-rst}
6 | .. automodule:: frouros.utils.kernels
7 | :members:
8 | :no-inherited-members:
9 | ```
10 |
--------------------------------------------------------------------------------
/docs/source/api_reference/utils/stats.md:
--------------------------------------------------------------------------------
1 | # Stats
2 |
3 | The {mod}`frouros.utils.stats` module contains auxiliary stats classes or exceptions.
4 |
5 | ```{eval-rst}
6 | .. automodule:: frouros.utils.stats
7 | :members:
8 | :no-inherited-members:
9 | ```
10 |
--------------------------------------------------------------------------------
/frouros/detectors/data_drift/streaming/__init__.py:
--------------------------------------------------------------------------------
1 | """Data drift streaming detection methods init."""
2 |
3 | from .distance_based import MMD
4 | from .statistical_test import IncrementalKSTest
5 |
6 | __all__ = [
7 | "IncrementalKSTest",
8 | "MMD",
9 | ]
10 |
--------------------------------------------------------------------------------
/frouros/callbacks/batch/__init__.py:
--------------------------------------------------------------------------------
1 | """Batch callbacks init."""
2 |
3 | from .permutation_test import PermutationTestDistanceBased
4 | from .reset import ResetStatisticalTest
5 |
6 | __all__ = [
7 | "PermutationTestDistanceBased",
8 | "ResetStatisticalTest",
9 | ]
10 |
--------------------------------------------------------------------------------
/docs/source/api_reference.md:
--------------------------------------------------------------------------------
1 | # API Reference
2 |
3 | Welcome to the API reference for `frouros`.
4 |
5 | ```{toctree}
6 | :maxdepth: 2
7 |
8 | api_reference/callbacks
9 | api_reference/datasets
10 | api_reference/detectors
11 | api_reference/metrics
12 | api_reference/utils
13 | ```
14 |
--------------------------------------------------------------------------------
/docs/source/api_reference/utils.md:
--------------------------------------------------------------------------------
1 | # Utils
2 |
3 | The {mod}`frouros.utils` module contains auxiliary classes, functions or exceptions.
4 |
5 | ```{toctree}
6 | :maxdepth: 2
7 |
8 | utils/checks
9 | utils/data_structures
10 | utils/kernels
11 | utils/persistence
12 | utils/stats
13 | ```
14 |
--------------------------------------------------------------------------------
/docs/source/api_reference/utils/persistence.md:
--------------------------------------------------------------------------------
1 | # Persistence
2 |
3 | The {mod}`frouros.utils.persistence` module contains auxiliary functions to persist (save and load) objects.
4 |
5 | ```{eval-rst}
6 | .. automodule:: frouros.utils.persistence
7 | :members:
8 | :no-inherited-members:
9 | ```
10 |
--------------------------------------------------------------------------------
/frouros/callbacks/__init__.py:
--------------------------------------------------------------------------------
1 | """Callbacks init."""
2 |
3 | from .batch import PermutationTestDistanceBased, ResetStatisticalTest
4 | from .streaming import HistoryConceptDrift
5 |
6 | __all__ = [
7 | "HistoryConceptDrift",
8 | "PermutationTestDistanceBased",
9 | "ResetStatisticalTest",
10 | ]
11 |
--------------------------------------------------------------------------------
/docs/source/api_reference/utils/data_structures.md:
--------------------------------------------------------------------------------
1 | # Data Structures
2 |
3 | The {mod}`frouros.utils.data_structures` module contains auxiliary data structures classes or exceptions.
4 |
5 | ```{eval-rst}
6 | .. automodule:: frouros.utils.data_structures
7 | :members:
8 | :no-inherited-members:
9 | ```
10 |
--------------------------------------------------------------------------------
/docs/source/api_reference/callbacks/batch.md:
--------------------------------------------------------------------------------
1 | # Batch
2 |
3 | ```{eval-rst}
4 | .. automodule:: frouros.callbacks.batch
5 | :no-members:
6 | :no-inherited-members:
7 | ```
8 |
9 | ```{eval-rst}
10 | .. autosummary::
11 | :toctree: auto_generated/
12 | :template: class.md
13 |
14 | PermutationTestDistanceBased
15 | ResetStatisticalTest
16 | ```
17 |
--------------------------------------------------------------------------------
/docs/source/api_reference/datasets/real.md:
--------------------------------------------------------------------------------
1 | # Real
2 |
3 | ```{eval-rst}
4 | .. automodule:: frouros.datasets.real
5 | :no-members:
6 | :no-inherited-members:
7 | ```
8 |
9 | ```{currentmodule} frouros.datasets.real
10 | ```
11 |
12 | ```{eval-rst}
13 | .. autosummary::
14 | :toctree: auto_generated/
15 | :template: class.md
16 |
17 | Elec2
18 | ```
19 |
--------------------------------------------------------------------------------
/frouros/detectors/concept_drift/exceptions.py:
--------------------------------------------------------------------------------
1 | """Concept drift exception module."""
2 |
3 |
4 | class InvalidAverageRunLengthError(Exception):
5 | """Invalid average run length exception."""
6 |
7 |
8 | class NoFitMethodError(Exception):
9 | """No fit method exception."""
10 |
11 |
12 | class UpdateDetectorError(Exception):
13 | """Update detector exception."""
14 |
--------------------------------------------------------------------------------
/docs/source/api_reference/datasets/synthetic.md:
--------------------------------------------------------------------------------
1 | # Synthetic
2 |
3 | ```{eval-rst}
4 | .. automodule:: frouros.datasets.synthetic
5 | :no-members:
6 | :no-inherited-members:
7 | ```
8 |
9 | ```{currentmodule} frouros.datasets.synthetic
10 | ```
11 |
12 | ```{eval-rst}
13 | .. autosummary::
14 | :toctree: auto_generated/
15 | :template: class.md
16 |
17 | SEA
18 | ```
19 |
--------------------------------------------------------------------------------
/frouros/detectors/concept_drift/streaming/window_based/__init__.py:
--------------------------------------------------------------------------------
1 | """Concept drift window based detection methods' init."""
2 |
3 | from .adwin import ADWIN, ADWINConfig
4 | from .kswin import KSWIN, KSWINConfig
5 | from .stepd import STEPD, STEPDConfig
6 |
7 | __all__ = [
8 | "ADWIN",
9 | "ADWINConfig",
10 | "KSWIN",
11 | "KSWINConfig",
12 | "STEPD",
13 | "STEPDConfig",
14 | ]
15 |
--------------------------------------------------------------------------------
/docs/source/api_reference/callbacks/streaming.md:
--------------------------------------------------------------------------------
1 | # Streaming
2 |
3 | The {mod}`frouros.callbacks.streaming` module contains streaming callbacks.
4 |
5 |
6 | ```{eval-rst}
7 | .. automodule:: frouros.callbacks.streaming
8 | :no-members:
9 | :no-inherited-members:
10 | ```
11 |
12 | ```{eval-rst}
13 | .. autosummary::
14 | :toctree: auto_generated/
15 | :template: class.md
16 |
17 | HistoryConceptDrift
18 | ```
19 |
--------------------------------------------------------------------------------
/.github/workflows/documentation.yml:
--------------------------------------------------------------------------------
1 | name: Documentation
2 | on:
3 | pull_request_target:
4 | types:
5 | - opened
6 | # Execute this action only on PRs that touch
7 | # documentation files.
8 | # paths:
9 | # - "docs/**"
10 |
11 | permissions:
12 | pull-requests: write
13 |
14 | jobs:
15 | pull-request-links:
16 | runs-on: ubuntu-latest
17 | steps:
18 | - uses: readthedocs/actions/preview@v1
19 | with:
20 | project-slug: "frouros"
21 |
--------------------------------------------------------------------------------
/docs/source/api_reference/metrics.md:
--------------------------------------------------------------------------------
1 | # Metrics
2 |
3 | ```{eval-rst}
4 | .. automodule:: frouros.metrics
5 | :no-members:
6 | :no-inherited-members:
7 | ```
8 |
9 | ```{currentmodule} frouros.metrics
10 | ```
11 |
12 | ## Prequential Error
13 |
14 | ```{eval-rst}
15 | .. automodule:: frouros.metrics.prequential_error
16 | :no-members:
17 | :no-inherited-members:
18 | ```
19 |
20 | ```{eval-rst}
21 | .. autosummary::
22 | :toctree: auto_generated/
23 | :template: class.md
24 |
25 | PrequentialError
26 | ```
27 |
--------------------------------------------------------------------------------
/frouros/detectors/data_drift/exceptions.py:
--------------------------------------------------------------------------------
1 | """Data drift exception module."""
2 |
3 |
4 | class DimensionError(Exception):
5 | """Dimension exception."""
6 |
7 |
8 | class GetStatisticalTestError(Exception):
9 | """Get statistical test exception."""
10 |
11 |
12 | class MismatchDimensionError(Exception):
13 | """Mismatch dimension exception."""
14 |
15 |
16 | class MissingFitError(Exception):
17 | """Missing fit exception."""
18 |
19 |
20 | class InsufficientSamplesError(Exception):
21 | """Insufficient samples exception."""
22 |
--------------------------------------------------------------------------------
/frouros/datasets/exceptions.py:
--------------------------------------------------------------------------------
1 | """BaseDatasetDownload exception module."""
2 |
3 |
4 | class DownloadError(Exception):
5 | """Download exception."""
6 |
7 |
8 | class InvalidBlockError(Exception):
9 | """Invalid block exception."""
10 |
11 |
12 | class InvalidFilePathError(Exception):
13 | """Invalid file path exception."""
14 |
15 |
16 | class InvalidURLError(Exception):
17 | """Invalid URL exception."""
18 |
19 |
20 | class RequestFileError(Exception):
21 | """Request file exception."""
22 |
23 |
24 | class ReadFileError(Exception):
25 | """Read file exception."""
26 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 |
4 | # Maintain dependencies for pip
5 | - package-ecosystem: "pip"
6 | directory: "/"
7 | schedule:
8 | interval: "daily"
9 | labels:
10 | - "dependencies"
11 | reviewers:
12 | - "jaime-cespedes-sisniega"
13 | - "alvarolopez"
14 |
15 | # Maintain dependencies for GitHub Actions
16 | - package-ecosystem: "github-actions"
17 | directory: "/"
18 | schedule:
19 | interval: "weekly"
20 | labels:
21 | - "dependencies"
22 | reviewers:
23 | - "jaime-cespedes-sisniega"
24 | - "alvarolopez"
25 |
--------------------------------------------------------------------------------
/frouros/detectors/concept_drift/streaming/statistical_process_control/__init__.py:
--------------------------------------------------------------------------------
1 | """Concept drift SPC (statistical process control) detection methods' init."""
2 |
3 | from .ddm import DDM, DDMConfig
4 | from .ecdd import ECDDWT, ECDDWTConfig
5 | from .eddm import EDDM, EDDMConfig
6 | from .hddm import HDDMA, HDDMW, HDDMAConfig, HDDMWConfig
7 | from .rddm import RDDM, RDDMConfig
8 |
9 | __all__ = [
10 | "DDM",
11 | "DDMConfig",
12 | "ECDDWT",
13 | "ECDDWTConfig",
14 | "EDDM",
15 | "EDDMConfig",
16 | "HDDMA",
17 | "HDDMAConfig",
18 | "HDDMW",
19 | "HDDMWConfig",
20 | "RDDM",
21 | "RDDMConfig",
22 | ]
23 |
--------------------------------------------------------------------------------
/frouros/detectors/data_drift/batch/statistical_test/__init__.py:
--------------------------------------------------------------------------------
1 | """Data drift batch statistical test detection methods' init."""
2 |
3 | from .anderson_darling import AndersonDarlingTest
4 | from .bws import BWSTest
5 | from .chisquare import ChiSquareTest
6 | from .cvm import CVMTest
7 | from .ks import KSTest
8 | from .kuiper_test import KuiperTest
9 | from .mann_whitney_u import MannWhitneyUTest
10 | from .welch_t_test import WelchTTest
11 |
12 | __all__ = [
13 | "AndersonDarlingTest",
14 | "BWSTest",
15 | "ChiSquareTest",
16 | "CVMTest",
17 | "KSTest",
18 | "KuiperTest",
19 | "MannWhitneyUTest",
20 | "WelchTTest",
21 | ]
22 |
--------------------------------------------------------------------------------
/frouros/detectors/concept_drift/streaming/base.py:
--------------------------------------------------------------------------------
1 | """Base concept drift streaming module."""
2 |
3 | import abc
4 | from typing import Any, Union
5 |
6 | from frouros.detectors.concept_drift.base import (
7 | BaseConceptDrift,
8 | BaseConceptDriftConfig,
9 | )
10 |
11 |
12 | class BaseConceptDriftStreamingConfig(BaseConceptDriftConfig):
13 | """Abstract class representing a concept drift streaming configuration class."""
14 |
15 |
16 | class BaseConceptDriftStreaming(BaseConceptDrift):
17 | """Abstract class representing a concept drift streaming detector."""
18 |
19 | @abc.abstractmethod
20 | def _update(self, value: Union[int, float], **kwargs: Any) -> None:
21 | pass
22 |
--------------------------------------------------------------------------------
/frouros/detectors/concept_drift/streaming/change_detection/__init__.py:
--------------------------------------------------------------------------------
1 | """Concept drift change detection methods' init."""
2 |
3 | from .bocd import (
4 | BOCD,
5 | BOCDConfig,
6 | )
7 | from .cusum import (
8 | CUSUM,
9 | CUSUMConfig,
10 | )
11 | from .geometric_moving_average import (
12 | GeometricMovingAverage,
13 | GeometricMovingAverageConfig,
14 | )
15 | from .page_hinkley import (
16 | PageHinkley,
17 | PageHinkleyConfig,
18 | )
19 |
20 | __all__ = [
21 | "BOCD",
22 | "BOCDConfig",
23 | "CUSUM",
24 | "CUSUMConfig",
25 | "GeometricMovingAverage",
26 | "GeometricMovingAverageConfig",
27 | "PageHinkley",
28 | "PageHinkleyConfig",
29 | ]
30 |
--------------------------------------------------------------------------------
/frouros/detectors/data_drift/batch/distance_based/__init__.py:
--------------------------------------------------------------------------------
1 | """Data drift batch distance based detection methods' init."""
2 |
3 | from .bhattacharyya_distance import BhattacharyyaDistance
4 | from .emd import EMD
5 | from .energy_distance import EnergyDistance
6 | from .hellinger_distance import HellingerDistance
7 | from .hi_normalized_complement import HINormalizedComplement
8 | from .js import JS
9 | from .kl import KL
10 | from .mmd import MMD
11 | from .psi import PSI
12 |
13 | __all__ = [
14 | "BhattacharyyaDistance",
15 | "EMD",
16 | "EnergyDistance",
17 | "HellingerDistance",
18 | "HINormalizedComplement",
19 | "JS",
20 | "KL",
21 | "PSI",
22 | "MMD",
23 | ]
24 |
--------------------------------------------------------------------------------
/frouros/utils/kernels.py:
--------------------------------------------------------------------------------
1 | """Kernels module."""
2 |
3 | import numpy as np
4 | from scipy.spatial.distance import cdist
5 |
6 |
7 | def rbf_kernel(
8 | X: np.ndarray, # noqa: N803
9 | Y: np.ndarray,
10 | sigma: float = 1.0,
11 | ) -> np.ndarray:
12 | """Radial basis function kernel between X and Y matrices.
13 |
14 | :param X: X matrix
15 | :type X: numpy.ndarray
16 | :param Y: Y matrix
17 | :type Y: numpy.ndarray
18 | :param sigma: sigma value (equivalent to gamma = 1 / (2 * sigma**2))
19 | :type sigma: float
20 | :return: Radial basis kernel matrix
21 | :rtype: numpy.ndarray
22 | """
23 | return np.exp(-cdist(X, Y, "sqeuclidean") / (2 * sigma**2))
24 |
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 |
5 |
6 | #### Reference Issues/PRs
7 |
13 |
14 |
15 | #### What does this implement/fix? Explain your changes.
16 |
17 |
18 | #### Any other comments?
19 |
20 |
21 |
22 |
--------------------------------------------------------------------------------
/docs/source/installation.md:
--------------------------------------------------------------------------------
1 | # Installation
2 |
3 | `frouros` currently supports Python 3.9, 3.10, 3.11 and 3.12.
4 |
5 | ```{tip}
6 | We highly recommend using a [virtual environment](https://docs.python.org/3.12/tutorial/venv.html).
7 | ```
8 |
9 | ## From PyPI
10 |
11 | `frouros` releases are available via [PyPI](https://pypi.org/project/frouros/). To install the normal version via `pip` use:
12 |
13 | ```{code-block} bash
14 | pip install frouros
15 | ```
16 |
17 | ```{tip}
18 | Recommended method.
19 | ```
20 |
21 | ## From GitHub
22 |
23 | The latest changes on the `frouros` main branch can also be installed via `pip`:
24 |
25 | ```{code-block} bash
26 | pip install git+https://github.com/IFCA-Advanced-Computing/frouros.git
27 | ```
28 |
--------------------------------------------------------------------------------
/frouros/detectors/concept_drift/streaming/window_based/base.py:
--------------------------------------------------------------------------------
1 | """Base concept drift window based module."""
2 |
3 | import abc
4 | from typing import Any, Union
5 |
6 | from frouros.detectors.concept_drift.streaming.base import (
7 | BaseConceptDriftStreaming,
8 | BaseConceptDriftStreamingConfig,
9 | )
10 |
11 |
class BaseWindowConfig(BaseConceptDriftStreamingConfig):
    """Configuration class for window based concept drift detectors.

    It adds nothing to :class:`BaseConceptDriftStreamingConfig`; it only
    provides a dedicated configuration type for the window based family.
    """
14 |
15 |
class BaseWindow(BaseConceptDriftStreaming):
    """Abstract class representing a window based detector."""

    # Configuration class paired with this detector family.
    # NOTE(review): presumably used by the base class to validate the config
    # passed by the user - confirm against BaseConceptDriftStreaming.
    config_type = BaseWindowConfig

    @abc.abstractmethod
    def _update(self, value: Union[int, float], **kwargs: Any) -> None:
        """Update the detector with a new value (implemented by subclasses).

        :param value: value used to update the detector
        :type value: Union[int, float]
        :param kwargs: additional keyword arguments
        :type kwargs: Any
        """
        pass
24 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.yml:
--------------------------------------------------------------------------------
1 | name: Feature request
2 | description: Suggest a new algorithm, enhancement to an existing algorithm, etc.
3 | labels: ["enhancement", "needs triage"]
4 |
5 | body:
6 | - type: textarea
7 | attributes:
8 | label: Describe the workflow you want to enable
9 | validations:
10 | required: true
11 | - type: textarea
12 | attributes:
13 | label: Describe your proposed solution
14 | validations:
15 | required: true
16 | - type: textarea
17 | attributes:
18 | label: Describe alternatives you've considered, if relevant
19 | - type: textarea
20 | attributes:
21 | label: Additional context
22 | - type: markdown
23 | attributes:
24 | value: >
25 | Thanks for contributing 🎉! We will review your proposal as soon as possible.
26 |
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | # .readthedocs.yaml
2 | # Read the Docs configuration file
3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
4 |
5 | # Required
6 | version: 2
7 |
8 | # Set the version of Python and other tools you might need
9 | build:
10 | os: ubuntu-22.04
11 | tools:
12 | python: "3.9"
13 |
14 | # Build documentation in the docs/ directory with Sphinx
15 | sphinx:
16 | configuration: docs/source/conf.py
17 | # Right now there are several unmanaged warnings. Change it to true when there are no warnings.
18 | fail_on_warning: false
19 |
20 | # Optionally declare the Python requirements required to build your docs
21 | python:
22 | # Install our python package before building the docs
23 | install:
24 | - method: pip
25 | path: .
26 | extra_requirements:
27 | - docs
28 | - notebooks
29 |
--------------------------------------------------------------------------------
/docs/source/index.md:
--------------------------------------------------------------------------------
1 |
Welcome to Frouros
2 |
3 | ```{include} ../../README.md
4 | :start-after: ---
5 | :end-before: ⚡️ Quickstart
6 | ```
7 |
8 | In order to start using `frouros`, we highly recommend checking the {doc}`concepts <concepts>` and
9 | {doc}`FAQ <faq>` sections to get a quick idea of what `frouros` is capable of, and what it is not yet capable
10 | of. Subsequently, we recommend taking a look at the {doc}`examples <examples>` section since it is the best way to
11 | start using `frouros`.
12 |
13 | Read the {doc}`installation <installation>` instructions to start using `frouros`.
14 |
15 | ```{include} ../../README.md
16 | :start-after: ----
17 | ```
18 |
19 | ```{warning}
20 | This library and its documentation are under heavy development.
21 | ```
22 |
23 | ```{toctree}
24 | :maxdepth: 1
25 | :hidden:
26 |
27 | installation
28 | concepts
29 | api_reference
30 | examples
31 | contribute
32 | faq
33 | ```
34 |
--------------------------------------------------------------------------------
/frouros/detectors/data_drift/batch/__init__.py:
--------------------------------------------------------------------------------
1 | """Data drift batch detection methods init."""
2 |
3 | from .distance_based import (
4 | EMD,
5 | JS,
6 | KL,
7 | MMD,
8 | PSI,
9 | BhattacharyyaDistance,
10 | EnergyDistance,
11 | HellingerDistance,
12 | HINormalizedComplement,
13 | )
14 | from .statistical_test import (
15 | AndersonDarlingTest,
16 | BWSTest,
17 | ChiSquareTest,
18 | CVMTest,
19 | KSTest,
20 | KuiperTest,
21 | MannWhitneyUTest,
22 | WelchTTest,
23 | )
24 |
25 | __all__ = [
26 | "AndersonDarlingTest",
27 | "BWSTest",
28 | "BhattacharyyaDistance",
29 | "ChiSquareTest",
30 | "CVMTest",
31 | "EMD",
32 | "EnergyDistance",
33 | "HellingerDistance",
34 | "HINormalizedComplement",
35 | "JS",
36 | "KL",
37 | "KSTest",
38 | "KuiperTest",
39 | "PSI",
40 | "MannWhitneyUTest",
41 | "MMD",
42 | "WelchTTest",
43 | ]
44 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
@ECHO OFF

pushd %~dp0

REM Command file for Sphinx documentation

if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
REM conf.py lives in the "source" subdirectory (same SOURCEDIR as docs/Makefile),
REM so sphinx-build must be pointed there instead of at the docs root.
set SOURCEDIR=source
set BUILDDIR=_build

if "%1" == "" goto help

REM Probe for sphinx-build; errorlevel 9009 means the command was not found.
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.https://www.sphinx-doc.org/
	exit /b 1
)

%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%

:end
popd
36 |
--------------------------------------------------------------------------------
/frouros/utils/checks.py:
--------------------------------------------------------------------------------
1 | """Checks module."""
2 |
3 | from typing import Any
4 |
5 | from frouros.callbacks.base import BaseCallback
6 |
7 |
def check_callbacks(
    callbacks: Any,
    expected_cls: "type[BaseCallback]",
) -> None:
    """Check that callbacks have an accepted type.

    Accepted values are ``None``, a single instance of ``expected_cls`` or a
    list whose items are all instances of ``expected_cls`` (an empty list is
    therefore accepted).

    :param callbacks: callbacks
    :type callbacks: Any
    :param expected_cls: expected callback class
    :type expected_cls: type[BaseCallback]
    :raises TypeError: Type error exception
    """
    if callbacks is None or isinstance(callbacks, expected_cls):
        return
    if isinstance(callbacks, list) and all(
        isinstance(item, expected_cls) for item in callbacks
    ):
        return
    # expected_cls is a class, so its name has to be read from __name__;
    # a `name` attribute looked up on the class itself is not the class name.
    cls_name = expected_cls.__name__
    raise TypeError(
        f"callbacks must be of type None, {cls_name} or a list of {cls_name}."
    )
32 |
--------------------------------------------------------------------------------
/frouros/detectors/data_drift/__init__.py:
--------------------------------------------------------------------------------
1 | """Data drift detection methods init."""
2 |
3 | from .batch import ( # noqa: F401
4 | EMD,
5 | JS,
6 | KL,
7 | MMD,
8 | PSI,
9 | AndersonDarlingTest,
10 | BhattacharyyaDistance,
11 | BWSTest,
12 | ChiSquareTest,
13 | CVMTest,
14 | EnergyDistance,
15 | HellingerDistance,
16 | HINormalizedComplement,
17 | KSTest,
18 | KuiperTest,
19 | MannWhitneyUTest,
20 | WelchTTest,
21 | )
22 | from .streaming import MMD as MMDStreaming
23 | from .streaming import IncrementalKSTest # noqa: N811
24 |
# Public API of the data drift package. Every detector imported above is
# listed so that star-imports and documentation tooling see the same names
# that the batch package exports.
__all__ = [
    "AndersonDarlingTest",
    "BhattacharyyaDistance",
    "BWSTest",
    "ChiSquareTest",
    "CVMTest",
    "EMD",
    "EnergyDistance",
    "HellingerDistance",
    "HINormalizedComplement",
    "IncrementalKSTest",
    "JS",
    "KL",
    "KSTest",
    "KuiperTest",
    "PSI",
    "MannWhitneyUTest",
    "MMD",
    "MMDStreaming",
    "WelchTTest",
]
44 |
--------------------------------------------------------------------------------
/docs/source/api_reference/detectors/data_drift/streaming.md:
--------------------------------------------------------------------------------
1 | # Streaming
2 |
3 | The {mod}`frouros.detectors.data_drift.streaming` module contains streaming data drift detection algorithms.
4 |
5 | ```{eval-rst}
6 | .. automodule:: frouros.detectors.data_drift.streaming
7 | :no-members:
8 | :no-inherited-members:
9 | ```
10 |
11 | ```{currentmodule} frouros.detectors.data_drift.streaming
12 | ```
13 |
14 | ## Distance Based
15 |
16 | ```{eval-rst}
17 | .. automodule:: frouros.detectors.data_drift.streaming.distance_based
18 | :no-members:
19 | :no-inherited-members:
20 | ```
21 |
22 | ```{eval-rst}
23 | .. autosummary::
24 | :toctree: auto_generated/
25 | :template: class.md
26 |
27 | MMD
28 | ```
29 |
30 | ## Statistical Test
31 |
32 | ```{eval-rst}
33 | .. automodule:: frouros.detectors.data_drift.streaming.statistical_test
34 | :no-members:
35 | :no-inherited-members:
36 | ```
37 |
38 | ```{eval-rst}
39 | .. autosummary::
40 | :toctree: auto_generated/
41 | :template: class.md
42 |
43 | IncrementalKSTest
44 | ```
45 |
--------------------------------------------------------------------------------
/frouros/utils/decorators.py:
--------------------------------------------------------------------------------
1 | """Decorators module."""
2 |
3 | import functools
4 | import os
5 | from typing import Any
6 |
7 | import pytest
8 |
9 |
def set_os_filename(base_filename: str) -> Any:
    """Build a decorator that tags a function with an OS-specific filename.

    The filename is ``base_filename`` placed in a temporary directory
    (``TEMP``/``TMP`` on Windows, ``/tmp`` on POSIX). It is computed when the
    decorator is applied and attached to the wrapped function through the
    ``pytest.mark.filename`` marker.

    :param base_filename: Base filename
    :type base_filename: str
    :return: Decorator
    :rtype: Any
    """

    def decorator(func: Any) -> Any:
        if os.name == "nt":  # Windows
            # NOTE(review): if neither TEMP nor TMP is set, temp_dir is None
            # and the filename starts with "None\\" - confirm this cannot
            # happen in the supported environments.
            temp_dir = os.environ.get("TEMP") or os.environ.get("TMP")
            filename = f"{temp_dir}\\{base_filename}"
        elif os.name == "posix":  # Linux or macOS
            temp_dir = "/tmp"
            filename = f"{temp_dir}/{base_filename}"
        else:
            raise Exception("Unsupported operating system.")

        @functools.wraps(func)
        @pytest.mark.filename(filename)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            # Propagate the wrapped function's result instead of discarding it.
            return func(*args, **kwargs)

        return wrapper

    return decorator
37 |
--------------------------------------------------------------------------------
/.github/workflows/code_coverage.yml:
--------------------------------------------------------------------------------
1 | name: Code Coverage
2 |
3 | on:
4 | pull_request:
5 | types:
6 | - closed
7 |
8 | jobs:
9 | code-coverage:
10 | if: github.event.pull_request.merged == true
11 | runs-on: ubuntu-22.04
12 | steps:
13 | - name: Checkout
14 | uses: actions/checkout@v4
15 |
16 | - name: Set up Python 3.9
17 | uses: actions/setup-python@v5
18 | with:
19 | python-version: '3.9'
20 |
21 | - name: Install dependencies
22 | run: |
23 | python -m pip install --upgrade pip
24 | python -m pip install setuptools==68.2.2 wheel==0.42.0 tox==3.24.5
25 | python -m venv .venv
26 | source .venv/bin/activate
27 | pip install -e .
28 |
29 | - name: Run tox
30 | run: tox -e py39
31 |
32 | - name: Upload coverage to Codecov
33 | uses: codecov/codecov-action@v5
34 | with:
35 | token: ${{ secrets.CODECOV_TOKEN }}
36 | fail_ci_if_error: true
37 | override_branch: main
38 | verbose: true
39 |
--------------------------------------------------------------------------------
/.codecov.yml:
--------------------------------------------------------------------------------
1 | # For more configuration details:
2 | # https://docs.codecov.io/docs/codecov-yaml
3 |
4 | # Check if this file is valid by running in bash:
5 | # curl --data-binary @.codecov.yml https://codecov.io/validate
6 |
7 | # Coverage configuration
8 | # ----------------------
9 | codecov:
10 | require_ci_to_pass: yes
11 |
12 | coverage:
13 | precision: 5 # Number of decimal places, between 0 and 5
14 | round: nearest # up, down, or nearest (default is down)
15 | range: 70..95 # First number represents red, and second represents green
16 | # (default is 70..100)
17 |
18 | github_checks:
19 | annotations: true
20 |
21 | # Pull request comments:
22 | # ----------------------
23 | # Diff is the Coverage Diff of the pull request.
24 | # Files are the files impacted by the pull request
25 | comment:
26 | layout: diff, files # accepted in any order: reach, diff, flags, and/or files
27 |
28 | # Ignoring Paths
29 | # --------------
30 | # which folders/files to ignore
31 | ignore:
32 | - "*/tests/.*"
33 | - "setup.cfg"
34 | - "setup.py"
35 | - "pyproject.toml"
36 |
--------------------------------------------------------------------------------
/frouros/callbacks/streaming/base.py:
--------------------------------------------------------------------------------
1 | """Base callback streaming module."""
2 |
3 | import abc
4 | from typing import Union
5 |
6 | from frouros.callbacks.base import BaseCallback
7 |
8 |
class BaseCallbackStreaming(BaseCallback):
    """Base class for streaming callbacks.

    ``on_update_start`` and ``on_update_end`` are no-op hooks that subclasses
    may override; ``reset`` is abstract and must be implemented.
    NOTE(review): the hooks are presumably invoked by the detector around each
    update - confirm against the streaming detector base class.
    """

    def on_update_start(self, value: Union[int, float]) -> None:
        """On update start method (does nothing by default).

        :param value: value used to update the detector
        :type value: Union[int, float]
        """

    def on_update_end(self, value: Union[int, float]) -> None:
        """On update end method (does nothing by default).

        :param value: value used to update the detector
        :type value: Union[int, float]
        """

    # FIXME: set_detector method as a workaround to  # pylint: disable=fixme
    # avoid circular import problem. Make it an abstract method and
    # uncomment commented code when it is solved

    # @abc.abstractmethod
    # def set_detector(self, detector) -> None:
    #     """Set detector method."""

    @abc.abstractmethod
    def reset(self) -> None:
        """Reset method (must be implemented by subclasses)."""
37 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | default_language_version:
2 | python: python3.10
3 | repos:
4 | - repo: https://github.com/pre-commit/pre-commit-hooks
5 | rev: v5.0.0
6 | hooks:
7 | - id: check-added-large-files
8 | - id: check-docstring-first
9 | - id: check-merge-conflict
10 | - id: check-yaml
11 | - id: debug-statements
12 | - id: end-of-file-fixer
13 | - id: no-commit-to-branch
14 | args: ["--branch", "main"]
15 | - id: trailing-whitespace
16 | - repo: https://github.com/astral-sh/ruff-pre-commit
17 | rev: v0.8.1
18 | hooks:
19 | - id: ruff
20 | types_or: [
21 | python,
22 | pyi,
23 | jupyter,
24 | ]
25 | args: [--fix]
26 | - id: ruff-format
27 | types_or: [
28 | python,
29 | pyi,
30 | jupyter,
31 | ]
32 | - repo: https://github.com/pre-commit/mirrors-mypy
33 | rev: v1.13.0
34 | hooks:
35 | - id: mypy
36 | args: [
37 | --config-file,
38 | pyproject.toml,
39 | ]
40 | additional_dependencies: [
41 | types-requests<2.33,
42 | types-toml<0.11,
43 | types-tqdm<4.68,
44 | ]
45 |
--------------------------------------------------------------------------------
/frouros/detectors/concept_drift/__init__.py:
--------------------------------------------------------------------------------
1 | """Concept drift detection methods init."""
2 |
3 | from .streaming import (
4 | ADWIN,
5 | BOCD,
6 | CUSUM,
7 | DDM,
8 | ECDDWT,
9 | EDDM,
10 | HDDMA,
11 | HDDMW,
12 | KSWIN,
13 | RDDM,
14 | STEPD,
15 | ADWINConfig,
16 | BOCDConfig,
17 | CUSUMConfig,
18 | DDMConfig,
19 | ECDDWTConfig,
20 | EDDMConfig,
21 | GeometricMovingAverage,
22 | GeometricMovingAverageConfig,
23 | HDDMAConfig,
24 | HDDMWConfig,
25 | KSWINConfig,
26 | PageHinkley,
27 | PageHinkleyConfig,
28 | RDDMConfig,
29 | STEPDConfig,
30 | )
31 |
32 | __all__ = [
33 | "ADWIN",
34 | "ADWINConfig",
35 | "BOCD",
36 | "BOCDConfig",
37 | "CUSUM",
38 | "CUSUMConfig",
39 | "DDM",
40 | "DDMConfig",
41 | "ECDDWT",
42 | "ECDDWTConfig",
43 | "EDDM",
44 | "EDDMConfig",
45 | "GeometricMovingAverage",
46 | "GeometricMovingAverageConfig",
47 | "HDDMA",
48 | "HDDMAConfig",
49 | "HDDMW",
50 | "HDDMWConfig",
51 | "KSWIN",
52 | "KSWINConfig",
53 | "PageHinkley",
54 | "PageHinkleyConfig",
55 | "RDDM",
56 | "RDDMConfig",
57 | "STEPD",
58 | "STEPDConfig",
59 | ]
60 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | PREPROCESS = sphinx-apidoc
7 | APIDOCTEMPLATEDIR = source/_templates/apidoc
8 | AUTOGENDIR = source/api_reference/auto_generated
9 | SPHINXOPTS =
10 | SPHINXBUILD = sphinx-build
11 | SOURCEDIR = source
12 | BUILDDIR = _build
13 | PROJECTDIR = "../frouros/"
14 |
15 | # exclude unit tests from API reference
16 | EXCLUDEPATHS = "../*tests* ../*test* ../*contrib* ../*setup* ../*all*"
17 |
18 | # Put it first so that "make" without argument is like "make help".
19 | help:
20 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
22 | .PHONY: help Makefile
23 |
24 | # Catch-all target: route all unknown targets to Sphinx using the new
25 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
26 | %: Makefile
27 | rm -rf $(AUTOGENDIR)
28 | @$(PREPROCESS) -eMT --templatedir $(APIDOCTEMPLATEDIR) -o $(AUTOGENDIR) $(PROJECTDIR) "$(EXCLUDEPATHS)"
29 | -rm -rf $(BUILDDIR)
30 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
31 |
32 | livehtml:
33 | sphinx-autobuild "$(SOURCEDIR)" "$(BUILDDIR)" --port $(SPHINXPORT) $(O)
34 |
--------------------------------------------------------------------------------
/docs/source/api_reference/detectors/data_drift/batch.md:
--------------------------------------------------------------------------------
1 | # Batch
2 |
3 | The {mod}`frouros.detectors.data_drift.batch` module contains batch data drift detection algorithms.
4 |
5 | ```{eval-rst}
6 | .. automodule:: frouros.detectors.data_drift.batch
7 | :no-members:
8 | :no-inherited-members:
9 | ```
10 |
11 | ```{currentmodule} frouros.detectors.data_drift.batch
12 | ```
13 |
14 | ## Distance Based
15 |
16 | ```{eval-rst}
17 | .. automodule:: frouros.detectors.data_drift.batch.distance_based
18 | :no-members:
19 | :no-inherited-members:
20 | ```
21 |
22 | ```{eval-rst}
23 | .. autosummary::
24 | :toctree: auto_generated/
25 | :template: class.md
26 |
27 | BhattacharyyaDistance
28 | EMD
29 | EnergyDistance
30 | HellingerDistance
31 | HINormalizedComplement
32 | JS
33 | KL
34 | MMD
35 | PSI
36 | ```
37 |
38 | ## Statistical Test
39 |
40 | ```{eval-rst}
41 | .. automodule:: frouros.detectors.data_drift.batch.statistical_test
42 | :no-members:
43 | :no-inherited-members:
44 | ```
45 |
46 | ```{eval-rst}
47 | .. autosummary::
48 | :toctree: auto_generated/
49 | :template: class.md
50 |
51 | AndersonDarlingTest
52 | BWSTest
53 | ChiSquareTest
54 | CVMTest
55 | KSTest
56 | KuiperTest
57 | MannWhitneyUTest
58 | WelchTTest
59 | ```
60 |
--------------------------------------------------------------------------------
/frouros/detectors/concept_drift/streaming/__init__.py:
--------------------------------------------------------------------------------
1 | """Concept drift streaming detection methods init."""
2 |
3 | # FIXME: Remove pylint disable if batch methods are added
4 | # pylint: skip-file
5 | from .change_detection import (
6 | BOCD,
7 | CUSUM,
8 | BOCDConfig,
9 | CUSUMConfig,
10 | GeometricMovingAverage,
11 | GeometricMovingAverageConfig,
12 | PageHinkley,
13 | PageHinkleyConfig,
14 | )
15 | from .statistical_process_control import (
16 | DDM,
17 | ECDDWT,
18 | EDDM,
19 | HDDMA,
20 | HDDMW,
21 | RDDM,
22 | DDMConfig,
23 | ECDDWTConfig,
24 | EDDMConfig,
25 | HDDMAConfig,
26 | HDDMWConfig,
27 | RDDMConfig,
28 | )
29 | from .window_based import (
30 | ADWIN,
31 | KSWIN,
32 | STEPD,
33 | ADWINConfig,
34 | KSWINConfig,
35 | STEPDConfig,
36 | )
37 |
38 | __all__ = [
39 | "ADWIN",
40 | "ADWINConfig",
41 | "BOCD",
42 | "BOCDConfig",
43 | "CUSUM",
44 | "CUSUMConfig",
45 | "DDM",
46 | "DDMConfig",
47 | "ECDDWT",
48 | "ECDDWTConfig",
49 | "EDDM",
50 | "EDDMConfig",
51 | "GeometricMovingAverage",
52 | "GeometricMovingAverageConfig",
53 | "HDDMA",
54 | "HDDMAConfig",
55 | "HDDMW",
56 | "HDDMWConfig",
57 | "KSWIN",
58 | "KSWINConfig",
59 | "PageHinkley",
60 | "PageHinkleyConfig",
61 | "RDDM",
62 | "RDDMConfig",
63 | "STEPD",
64 | "STEPDConfig",
65 | ]
66 |
--------------------------------------------------------------------------------
/frouros/detectors/data_drift/batch/statistical_test/base.py:
--------------------------------------------------------------------------------
1 | """Base data drift statistical test module."""
2 |
3 | import abc
4 | from collections import namedtuple
5 | from typing import Any, Tuple
6 |
7 | import numpy as np
8 |
9 | from frouros.detectors.data_drift.batch.base import BaseDataDriftBatch
10 |
11 | StatisticalResult = namedtuple("StatisticalResult", ["statistic", "p_value"])
12 |
13 |
class BaseStatisticalTest(BaseDataDriftBatch):
    """Abstract base class for statistical test based batch detectors.

    Concrete detectors supply the actual test through ``_statistical_test``.
    """

    def _apply_method(
        self,
        X_ref: np.ndarray,  # noqa: N803
        X: np.ndarray,
        **kwargs: Any,
    ) -> Tuple[float, float]:
        """Run the statistical test on the reference and test data.

        :param X_ref: reference data
        :type X_ref: numpy.ndarray
        :param X: test data
        :type X: numpy.ndarray
        :param kwargs: additional keyword arguments forwarded to the test
        :type kwargs: Any
        :return: value returned by ``_statistical_test``
        :rtype: Tuple[float, float]
        """
        return self._statistical_test(X_ref=X_ref, X=X, **kwargs)

    def _compare(
        self,
        X: np.ndarray,  # noqa: N803
        **kwargs: Any,
    ) -> StatisticalResult:
        """Validate the input and obtain the test result.

        :param X: test data
        :type X: numpy.ndarray
        :param kwargs: additional keyword arguments forwarded to ``_get_result``
        :type kwargs: Any
        :return: value returned by ``_get_result``
        :rtype: StatisticalResult
        """
        # Both checks run before the result is computed.
        self._common_checks()
        self._specific_checks(X=X)
        return self._get_result(X=X, **kwargs)  # type: ignore

    @staticmethod
    @abc.abstractmethod
    def _statistical_test(
        X_ref: np.ndarray,  # noqa: N803
        X: np.ndarray,
        **kwargs: Any,
    ) -> StatisticalResult:
        """Compute the statistical test (implemented by subclasses).

        :param X_ref: reference data
        :type X_ref: numpy.ndarray
        :param X: test data
        :type X: numpy.ndarray
        :param kwargs: additional keyword arguments
        :type kwargs: Any
        :return: statistic and p-value
        :rtype: StatisticalResult
        """
48 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 |
3 | on:
4 | pull_request:
5 | branches:
6 | - "*"
7 |
8 | jobs:
9 | version:
10 | runs-on: ${{ matrix.os }}
11 | strategy:
12 | matrix:
13 | os: [
14 | ubuntu-22.04,
15 | windows-2022,
16 | macos-14,
17 | ]
18 | python-version: [
19 | '3.9',
20 | '3.10',
21 | '3.11',
22 | '3.12',
23 | ]
24 |
25 | steps:
26 | - name: Checkout
27 | uses: actions/checkout@v4
28 |
29 | - name: Set up Python ${{ matrix.python-version }}
30 | uses: actions/setup-python@v5
31 | with:
32 | python-version: ${{ matrix.python-version }}
33 |
34 | - name: Install dependencies
35 | run: |
36 | python -m pip install --upgrade pip
37 | python -m pip install setuptools==68.2.2 tox==4.12.0 tox-gh-actions==3.2.0
38 |
39 | - name: Create virtual environment
40 | run: |
41 | python -m venv .venv
42 |
43 | - name: Activate virtual environment (Windows)
44 | if: matrix.os == 'windows-2022'
45 | run: |
46 | .venv\Scripts\activate
47 |
48 | - name: Activate virtual environment (Linux/macOS)
49 | if: matrix.os != 'windows-2022'
50 | run: |
51 | source .venv/bin/activate
52 |
53 | - name: Install frouros
54 | run: |
55 | pip install "pytest>=7.4.4,<7.5"
56 | pip install -e .
57 |
58 | - name: Run tox
59 | run: tox
60 |
--------------------------------------------------------------------------------
/frouros/callbacks/batch/base.py:
--------------------------------------------------------------------------------
1 | """Base callback batch module."""
2 |
3 | import abc
4 | from typing import Any
5 |
6 | import numpy as np
7 |
8 | from frouros.callbacks.base import BaseCallback
9 |
10 |
class BaseCallbackBatch(BaseCallback):
    """Base class for batch callbacks.

    ``on_compare_start`` and ``on_compare_end`` are no-op hooks that
    subclasses may override; ``reset`` is abstract and must be implemented.
    NOTE(review): the hooks are presumably invoked by the detector around its
    `compare` method - confirm against the batch detector base class.
    """

    def on_compare_start(
        self,
        X_ref: np.ndarray,  # noqa: N803
        X_test: np.ndarray,
    ) -> None:
        """On compare start method (does nothing by default).

        :param X_ref: reference data
        :type X_ref: numpy.ndarray
        :param X_test: test data
        :type X_test: numpy.ndarray
        """

    def on_compare_end(
        self,
        result: Any,
        X_ref: np.ndarray,  # noqa: N803
        X_test: np.ndarray,
    ) -> None:
        """On compare end method (does nothing by default).

        :param result: result obtained from the `compare` method
        :type result: Any
        :param X_ref: reference data
        :type X_ref: numpy.ndarray
        :param X_test: test data
        :type X_test: numpy.ndarray
        """

    # FIXME: set_detector method as a workaround to  # pylint: disable=fixme
    # avoid circular import problem. Make it an abstract method and
    # uncomment commented code when it is solved

    # @abc.abstractmethod
    # def set_detector(self, detector) -> None:
    #     """Set detector method."""

    @abc.abstractmethod
    def reset(self) -> None:
        """Reset method (must be implemented by subclasses)."""
54 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2022, Spanish National Research Council (CSIC)
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | 1. Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | 3. Neither the name of the copyright holder nor the names of its
17 | contributors may be used to endorse or promote products derived from
18 | this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # How to contribute
2 |
3 | Frouros is an open-source project. Anyone with an interest in the project can join the community and contribute to it in different ways. The following sections describe how you can contribute.
4 |
5 | ## Adding a feature or solving a bug
6 |
7 | Recommended steps for first time contributors:
8 |
9 | 1. Fork repository on GitHub.
10 | 2. Set up a development environment (it is not mandatory, but we highly recommend the use of a [virtual environment](https://docs.python.org/3.11/library/venv.html)):
11 | ```bash
12 | python3 -m venv .venv
13 | source .venv/bin/activate
14 | ```
15 | 3. Download and install development version:
16 | ```bash
17 | git clone https://github.com/<ACCOUNT>/frouros  # Replace <ACCOUNT> with your GitHub account
18 | cd frouros
19 | git checkout main
20 | git pull
21 | pip install -e '.[dev]'
22 | ```
23 | 4. (Optional but recommended) Install pre-commit hooks:
24 | ```bash
25 | pip install pre-commit
26 | pre-commit install
27 | ```
28 | 5. After adding and committing your fix or feature, ensure that code coverage is at least 90% (otherwise the PR will be rejected) and that linting is successfully executed using the following command:
29 | ```bash
30 | tox
31 | ```
32 | 6. Create a pull request to the original repository.
33 |
34 | ## Reporting a bug
35 |
36 | 1. Check that there is not an [issue](https://github.com/IFCA-Advanced-Computing/frouros/issues) that currently highlights the bug or a [pull request](https://github.com/IFCA-Advanced-Computing/frouros/pulls) that solves it.
37 | 2. Create an [issue](https://github.com/IFCA-Advanced-Computing/frouros/issues/new) in GitHub.
38 |
--------------------------------------------------------------------------------
/frouros/tests/unit/metrics/test_prequential_error.py:
--------------------------------------------------------------------------------
1 | """Test prequential error module."""
2 |
3 | import numpy as np
4 | import pytest
5 |
6 | from frouros.metrics import PrequentialError
7 |
8 |
def error_scorer(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """Error scorer function.

    Compare a single ground-truth value with a single prediction.

    :param y_true: ground-truth values (one element)
    :type y_true: numpy.ndarray
    :param y_pred: predicted values (one element)
    :type y_pred: numpy.ndarray
    :return: error value, 0 when both values match and 1 otherwise
    :rtype: float
    """
    # .item() extracts the single comparison result explicitly; calling int()
    # directly on an array with ndim > 0 is deprecated since NumPy 1.25.
    is_correct = np.asarray(y_true == y_pred).item()
    return 1 - int(is_correct)
20 |
21 |
@pytest.mark.parametrize(
    "prequential_error, expected_performance",
    [
        (PrequentialError(alpha=1.0), 0.5),
        (PrequentialError(alpha=0.9999), 0.50000833),
    ],
)
def test_prequential_error(
    prequential_error: PrequentialError, expected_performance: float
) -> None:
    """Check the prequential error obtained after a fixed sequence of updates.

    :param prequential_error: prequential error metric
    :type prequential_error: PrequentialError
    :param expected_performance: expected performance value
    :type expected_performance: float
    """
    ground_truth = [True, True, False, True, False, True]
    predictions = [True, False, False, False, True, True]
    for target, prediction in zip(ground_truth, predictions):
        performance = prequential_error(
            error_value=error_scorer(
                y_true=np.array([target]),
                y_pred=np.array([prediction]),
            ),
        )

    # Only the value returned by the last update is checked.
    assert performance == pytest.approx(expected_performance)
54 |
--------------------------------------------------------------------------------
/frouros/metrics/base.py:
--------------------------------------------------------------------------------
1 | """Base metrics module."""
2 |
3 | import abc
4 | from typing import Optional
5 |
6 |
class BaseMetric(abc.ABC):
    """Abstract base class for metrics.

    It stores a validated ``name`` and requires subclasses to implement
    ``__call__`` and ``reset``.
    """

    def __init__(self, name: Optional[str] = None) -> None:
        """Init method.

        :param name: name value, the class name is used when it is None
        :type name: Optional[str]
        """
        if name is None:
            name = type(self).__name__
        # Assignment goes through the property setter, which validates the type.
        self.name = name

    @property
    def name(self) -> str:
        """Metrics' name property.

        :return: metrics' name
        :rtype: str
        """
        return self._name

    @name.setter
    def name(self, value: str) -> None:
        """Metrics' name setter.

        :param value: value to be set
        :type value: str
        :raises TypeError: Type error exception
        """
        if isinstance(value, str):
            self._name = value
            return
        raise TypeError("value must be of type str.")

    @abc.abstractmethod
    def __call__(
        self,
        error_value: float,
    ) -> float:
        """__call__ method that updates the metric error.

        :param error_value: error value
        :type error_value: float
        :return: cumulative error
        :rtype: float
        """

    @abc.abstractmethod
    def reset(self) -> None:
        """Reset method."""

    def __repr__(self) -> str:
        """Repr method.

        :return: repr value
        :rtype: str
        """
        cls_name = self.__class__.__name__
        return f"{cls_name}(name='{self.name}')"
63 |
--------------------------------------------------------------------------------
/frouros/tests/unit/callbacks/batch/test_permutation.py:
--------------------------------------------------------------------------------
1 | """Test permutation module."""
2 |
3 | import numpy as np
4 | import pytest
5 |
6 | from frouros.utils.stats import permutation
7 |
8 |
def statistic(X: np.ndarray, Y: np.ndarray) -> float:  # noqa: N803
    """Absolute difference between the means of two samples.

    :param X: X data
    :type X: numpy.ndarray
    :param Y: Y data
    :type Y: numpy.ndarray
    :return: statistic
    :rtype: float
    """
    mean_gap = X.mean() - Y.mean()
    return np.abs(mean_gap)
20 |
21 |
@pytest.mark.parametrize(
    "X, Y, expected_num_permutations, expected_permutation_mean",
    [
        (np.array([1, 2, 3]), np.array([10, 20, 30]), 720, 7.6),
        (np.array([*range(1, 11)]), np.array([*range(1, 101, 10)]), 1000, 10.3654),
    ],
)
def test_permutation(
    X: np.ndarray,  # noqa: N803
    Y: np.ndarray,
    expected_num_permutations: int,
    expected_permutation_mean: float,
) -> None:
    """Check the number and the mean of the permuted statistics.

    Both cases request 1000 permutations with a fixed random state; the
    first case expects only 720 results, the second expects all 1000.

    :param X: X data
    :type X: numpy.ndarray
    :param Y: Y data
    :type Y: numpy.ndarray
    :param expected_num_permutations: expected number of permutations
    :type expected_num_permutations: int
    :param expected_permutation_mean: expected permutation mean
    :type expected_permutation_mean: float
    """
    # The second returned element is not needed by this test.
    permuted_statistics, _ = permutation(
        X=X,
        Y=Y,
        statistic=statistic,
        statistical_args={},
        num_permutations=1000,
        num_jobs=1,
        random_state=31,
    )

    num_permutations = len(permuted_statistics)
    assert num_permutations == expected_num_permutations

    permutation_mean = np.array(permuted_statistics).mean()
    assert np.isclose(permutation_mean, expected_permutation_mean)
58 |
--------------------------------------------------------------------------------
/docs/source/api_reference/detectors/concept_drift/streaming.md:
--------------------------------------------------------------------------------
1 | # Streaming
2 |
3 | The {mod}`frouros.detectors.concept_drift.streaming` module contains streaming concept drift detection algorithms.
4 |
5 | ```{eval-rst}
6 | .. automodule:: frouros.detectors.concept_drift.streaming
7 | :no-members:
8 | :no-inherited-members:
9 | ```
10 |
11 | ```{currentmodule} frouros.detectors.concept_drift.streaming
12 | ```
13 |
14 | ## Change Detection
15 |
16 | ```{eval-rst}
17 | .. automodule:: frouros.detectors.concept_drift.streaming.change_detection
18 | :no-members:
19 | :no-inherited-members:
20 | ```
21 |
22 | ```{eval-rst}
23 | .. autosummary::
24 | :toctree: auto_generated/
25 | :template: class.md
26 |
27 | BOCD
28 | BOCDConfig
29 | CUSUM
30 | CUSUMConfig
31 | GeometricMovingAverage
32 | GeometricMovingAverageConfig
33 | PageHinkley
34 | PageHinkleyConfig
35 | ```
36 |
37 | ## Statistical Process Control
38 |
39 | ```{eval-rst}
40 | .. automodule:: frouros.detectors.concept_drift.streaming.statistical_process_control
41 | :no-members:
42 | :no-inherited-members:
43 | ```
44 |
45 | ```{eval-rst}
46 | .. autosummary::
47 | :toctree: auto_generated/
48 | :template: class.md
49 |
50 | DDM
51 | DDMConfig
52 | ECDDWT
53 | ECDDWTConfig
54 | EDDM
55 | EDDMConfig
56 | HDDMA
57 | HDDMAConfig
58 | HDDMW
59 | HDDMWConfig
60 | RDDM
61 | RDDMConfig
62 | ```
63 |
64 | ## Window Based
65 |
66 | ```{eval-rst}
67 | .. automodule:: frouros.detectors.concept_drift.streaming.window_based
68 | :no-members:
69 | :no-inherited-members:
70 | ```
71 |
72 | ```{eval-rst}
73 | .. autosummary::
74 | :toctree: auto_generated/
75 | :template: class.md
76 |
77 | ADWIN
78 | ADWINConfig
79 | KSWIN
80 | KSWINConfig
81 | STEPD
82 | STEPDConfig
83 | ```
84 |
--------------------------------------------------------------------------------
/frouros/tests/integration/test_synthetic.py:
--------------------------------------------------------------------------------
1 | """Test synthetic datasets module."""
2 |
3 | from typing import Any
4 |
5 | import pytest
6 |
7 | from frouros.datasets.exceptions import InvalidBlockError
8 | from frouros.datasets.synthetic import SEA
9 |
10 |
11 | # SEA tests
@pytest.mark.parametrize("seed", [-1, "a"])
def test_sea_invalid_seed_error(seed: Any) -> None:
    """Check that SEA rejects an invalid seed at construction time.

    :param seed: seed value
    :type seed: Any
    """
    # Either exception type is accepted for an invalid seed.
    invalid_seed_errors = (TypeError, ValueError)
    with pytest.raises(invalid_seed_errors):
        SEA(seed=seed)
21 |
22 |
@pytest.mark.parametrize("block", [0, 5])
def test_sea_invalid_block_error(sea: SEA, block: int) -> None:
    """Check that an invalid block raises InvalidBlockError.

    :param sea: SEA generator
    :type sea: SEA
    :param block: block to generate samples from
    :type block: int
    """
    expected_error = InvalidBlockError
    with pytest.raises(expected_error):
        sea.generate_dataset(block=block)
34 |
35 |
@pytest.mark.parametrize("noise", [-0.1, 1.1])
def test_sea_invalid_noise_error(sea: SEA, noise: float) -> None:
    """Check that an invalid noise ratio raises ValueError.

    :param sea: SEA generator
    :type sea: SEA
    :param noise: ratio of samples with a noisy class
    :type noise: float
    """
    # A fixed block is used so that only the noise value varies.
    generation_kwargs = {"block": 1, "noise": noise}
    with pytest.raises(ValueError):
        sea.generate_dataset(**generation_kwargs)
47 |
48 |
@pytest.mark.parametrize("num_samples", [-1, 0])
def test_sea_invalid_num_samples_error(sea: SEA, num_samples: int) -> None:
    """Check that an invalid number of samples raises ValueError.

    :param sea: SEA generator
    :type sea: SEA
    :param num_samples: number of samples to generate
    :type num_samples: int
    """
    # A fixed block is used so that only the number of samples varies.
    generation_kwargs = {"block": 1, "num_samples": num_samples}
    with pytest.raises(ValueError):
        sea.generate_dataset(**generation_kwargs)
60 |
--------------------------------------------------------------------------------
/frouros/tests/integration/test_real.py:
--------------------------------------------------------------------------------
1 | """Test real datasets module."""
2 |
3 | import sys
4 |
5 | import numpy as np
6 | import pytest
7 |
8 | from frouros.datasets.exceptions import ReadFileError
9 | from frouros.datasets.real import Elec2
10 |
11 |
12 | # Elec2 tests
def test_elec2_file_not_found_error(elec2_raw: Elec2) -> None:
    """Check that a second load of the same Elec2 instance fails.

    The first ``load`` call must succeed; repeating it is expected to raise
    ``FileNotFoundError``.

    :param elec2_raw: Elec2 raw dataset
    :type elec2_raw: Elec2
    """
    # NOTE(review): presumably the first load consumes the downloaded file;
    # confirm against the dataset implementation.
    elec2_raw.load()
    with pytest.raises(FileNotFoundError):
        elec2_raw.load()
24 |
25 |
26 | # FIXME: PermissionError not raised on Windows and MacOS.
@pytest.mark.skipif(
    sys.platform.startswith("win"),
    reason="PermissionError not raised on Windows.",
)
@pytest.mark.skipif(
    sys.platform.startswith("darwin"),
    reason="PermissionError not raised on MacOS.",
)
def test_elec2_permission_error() -> None:
    """Test Elec2 permission error.

    Downloading to the ``//elec2`` path is expected to raise
    ``PermissionError``. The test is skipped on Windows and macOS, where
    that exception is not raised.
    """
    # Both construction and download run inside the context manager, so a
    # PermissionError from either call satisfies the test.
    with pytest.raises(PermissionError):
        Elec2(file_path="//elec2").download()
39 |
40 |
def test_elec2_read_file_error(elec2_raw: Elec2) -> None:
    """Check that loading with ``index=2`` raises ReadFileError.

    :param elec2_raw: Elec2 raw dataset
    :type elec2_raw: Elec2
    """
    expected_error = ReadFileError
    with pytest.raises(expected_error):
        elec2_raw.load(index=2)
49 |
50 |
def test_elec2_shape(elec2: np.ndarray) -> None:
    """Check that the loaded Elec2 dataset is one-dimensional with 45312 rows.

    :param elec2: Elec2 dataset
    :type elec2: np.ndarray
    """
    expected_shape = (45312,)
    assert elec2.shape == expected_shape
58 |
59 |
def test_elec2_type(elec2: np.ndarray) -> None:
    """Check that the loaded Elec2 dataset is a NumPy array.

    :param elec2: Elec2 dataset
    :type elec2: np.ndarray
    """
    expected_type = np.ndarray
    assert isinstance(elec2, expected_type)
67 |
--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
1 | name: Publish Package
2 |
3 | on:
4 | release:
5 | types: [published]
6 |
7 | jobs:
8 | build:
9 | name: Build distribution
10 | runs-on: ubuntu-24.04
11 |
12 | steps:
13 | - name: Checkout
14 | uses: actions/checkout@v4
15 |
16 | - name: Set up Python 3.9
17 | uses: actions/setup-python@v5
18 | with:
19 | python-version: "3.9"
20 |
21 | - name: Install dependencies
22 | run: |
23 | python -m pip install --upgrade pip
24 | pip install build
25 |
26 | - name: Build package
27 | run: python -m build
28 |
29 | - name: Store distribution packages
30 | uses: actions/upload-artifact@v4
31 | with:
32 | name: python-package-distributions
33 | path: dist/
34 |
35 | publish-to-testpypi:
36 | name: Publish to TestPyPI
37 | runs-on: ubuntu-24.04
38 | needs: build
39 | environment:
40 | name: testpypi
41 | url: https://test.pypi.org/p/frouros
42 | permissions:
43 | id-token: write
44 |
45 | steps:
46 | - name: Download distribution packages
47 | uses: actions/download-artifact@v4
48 | with:
49 | name: python-package-distributions
50 | path: dist/
51 | - name: Publish distribution to TestPyPI
52 | uses: pypa/gh-action-pypi-publish@release/v1
53 | with:
54 | repository-url: https://test.pypi.org/legacy/
55 |
56 | publish-to-pypi:
57 | name: Publish to PyPI
58 | runs-on: ubuntu-24.04
59 | needs: [build, publish-to-testpypi]
60 | environment:
61 | name: pypi
62 | url: https://pypi.org/p/frouros
63 | permissions:
64 | id-token: write
65 |
66 | steps:
67 | - name: Download distribution packages
68 | uses: actions/download-artifact@v4
69 | with:
70 | name: python-package-distributions
71 | path: dist/
72 | - name: Publish distribution to PyPI
73 | uses: pypa/gh-action-pypi-publish@release/v1
74 |
--------------------------------------------------------------------------------
/frouros/tests/unit/utils/test_stats.py:
--------------------------------------------------------------------------------
1 | """Test stats module."""
2 |
3 | from typing import Union
4 |
5 | import numpy as np
6 | import pytest
7 |
8 | from frouros.utils.stats import CircularMean, Mean
9 |
10 |
@pytest.mark.parametrize(
    "size, values, expected_mean_steps",
    [
        (1, [5, 10, 6, 4, 14], [5.0, 10.0, 6.0, 4.0, 14.0]),
        (3, [5, 10, 6, 4, 14], [5.0, 7.5, 7.0, 6.66666667, 8.0]),
        (6, [5, 10, 6, 4, 14], [5.0, 7.5, 7, 6.25, 7.8]),
    ],
)
def test_circular_mean(
    size: int,
    values: list[Union[int, float]],
    expected_mean_steps: list[Union[int, float]],
) -> None:
    """Check the circular mean after each update.

    :param size: size value
    :type size: int
    :param values: values
    :type values: list[Union[int, float]]
    :param expected_mean_steps: expected mean step values
    :type expected_mean_steps: list[Union[int, float]]
    """
    circular_mean = CircularMean(size=size)

    # The mean is verified after every single update, not only at the end.
    for step_value, step_expected in zip(values, expected_mean_steps):
        circular_mean.update(value=step_value)
        current_mean = circular_mean.get()
        assert np.isclose(current_mean, step_expected)
38 |
39 |
@pytest.mark.parametrize(
    "values, expected_mean_steps",
    [
        ([5, 10, 6, 4, 14], [5.0, 7.5, 7, 6.25, 7.8]),
        ([-5, 10, -6, 4, -14], [-5.0, 2.5, -0.33333334, 0.75, -2.2]),
    ],
)
def test_mean(
    values: list[Union[int, float]],
    expected_mean_steps: list[Union[int, float]],
) -> None:
    """Check the incremental mean after each update.

    :param values: values
    :type values: list[Union[int, float]]
    :param expected_mean_steps: expected mean step values
    :type expected_mean_steps: list[Union[int, float]]
    """
    incremental_mean = Mean()

    # The mean is verified after every single update, not only at the end.
    for step_value, step_expected in zip(values, expected_mean_steps):
        incremental_mean.update(value=step_value)
        current_mean = incremental_mean.get()
        assert np.isclose(current_mean, step_expected)
63 |
--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
1 | cff-version: 1.2.0
2 | title: >-
3 | Frouros: An open-source Python library for drift detection
4 | in machine learning systems
5 | message: >-
6 | If you use this software, please cite it using the
7 | metadata from this file.
8 | type: software
9 | authors:
10 | - given-names: Jaime
11 | family-names: Céspedes Sisniega
12 | email: cespedes@ifca.unican.es
13 | orcid: 'https://orcid.org/0000-0002-6010-1212'
14 | affiliation: >-
15 | Institute of Physics of Cantabria, Spanish National
16 | Research Council — IFCA (CSIC—UC)
17 | - given-names: Álvaro
18 | family-names: López García
19 | email: aloga@ifca.unican.es
20 | orcid: 'https://orcid.org/0000-0002-0013-4602'
21 | affiliation: >-
22 | Institute of Physics of Cantabria, Spanish National
23 | Research Council — IFCA (CSIC—UC)
24 | identifiers:
25 | - type: doi
26 | value: 10.1016/j.softx.2024.101733
27 | description: SoftwareX
28 | - type: doi
29 | value: 10.48550/arXiv.2208.06868
30 | description: arXiv
31 | repository-code: 'https://github.com/IFCA-Advanced-Computing/frouros'
32 | url: 'https://frouros.readthedocs.io'
33 | repository: 'https://github.com/ElsevierSoftwareX/SOFTX-D-24-00119'
34 | repository-artifact: 'https://pypi.org/project/frouros'
35 | abstract: >-
36 | Frouros is an open-source Python library capable of detecting drift in machine learning systems. It provides a combination of classical and more recent algorithms for drift detection, covering both concept and data drift. We have designed it to be compatible with any machine learning framework and easily adaptable to real-world use cases. The library is developed following best development and continuous integration practices to ensure ease of maintenance and extensibility.
37 | keywords:
38 | - Machine learning
39 | - Drift detection
40 | - Concept drift
41 | - Data drift
42 | - Python
43 | license: BSD-3-Clause
44 | commit: 4e1e27ee73507b15090f0038d8dda7c67485b728
45 | version: 0.8.0
46 | date-released: '2024-04-03'
47 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | """Package setup file."""
2 |
3 | import codecs
4 |
5 | import toml
6 | from setuptools import find_packages, setup
7 |
8 |
def long_description() -> str:
    """Return the contents of README.md for use as the long description.

    The file is decoded as ``utf-8-sig``, so a leading byte-order mark is
    not included in the returned text.

    :return: README contents
    :rtype: str
    """
    with codecs.open("README.md", encoding="utf-8-sig") as readme_file:
        readme_text = readme_file.read()
    return readme_text
13 |
14 |
# Package metadata is loaded from pyproject.toml, which is the single source
# of truth. The path is relative, so it is resolved against the current
# working directory when this module is executed.
# For context see:
# - [PEP 621 -- Storing project metadata in pyproject.toml]
# (https://www.python.org/dev/peps/pep-0621)
pyproject = toml.load("pyproject.toml")
20 |
21 |
def setup_package() -> None:
    """Call setuptools ``setup`` with metadata taken from pyproject.toml.

    Only the first entry of the ``authors`` and ``maintainers`` lists is
    used. Test packages are excluded from the distribution.
    """
    project = pyproject["project"]
    first_author = project["authors"][0]
    first_maintainer = project["maintainers"][0]
    urls = project["urls"]
    packages = find_packages(
        where=".",
        exclude=["tests", "tests.*"],
    )

    setup(
        name=project["name"],
        version=project["version"],
        description=project["description"],
        long_description=long_description(),
        long_description_content_type="text/markdown",
        author=first_author["name"],
        author_email=first_author["email"],
        maintainer=first_maintainer["name"],
        maintainer_email=first_maintainer["email"],
        keywords=project["keywords"],
        classifiers=project["classifiers"],
        # FIXME: Find a way not to hardcode license
        license="BSD-3-Clause",
        url=urls["repository"],
        project_urls=urls,
        packages=packages,
        include_package_data=True,
        python_requires=project["requires-python"],
        install_requires=project["dependencies"],
        extras_require=project["optional-dependencies"],
        setup_requires=pyproject["build-system"]["requires"],
        zip_safe=False,
    )
51 |
52 |
# Run the setup only when executed as a script, not when imported.
if __name__ == "__main__":
    setup_package()
55 |
--------------------------------------------------------------------------------
/frouros/utils/persistence.py:
--------------------------------------------------------------------------------
1 | """Persistence module."""
2 |
3 | import pickle
4 |
5 | from frouros.callbacks.base import BaseCallback
6 | from frouros.detectors.base import BaseDetector
7 | from frouros.utils.logger import logger
8 |
# Default pickle protocol for ``save``; mirrors the interpreter's own default.
DEFAULT_PROTOCOL = pickle.DEFAULT_PROTOCOL
10 |
11 |
def load(
    filename: str,
) -> object:
    """Load a pickled object from file.

    .. warning::
        Unpickling can execute arbitrary code. Only load files that come
        from a trusted source.

    :param filename: Filename
    :type filename: str
    :return: Loaded object
    :rtype: object
    :raises OSError: if the file cannot be opened or read
    :raises pickle.UnpicklingError: if the content cannot be unpickled
    """
    try:
        with open(filename, "rb") as file:
            return pickle.load(file)
    except (IOError, pickle.UnpicklingError) as e:
        logger.error("Error occurred while loading object: %s", e)
        # Bare ``raise`` re-raises the active exception with its original
        # traceback.
        raise
31 |
32 |
def save(
    obj: object,
    filename: str,
    pickle_protocol: int = DEFAULT_PROTOCOL,
) -> None:
    """Save object to file using pickle.

    The arguments are validated before the file is opened, so an invalid
    call does not create or truncate ``filename``.

    :param obj: Object to save
    :type obj: object
    :param filename: Filename
    :type filename: str
    :param pickle_protocol: Pickle protocol, defaults to DEFAULT_PROTOCOL
    :type pickle_protocol: int, optional
    :raises TypeError: if obj is not a BaseDetector or BaseCallback instance
    :raises ValueError: if pickle_protocol is outside 0..HIGHEST_PROTOCOL
    :raises OSError: if the file cannot be opened or written
    :raises pickle.PicklingError: if the object cannot be pickled
    """
    # Validation errors are not I/O errors: they are raised outside the
    # ``try`` block because its handler never applied to them.
    if not isinstance(obj, (BaseDetector, BaseCallback)):
        raise TypeError(
            f"Object of type {type(obj)} is not serializable. "
            f"Must be an instance that inherits from BaseDetector or BaseCallback."
        )
    if pickle_protocol not in range(pickle.HIGHEST_PROTOCOL + 1):
        raise ValueError(
            f"Invalid pickle_protocol value. "
            f"Must be in range 0..{pickle.HIGHEST_PROTOCOL}."
        )

    try:
        with open(filename, "wb") as file:
            pickle.dump(
                obj,
                file,
                protocol=pickle_protocol,
            )
    except (IOError, pickle.PicklingError) as e:
        logger.error("Error occurred while saving object: %s", e)
        # Bare ``raise`` re-raises the active exception with its original
        # traceback.
        raise
67 |
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | [tox]
2 | minversion = 4.23.2
3 | envlist =
4 | py3{9, 10, 11, 12}
5 | linters
6 |
7 | [base]
8 | python = python3
9 | skip_install = true
10 | package = frouros
11 | venv = {toxinidir}/.venv
12 | deps =
13 | # Package used as a workaround to install the dependencies read from pyproject.toml dev section.
14 | toml>=0.10.2,<0.11
15 | pyproject_reader =
16 | # Workaround to install the dependencies read from pyproject.toml dev section.
17 | python -c "import toml, subprocess; deps = toml.load('pyproject.toml')['project']['optional-dependencies']['{env:DEPS_SECTION}']; subprocess.run(['pip', 'install'] + deps)"
18 |
19 | [gh-actions]
20 | python =
21 | 3.9: py39, linters
22 | 3.10: py310, linters
23 | 3.11: py311, linters
24 | 3.12: py312, linters
25 |
26 | [testenv]
27 | # Force pip/wheel/setuptools to be upgraded to the latest version
28 | download = True
29 | deps = {[base]deps}
30 | setenv =
31 | DEPS_SECTION = dev-tests
32 | commands_pre =
33 | {[base]pyproject_reader}
34 | commands =
35 | pytest --cov={[base]package} \
36 | --cov-report term \
37 | --cov-report=xml \
38 | --cov-fail-under=90
39 |
40 | [pytest]
41 | addopts = -ra -q
42 | norecursedirs = docs
43 |
44 | [testenv:ruff]
45 | basepython = {[base]python}
46 | skip_install = {[base]skip_install}
47 | deps = {[base]deps}
48 | setenv =
49 | DEPS_SECTION = dev-ruff
50 | commands_pre =
51 | {[base]pyproject_reader}
52 | commands = ruff check --config pyproject.toml .
53 | ruff format --config pyproject.toml .
54 |
55 | [testenv:mypy]
56 | basepython = {[base]python}
57 | skip_install = {[base]skip_install}
58 | deps = {[base]deps}
59 | setenv =
60 | DEPS_SECTION = dev-mypy
61 | commands_pre =
62 | {[base]pyproject_reader}
63 | commands = mypy --config-file pyproject.toml .
64 |
65 | [testenv:linters]
66 | basepython = {[base]python}
67 | skip_install = {[base]skip_install}
68 | setenv =
69 | PYTHONPATH = $PYTHONPATH:{toxinidir}:{[base]venv}/lib/{[base]python}/site-packages
70 | deps =
71 | {[base]deps}
72 | commands_pre =
73 | {[testenv:ruff]commands_pre}
74 | {[testenv:mypy]commands_pre}
75 | commands =
76 | {[testenv:ruff]commands}
77 | {[testenv:mypy]commands}
78 |
--------------------------------------------------------------------------------
/frouros/detectors/data_drift/streaming/distance_based/base.py:
--------------------------------------------------------------------------------
1 | """Base data drift distance based module."""
2 |
3 | import abc
4 | from typing import Any, Optional, Tuple, Union
5 |
6 | import numpy as np
7 |
8 | from frouros.detectors.data_drift.base import BaseResult
9 | from frouros.detectors.data_drift.streaming.base import (
10 | BaseDataDriftStreaming,
11 | )
12 |
13 |
class DistanceResult(BaseResult):
    """Result holding a single distance value."""

    def __init__(
        self,
        distance: Union[int, float],
    ) -> None:
        """Init method.

        :param distance: distance value
        :type distance: Union[int, float]
        """
        # Stored through the ``distance`` property setter.
        self.distance = distance

    @property
    def distance(self) -> Union[int, float]:
        """Distance value property.

        :return: distance value
        :rtype: Union[int, float]
        """
        return self._distance

    @distance.setter
    def distance(self, value: Union[int, float]) -> None:
        """Distance value setter.

        The value is stored as given; no validation is performed.

        :param value: value to be set
        :type value: Union[int, float]
        """
        self._distance = value
45 |
46 |
class BaseDistanceBased(BaseDataDriftStreaming):
    """Abstract base class for streaming distance based detectors.

    Subclasses implement the ``_fit``, ``_reset``, ``_update`` and
    ``_compare`` hooks; :meth:`compare` is the public entry point that
    delegates to ``_compare``.
    """

    @abc.abstractmethod
    def _fit(self, X: np.ndarray) -> None:  # noqa: N803
        """Fit hook to be implemented by subclasses."""

    @abc.abstractmethod
    def _reset(self) -> None:
        """Reset hook to be implemented by subclasses."""

    @abc.abstractmethod
    def _update(self, value: Union[int, float]) -> Optional[DistanceResult]:
        """Update hook to be implemented by subclasses."""

    def compare(
        self,
        X: np.ndarray,  # noqa: N803
    ) -> Tuple[Optional[DistanceResult], dict[str, Any]]:
        """Compare detector.

        :param X: data to use to compare the detector
        :type X: np.ndarray
        :return: whatever the subclass ``_compare`` hook returns
        :rtype: Tuple[Optional[DistanceResult], dict[str, Any]]
        """
        return self._compare(X=X)

    @abc.abstractmethod
    def _compare(
        self,
        X: np.ndarray,  # noqa: N803
    ) -> Tuple[Optional[DistanceResult], dict[str, Any]]:
        """Compare hook to be implemented by subclasses."""
82 |
--------------------------------------------------------------------------------
/frouros/detectors/base.py:
--------------------------------------------------------------------------------
1 | """Base detector module."""
2 |
3 | import abc
4 | from typing import Any, Optional, Union
5 |
6 | import numpy as np
7 |
8 | from frouros.callbacks.base import BaseCallback
9 |
10 |
class BaseDetector(abc.ABC):
    """Abstract class representing a detector."""

    def __init__(
        self,
        callbacks: Optional[Union[BaseCallback, list[BaseCallback]]] = None,
    ) -> None:
        """Init method.

        :param callbacks: callbacks
        :type callbacks: Optional[Union[BaseCallback, list[BaseCallback]]]
        """
        # Goes through the ``callbacks`` setter, which normalizes to a list.
        self.callbacks = callbacks  # type: ignore

    @property
    def callbacks(self) -> Optional[list[BaseCallback]]:
        """Callbacks property.

        :return: callbacks (the setter always stores a list, possibly empty)
        :rtype: Optional[list[BaseCallback]]
        """
        return self._callbacks

    @callbacks.setter
    def callbacks(
        self,
        value: Optional[Union[BaseCallback, list[BaseCallback]]],
    ) -> None:
        """Callbacks setter.

        ``None`` becomes an empty list and a single callback is wrapped in a
        list. Any other value is copied into a new list before validation.

        :param value: value to be set
        :type value: Optional[Union[BaseCallback, list[BaseCallback]]]
        :raises TypeError: Type error exception
        """
        if value is None:
            self._callbacks = []
            return
        if isinstance(value, BaseCallback):
            self._callbacks = [value]
            return

        try:
            # Materialize exactly once: validating a one-shot iterable and
            # then storing it would leave the detector with no callbacks.
            callbacks = list(value)
        except TypeError as e:
            raise TypeError(
                "value must be of type None or a list of BaseCallback."
            ) from e
        if not all(isinstance(callback, BaseCallback) for callback in callbacks):
            raise TypeError("value must be of type None or a list of BaseCallback.")
        self._callbacks = callbacks

    @abc.abstractmethod
    def reset(self) -> None:
        """Reset method."""

    def _get_callbacks_logs(self) -> dict[str, Any]:
        """Collect the logs of every callback, keyed by callback name.

        :return: mapping from callback name to that callback's logs
        :rtype: dict[str, Any]
        """
        return {
            callback.name: callback.logs
            for callback in self.callbacks  # type: ignore
        }

    @staticmethod
    def _check_array(X: Any) -> None:  # noqa: N803
        """Raise if X is not a numpy array.

        :param X: object to check
        :type X: Any
        :raises TypeError: if X is not a numpy.ndarray
        """
        if not isinstance(X, np.ndarray):
            raise TypeError("X must be a numpy array")

    def __repr__(self) -> str:
        """Repr method.

        :return: repr value
        :rtype: str
        """
        callbacks_repr = ", ".join(map(str, self.callbacks))  # type: ignore
        return f"{self.__class__.__name__}(callbacks=[{callbacks_repr}])"
82 |
--------------------------------------------------------------------------------
/frouros/detectors/data_drift/batch/statistical_test/ks.py:
--------------------------------------------------------------------------------
1 | """KSTest (Kolmogorov-Smirnov test) module."""
2 |
3 | from typing import Any, Optional, Union
4 |
5 | import numpy as np
6 | from scipy.stats import ks_2samp
7 |
8 | from frouros.callbacks.batch.base import BaseCallbackBatch
9 | from frouros.detectors.data_drift.base import NumericalData, UnivariateData
10 | from frouros.detectors.data_drift.batch.statistical_test.base import (
11 | BaseStatisticalTest,
12 | StatisticalResult,
13 | )
14 |
15 |
class KSTest(BaseStatisticalTest):
    """KSTest (Kolmogorov-Smirnov test) [massey1951kolmogorov]_ detector.

    :param callbacks: callbacks, defaults to None
    :type callbacks: Optional[Union[BaseCallbackBatch, list[BaseCallbackBatch]]]

    :Note:
    - The ``alternative`` and ``method`` arguments of ``scipy.stats.ks_2samp`` can be passed using :func:`compare` kwargs; other keyword arguments are not forwarded.

    :References:

    .. [massey1951kolmogorov] Massey Jr, Frank J.
        "The Kolmogorov-Smirnov test for goodness of fit."
        Journal of the American statistical Association 46.253 (1951): 68-78.

    :Example:

    >>> from frouros.detectors.data_drift import KSTest
    >>> import numpy as np
    >>> np.random.seed(seed=31)
    >>> X = np.random.normal(loc=0, scale=1, size=100)
    >>> Y = np.random.normal(loc=1, scale=1, size=100)
    >>> detector = KSTest()
    >>> _ = detector.fit(X=X)
    >>> detector.compare(X=Y)[0]
    StatisticalResult(statistic=0.55, p_value=3.0406585087050305e-14)
    """  # noqa: E501  # pylint: disable=line-too-long

    def __init__(  # noqa: D107
        self,
        callbacks: Optional[Union[BaseCallbackBatch, list[BaseCallbackBatch]]] = None,
    ) -> None:
        # The detector is declared as numerical and univariate.
        super().__init__(
            data_type=NumericalData(),
            statistical_type=UnivariateData(),
            callbacks=callbacks,
        )

    @staticmethod
    def _statistical_test(
        X_ref: np.ndarray,  # noqa: N803
        X: np.ndarray,
        **kwargs: Any,
    ) -> StatisticalResult:
        # Only these two options are read from kwargs; the fallbacks are
        # the values used when the caller does not provide them.
        alternative = kwargs.get("alternative", "two-sided")
        method = kwargs.get("method", "auto")
        ks_result = ks_2samp(
            data1=X_ref,
            data2=X,
            alternative=alternative,
            method=method,
        )
        return StatisticalResult(
            statistic=ks_result.statistic,
            p_value=ks_result.pvalue,
        )
71 |
--------------------------------------------------------------------------------
/frouros/detectors/data_drift/batch/statistical_test/bws.py:
--------------------------------------------------------------------------------
1 | """BWSTest (Baumgartner-Weiss-Schindler test) module."""
2 |
3 | from typing import Any, Optional, Union
4 |
5 | import numpy as np
6 | from scipy.stats import bws_test
7 |
8 | from frouros.callbacks.batch.base import BaseCallbackBatch
9 | from frouros.detectors.data_drift.base import NumericalData, UnivariateData
10 | from frouros.detectors.data_drift.batch.statistical_test.base import (
11 | BaseStatisticalTest,
12 | StatisticalResult,
13 | )
14 |
15 |
class BWSTest(BaseStatisticalTest):
    """BWSTest (Baumgartner-Weiss-Schindler test) [baumgartner1998nonparametric]_ detector.

    :param callbacks: callbacks, defaults to None
    :type callbacks: Optional[Union[BaseCallbackBatch, list[BaseCallbackBatch]]]

    :Note:
    - The ``alternative`` and ``method`` arguments of ``scipy.stats.bws_test`` can be passed using :func:`compare` kwargs; other keyword arguments are not forwarded.

    :References:

    .. [baumgartner1998nonparametric] Baumgartner, W., P. Weiß, and H. Schindler.
        "A nonparametric test for the general two-sample problem."
        Biometrics (1998): 1129-1135.

    :Example:

    >>> from frouros.detectors.data_drift import BWSTest
    >>> import numpy as np
    >>> np.random.seed(seed=31)
    >>> X = np.random.normal(loc=0, scale=1, size=100)
    >>> Y = np.random.normal(loc=1, scale=1, size=100)
    >>> detector = BWSTest()
    >>> _ = detector.fit(X=X)
    >>> detector.compare(X=Y)[0]
    StatisticalResult(statistic=29.942072035675395, p_value=0.0001)
    """  # noqa: E501  # pylint: disable=line-too-long

    def __init__(  # noqa: D107
        self,
        callbacks: Optional[Union[BaseCallbackBatch, list[BaseCallbackBatch]]] = None,
    ) -> None:
        # The detector is declared as numerical and univariate.
        super().__init__(
            data_type=NumericalData(),
            statistical_type=UnivariateData(),
            callbacks=callbacks,
        )

    @staticmethod
    def _statistical_test(
        X_ref: np.ndarray,  # noqa: N803
        X: np.ndarray,
        **kwargs: Any,
    ) -> StatisticalResult:
        # Only these two options are read from kwargs; ``method`` falls back
        # to None when the caller does not provide it.
        alternative = kwargs.get("alternative", "two-sided")
        method = kwargs.get("method")
        bws_result = bws_test(
            x=X_ref,
            y=X,
            alternative=alternative,
            method=method,
        )
        return StatisticalResult(
            statistic=bws_result.statistic,
            p_value=bws_result.pvalue,
        )
71 |
--------------------------------------------------------------------------------
/frouros/callbacks/base.py:
--------------------------------------------------------------------------------
1 | """Base callback module."""
2 |
3 | import abc
4 | from typing import Any, Optional
5 |
6 | import numpy as np
7 |
8 |
class BaseCallback(abc.ABC):
    """Abstract base class for callbacks.

    Subclasses must implement :meth:`reset`. The ``on_fit_start`` and
    ``on_fit_end`` hooks do nothing unless overridden.
    """

    def __init__(self, name: Optional[str] = None) -> None:
        """Init method.

        :param name: name value, the class name is used when None
        :type name: Optional[str]
        """
        # Assigned through the ``name`` setter, which validates the value
        # and substitutes the class name for None.
        self.name = name  # type: ignore
        self.detector = None
        self.logs: dict[str, Any] = {}

    @property
    def name(self) -> str:
        """Name property.

        :return: name value
        :rtype: str
        """
        return self._name

    @name.setter
    def name(self, value: Optional[str]) -> None:
        """Name method setter.

        :param value: value to be set
        :type value: Optional[str]
        :raises TypeError: Type error exception
        """
        if value is None:
            self._name = self.__class__.__name__
        elif isinstance(value, str):
            self._name = value
        else:
            raise TypeError("name must be of type str or None.")

    def set_detector(self, detector) -> None:  # type: ignore
        """Store the detector this callback is attached to."""
        self.detector = detector

    # NOTE: ``detector`` is a plain, unvalidated attribute. A type-checked
    # property restricted to BaseConceptDrift / BaseDataDriftBatch was
    # drafted here previously but is not enabled.

    def on_fit_start(self, X: np.ndarray) -> None:  # noqa: N803, B027
        """On fit start method.

        :param X: reference data
        :type X: numpy.ndarray
        """

    def on_fit_end(self, X: np.ndarray) -> None:  # noqa: N803, B027
        """On fit end method.

        :param X: reference data
        :type X: numpy.ndarray
        """

    @abc.abstractmethod
    def reset(self) -> None:
        """Reset method."""

    def __repr__(self) -> str:
        """Repr method.

        :return: repr value
        :rtype: str
        """
        class_name = self.__class__.__name__
        return f"{class_name}(name='{self.name}')"
85 |
--------------------------------------------------------------------------------
/frouros/detectors/data_drift/batch/distance_based/emd.py:
--------------------------------------------------------------------------------
1 | """EMD (Earth Mover's Distance) module."""
2 |
3 | from typing import Any, Optional, Union
4 |
5 | import numpy as np
6 | from scipy.stats import wasserstein_distance
7 |
8 | from frouros.callbacks.batch.base import BaseCallbackBatch
9 | from frouros.detectors.data_drift.base import UnivariateData
10 | from frouros.detectors.data_drift.batch.distance_based.base import (
11 | BaseDistanceBased,
12 | DistanceResult,
13 | )
14 |
15 |
class EMD(BaseDistanceBased):
    """EMD (Earth Mover's Distance) [rubner2000earth]_ detector.

    The distance is computed with :func:`scipy.stats.wasserstein_distance` on the
    flattened reference and test samples.

    :param callbacks: callbacks, defaults to None
    :type callbacks: Optional[Union[BaseCallbackBatch, list[BaseCallbackBatch]]]
    :param kwargs: additional keyword arguments to pass to scipy.stats.wasserstein_distance
    :type kwargs: dict[str, Any]

    :References:

    .. [rubner2000earth] Rubner, Yossi, Carlo Tomasi, and Leonidas J. Guibas.
        "The earth mover's distance as a metric for image retrieval."
        International journal of computer vision 40.2 (2000): 99.

    :Example:

    >>> from frouros.detectors.data_drift import EMD
    >>> import numpy as np
    >>> np.random.seed(seed=31)
    >>> X = np.random.normal(loc=0, scale=1, size=100)
    >>> Y = np.random.normal(loc=1, scale=1, size=100)
    >>> detector = EMD()
    >>> _ = detector.fit(X=X)
    >>> detector.compare(X=Y)[0]
    DistanceResult(distance=1.0686078744674332)
    """  # noqa: E501

    def __init__(  # noqa: D107
        self,
        callbacks: Optional[Union[BaseCallbackBatch, list[BaseCallbackBatch]]] = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(
            statistical_type=UnivariateData(),
            statistical_method=self._emd,
            statistical_kwargs=kwargs,
            callbacks=callbacks,
        )
        # Kept so that _distance_measure can forward them to scipy.
        self.kwargs = kwargs

    def _distance_measure(
        self,
        X_ref: np.ndarray,  # noqa: N803
        X: np.ndarray,  # noqa: N803
        **kwargs: Any,
    ) -> DistanceResult:
        """Compute the EMD between reference and test data.

        The keyword arguments stored at construction time (``self.kwargs``) are
        the ones forwarded; the ``kwargs`` received here are not used.

        :param X_ref: reference data
        :type X_ref: numpy.ndarray
        :param X: data to compare against the reference
        :type X: numpy.ndarray
        :return: distance result wrapping the computed value
        :rtype: DistanceResult
        """
        return DistanceResult(
            distance=self._emd(X=X_ref, Y=X, **self.kwargs),
        )

    @staticmethod
    def _emd(X: np.ndarray, Y: np.ndarray, **kwargs: Any) -> float:  # noqa: N803
        """Return the Wasserstein distance between the flattened inputs.

        :param X: first sample
        :type X: numpy.ndarray
        :param Y: second sample
        :type Y: numpy.ndarray
        :param kwargs: extra arguments for scipy.stats.wasserstein_distance
        :return: distance value
        :rtype: float
        """
        u_values = X.flatten()
        v_values = Y.flatten()
        return wasserstein_distance(u_values=u_values, v_values=v_values, **kwargs)
74 |
--------------------------------------------------------------------------------
/frouros/detectors/data_drift/batch/statistical_test/welch_t_test.py:
--------------------------------------------------------------------------------
1 | """Welch's t-test module."""
2 |
3 | from typing import Any, Optional, Union
4 |
5 | import numpy as np
6 | from scipy.stats import ttest_ind
7 |
8 | from frouros.callbacks.batch.base import BaseCallbackBatch
9 | from frouros.detectors.data_drift.base import NumericalData, UnivariateData
10 | from frouros.detectors.data_drift.batch.statistical_test.base import (
11 | BaseStatisticalTest,
12 | StatisticalResult,
13 | )
14 |
15 |
class WelchTTest(BaseStatisticalTest):
    """Welch's t-test [welch1947generalization]_ detector.

    :param callbacks: callbacks, defaults to None
    :type callbacks: Optional[Union[BaseCallbackBatch, list[BaseCallbackBatch]]]

    :Note:
    - Passing additional arguments to `scipy.stats.ttest_ind <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_ind.html>`__ can be done using :func:`compare` kwargs.

    :References:

    .. [welch1947generalization] Welch, Bernard L.
        "The generalization of 'Student's' problem when several different population
        variances are involved."
        Biometrika 34.1-2 (1947): 28-35.

    :Example:

    >>> from frouros.detectors.data_drift import WelchTTest
    >>> import numpy as np
    >>> np.random.seed(seed=31)
    >>> X = np.random.normal(loc=0, scale=1, size=100)
    >>> Y = np.random.normal(loc=1, scale=1, size=100)
    >>> detector = WelchTTest()
    >>> _ = detector.fit(X=X)
    >>> detector.compare(X=Y)[0]
    StatisticalResult(statistic=-7.651304662806378, p_value=8.685225410826823e-13)
    """  # noqa: E501  # pylint: disable=line-too-long

    def __init__(  # noqa: D107
        self,
        callbacks: Optional[Union[BaseCallbackBatch, list[BaseCallbackBatch]]] = None,
    ) -> None:
        super().__init__(
            data_type=NumericalData(),
            statistical_type=UnivariateData(),
            callbacks=callbacks,
        )

    @staticmethod
    def _statistical_test(
        X_ref: np.ndarray,  # noqa: N803
        X: np.ndarray,
        **kwargs: Any,
    ) -> StatisticalResult:
        """Run Welch's t-test (``equal_var=False``) on reference and test data.

        :param X_ref: reference data
        :type X_ref: numpy.ndarray
        :param X: data to compare against the reference
        :type X: numpy.ndarray
        :param kwargs: extra arguments for scipy.stats.ttest_ind;
            ``alternative`` defaults to ``"two-sided"`` when not given
        :return: statistic and p-value of the test
        :rtype: StatisticalResult
        """
        # Merge the default into the caller's kwargs. Passing ``alternative``
        # explicitly *and* through ``**kwargs`` raised
        # "TypeError: got multiple values for keyword argument" whenever the
        # caller supplied it.
        test_kwargs = {"alternative": "two-sided", **kwargs}
        test = ttest_ind(
            a=X_ref,
            b=X,
            equal_var=False,  # this is what makes it Welch's variant
            **test_kwargs,
        )
        return StatisticalResult(
            statistic=test.statistic,
            p_value=test.pvalue,
        )
73 |
--------------------------------------------------------------------------------
/frouros/detectors/data_drift/batch/distance_based/bhattacharyya_distance.py:
--------------------------------------------------------------------------------
1 | """Bhattacharyya distance module."""
2 |
3 | from typing import Optional, Union
4 |
5 | import numpy as np
6 |
7 | from frouros.callbacks.batch.base import BaseCallbackBatch
8 | from frouros.detectors.data_drift.batch.distance_based.base import (
9 | BaseDistanceBasedBins,
10 | )
11 |
12 |
class BhattacharyyaDistance(BaseDistanceBasedBins):
    """Bhattacharyya distance [bhattacharyya1946measure]_ detector.

    :param num_bins: number of bins in which to divide probabilities, defaults to 10
    :type num_bins: int
    :param callbacks: callbacks, defaults to None
    :type callbacks: Optional[Union[BaseCallbackBatch, list[BaseCallbackBatch]]]

    :References:

    .. [bhattacharyya1946measure] Bhattacharyya, Anil.
        "On a measure of divergence between two multinomial populations."
        Sankhyā: the indian journal of statistics (1946): 401-406.

    :Example:

    >>> from frouros.detectors.data_drift import BhattacharyyaDistance
    >>> import numpy as np
    >>> np.random.seed(seed=31)
    >>> X = np.random.normal(loc=0, scale=1, size=100)
    >>> Y = np.random.normal(loc=1, scale=1, size=100)
    >>> detector = BhattacharyyaDistance(num_bins=20)
    >>> _ = detector.fit(X=X)
    >>> detector.compare(X=Y)[0]
    DistanceResult(distance=0.2182101059622703)
    """

    def __init__(  # noqa: D107
        self,
        num_bins: int = 10,
        callbacks: Optional[Union[BaseCallbackBatch, list[BaseCallbackBatch]]] = None,
    ) -> None:
        super().__init__(
            statistical_method=self._bhattacharyya,
            statistical_kwargs={
                "num_bins": num_bins,
            },
            callbacks=callbacks,
        )
        self.num_bins = num_bins

    def _distance_measure_bins(
        self,
        X_ref: np.ndarray,  # noqa: N803
        X: np.ndarray,  # noqa: N803
    ) -> float:
        """Compute the distance between reference and test data.

        :param X_ref: reference data
        :type X_ref: numpy.ndarray
        :param X: data to compare against the reference
        :type X: numpy.ndarray
        :return: distance value
        :rtype: float
        """
        return self._bhattacharyya(X=X_ref, Y=X, num_bins=self.num_bins)

    @staticmethod
    def _bhattacharyya(
        X: np.ndarray,  # noqa: N803
        Y: np.ndarray,
        *,
        num_bins: int,
    ) -> float:
        """Return one minus the sum of ``sqrt(p_i * q_i)`` over the binned values.

        :param X: first sample
        :type X: numpy.ndarray
        :param Y: second sample
        :type Y: numpy.ndarray
        :param num_bins: number of bins used to bin both samples
        :type num_bins: int
        :return: distance value
        :rtype: float
        """
        (  # noqa: N806
            X_percents,
            Y_percents,
        ) = BaseDistanceBasedBins._calculate_bins_values(
            X_ref=X, X=Y, num_bins=num_bins
        )
        # NOTE(review): this returns 1 - BC, where BC is the Bhattacharyya
        # coefficient; the textbook Bhattacharyya distance is -ln(BC).
        # Confirm which definition is intended before changing it, since
        # callers depend on the current values.
        return 1 - np.sum(np.sqrt(X_percents * Y_percents))
77 |
--------------------------------------------------------------------------------
/frouros/detectors/data_drift/streaming/statistical_test/base.py:
--------------------------------------------------------------------------------
1 | """Data drift statistical test base module."""
2 |
3 | import abc
4 | from typing import Any, Optional, Union
5 |
6 | import numpy as np
7 |
8 | from frouros.detectors.data_drift.base import BaseResult
9 | from frouros.detectors.data_drift.streaming.base import (
10 | BaseDataDriftStreaming,
11 | )
12 |
13 |
class StatisticalResult(BaseResult):
    """Container for the outcome of a statistical test (statistic and p-value)."""

    def __init__(
        self,
        statistic: Union[int, float],
        p_value: Union[int, float],
    ) -> None:
        """Store both values through their property setters.

        :param statistic: statistic value
        :type statistic: Union[int, float]
        :param p_value: p-value, must lie in the closed interval [0, 1]
        :type p_value: Union[int, float]
        :raises ValueError: if the p-value is outside [0, 1]
        """
        self.statistic = statistic
        self.p_value = p_value

    @property
    def statistic(self) -> Union[int, float]:
        """Test statistic.

        :return: statistic value
        :rtype: Union[int, float]
        """
        return self._statistic

    @statistic.setter
    def statistic(self, value: Union[int, float]) -> None:
        """Set the test statistic (no validation is applied).

        :param value: value to be set
        :type value: Union[int, float]
        """
        self._statistic = value

    @property
    def p_value(self) -> Union[int, float]:
        """P-value of the test.

        :return: p-value
        :rtype: Union[int, float]
        """
        return self._p_value

    @p_value.setter
    def p_value(self, value: Union[int, float]) -> None:
        """Set the p-value after checking that it lies in [0, 1].

        :param value: value to be set
        :type value: Union[int, float]
        :raises ValueError: if the value is not between 0 and 1
        """
        # The positive range test is kept (rather than ``value < 0 or
        # value > 1``) so that NaN, which fails every comparison, is rejected.
        if 0 <= value <= 1:
            self._p_value = value
            return
        raise ValueError("p-value must be between 0 and 1.")
69 |
70 |
class BaseStatisticalTest(BaseDataDriftStreaming):
    """Abstract base for streaming statistical-test detectors.

    Declares the abstract methods ``_fit``, ``_reset``, ``_update`` and
    ``_statistical_test``.
    """

    @abc.abstractmethod
    def _fit(self, X: np.ndarray) -> None:  # noqa: N803
        """Fit step, to be implemented by subclasses.

        :param X: data to fit on
        :type X: numpy.ndarray
        """

    @abc.abstractmethod
    def _reset(self) -> None:
        """Reset step, to be implemented by subclasses."""

    @abc.abstractmethod
    def _update(self, value: Union[int, float]) -> Optional[StatisticalResult]:
        """Update step for a single value, to be implemented by subclasses.

        :param value: value to update with
        :type value: Union[int, float]
        :return: statistical result, or None
        :rtype: Optional[StatisticalResult]
        """

    @staticmethod
    @abc.abstractmethod
    def _statistical_test(
        X_ref: np.ndarray,  # noqa: N803
        X: np.ndarray,
        **kwargs: Any,
    ) -> StatisticalResult:
        """Statistical test between two samples, to be implemented by subclasses.

        :param X_ref: reference data
        :type X_ref: numpy.ndarray
        :param X: data to compare against the reference
        :type X: numpy.ndarray
        :param kwargs: additional keyword arguments
        :return: statistical result
        :rtype: StatisticalResult
        """
94 |
--------------------------------------------------------------------------------
/frouros/detectors/data_drift/batch/distance_based/energy_distance.py:
--------------------------------------------------------------------------------
1 | """Energy Distance module."""
2 |
3 | from typing import Any, Optional, Union
4 |
5 | import numpy as np
6 | from scipy.stats import energy_distance
7 |
8 | from frouros.callbacks.batch.base import BaseCallbackBatch
9 | from frouros.detectors.data_drift.base import UnivariateData
10 | from frouros.detectors.data_drift.batch.distance_based.base import (
11 | BaseDistanceBased,
12 | DistanceResult,
13 | )
14 |
15 |
class EnergyDistance(BaseDistanceBased):
    """EnergyDistance [szekely2013energy]_ detector.

    The distance is computed with :func:`scipy.stats.energy_distance` on the
    flattened reference and test samples.

    :param callbacks: callbacks, defaults to None
    :type callbacks: Optional[Union[BaseCallbackBatch, list[BaseCallbackBatch]]]
    :param kwargs: additional keyword arguments to pass to scipy.stats.energy_distance
    :type kwargs: dict[str, Any]

    :References:

    .. [szekely2013energy] Székely, Gábor J., and Maria L. Rizzo.
        "Energy statistics: A class of statistics based on distances."
        Journal of statistical planning and inference 143.8 (2013): 1249-1272.

    :Example:

    >>> from frouros.detectors.data_drift import EnergyDistance
    >>> import numpy as np
    >>> np.random.seed(seed=31)
    >>> X = np.random.normal(loc=0, scale=1, size=100)
    >>> Y = np.random.normal(loc=1, scale=1, size=100)
    >>> detector = EnergyDistance()
    >>> _ = detector.fit(X=X)
    >>> detector.compare(X=Y)[0]
    DistanceResult(distance=0.8359206395514527)
    """  # noqa: E501

    def __init__(  # noqa: D107
        self,
        callbacks: Optional[Union[BaseCallbackBatch, list[BaseCallbackBatch]]] = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(
            statistical_type=UnivariateData(),
            statistical_method=self._energy_distance,
            statistical_kwargs=kwargs,
            callbacks=callbacks,
        )
        # Kept so that _distance_measure can forward them to scipy.
        self.kwargs = kwargs

    def _distance_measure(
        self,
        X_ref: np.ndarray,  # noqa: N803
        X: np.ndarray,  # noqa: N803
        **kwargs: Any,
    ) -> DistanceResult:
        """Compute the energy distance between reference and test data.

        The keyword arguments stored at construction time (``self.kwargs``) are
        the ones forwarded; the ``kwargs`` received here are not used.

        :param X_ref: reference data
        :type X_ref: numpy.ndarray
        :param X: data to compare against the reference
        :type X: numpy.ndarray
        :return: distance result wrapping the computed value
        :rtype: DistanceResult
        """
        return DistanceResult(
            distance=self._energy_distance(X=X_ref, Y=X, **self.kwargs),
        )

    @staticmethod
    def _energy_distance(
        X: np.ndarray,  # noqa: N803
        Y: np.ndarray,
        **kwargs: Any,
    ) -> float:
        """Return the energy distance between the flattened inputs.

        :param X: first sample
        :type X: numpy.ndarray
        :param Y: second sample
        :type Y: numpy.ndarray
        :param kwargs: extra arguments for scipy.stats.energy_distance
        :return: distance value
        :rtype: float
        """
        u_values = X.flatten()
        v_values = Y.flatten()
        return energy_distance(u_values=u_values, v_values=v_values, **kwargs)
78 |
--------------------------------------------------------------------------------
/frouros/detectors/data_drift/batch/statistical_test/mann_whitney_u.py:
--------------------------------------------------------------------------------
1 | """Mann-Whitney U test module."""
2 |
3 | from typing import Any, Optional, Union
4 |
5 | import numpy as np
6 | from scipy.stats import mannwhitneyu
7 |
8 | from frouros.callbacks.batch.base import BaseCallbackBatch
9 | from frouros.detectors.data_drift.base import NumericalData, UnivariateData
10 | from frouros.detectors.data_drift.batch.statistical_test.base import (
11 | BaseStatisticalTest,
12 | StatisticalResult,
13 | )
14 |
15 |
class MannWhitneyUTest(BaseStatisticalTest):
    """Mann–Whitney U test [mann1947test]_ detector.

    :param callbacks: callbacks, defaults to None
    :type callbacks: Optional[Union[BaseCallbackBatch, list[BaseCallbackBatch]]]

    :Note:
    - Passing additional arguments to `scipy.stats.mannwhitneyu <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.mannwhitneyu.html>`__ can be done using :func:`compare` kwargs.

    :References:

    .. [mann1947test] Mann, Henry B., and Donald R. Whitney.
        "On a test of whether one of two random variables is stochastically larger than
        the other."
        The annals of mathematical statistics (1947): 50-60.

    :Example:

    >>> from frouros.detectors.data_drift import MannWhitneyUTest
    >>> import numpy as np
    >>> np.random.seed(seed=31)
    >>> X = np.random.normal(loc=0, scale=1, size=100)
    >>> Y = np.random.normal(loc=1, scale=1, size=100)
    >>> detector = MannWhitneyUTest()
    >>> _ = detector.fit(X=X)
    >>> detector.compare(X=Y)[0]
    StatisticalResult(statistic=2139.0, p_value=2.7623373527697943e-12)
    """  # noqa: E501  # pylint: disable=line-too-long

    def __init__(  # noqa: D107
        self,
        callbacks: Optional[Union[BaseCallbackBatch, list[BaseCallbackBatch]]] = None,
    ) -> None:
        super().__init__(
            data_type=NumericalData(),
            statistical_type=UnivariateData(),
            callbacks=callbacks,
        )

    @staticmethod
    def _statistical_test(
        X_ref: np.ndarray,  # noqa: N803
        X: np.ndarray,
        **kwargs: Any,
    ) -> StatisticalResult:
        """Run the Mann-Whitney U test on reference and test data.

        :param X_ref: reference data
        :type X_ref: numpy.ndarray
        :param X: data to compare against the reference
        :type X: numpy.ndarray
        :param kwargs: extra arguments for scipy.stats.mannwhitneyu;
            ``alternative`` defaults to ``"two-sided"`` and ``nan_policy`` to
            ``"raise"`` when not given
        :return: statistic and p-value of the test
        :rtype: StatisticalResult
        """
        # Merge the defaults into the caller's kwargs. Passing ``alternative``
        # / ``nan_policy`` explicitly *and* through ``**kwargs`` raised
        # "TypeError: got multiple values for keyword argument" whenever the
        # caller supplied either of them.
        test_kwargs = {
            "alternative": "two-sided",
            "nan_policy": "raise",
            **kwargs,
        }
        test = mannwhitneyu(  # pylint: disable=unexpected-keyword-arg
            x=X_ref,
            y=X,
            **test_kwargs,
        )
        return StatisticalResult(
            statistic=test.statistic,
            p_value=test.pvalue,
        )
73 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
131 | # PyCharm
132 | .idea
133 |
134 | # Sphinx
135 | docs/_build/
136 | docs/source/_static/
137 | docs/source/api_reference/auto_generated/
138 | docs/source/api_reference/*/auto_generated/
139 | docs/source/api_reference/*/*/auto_generated/
140 |
141 | # Tests
142 | test-result.xml
143 |
--------------------------------------------------------------------------------
/frouros/detectors/data_drift/batch/statistical_test/anderson_darling.py:
--------------------------------------------------------------------------------
1 | """Anderson-Darling test module."""
2 |
3 | from typing import Any, Optional, Union
4 |
5 | import numpy as np
6 | from scipy.stats import anderson_ksamp
7 |
8 | from frouros.callbacks.batch.base import BaseCallbackBatch
9 | from frouros.detectors.data_drift.base import NumericalData, UnivariateData
10 | from frouros.detectors.data_drift.batch.statistical_test.base import (
11 | BaseStatisticalTest,
12 | StatisticalResult,
13 | )
14 |
15 |
class AndersonDarlingTest(BaseStatisticalTest):
    """Anderson-Darling test [scholz1987k]_ detector.

    :param callbacks: callbacks, defaults to None
    :type callbacks: Optional[Union[BaseCallbackBatch, list[BaseCallbackBatch]]]

    :Note:
    - Passing additional arguments to `scipy.stats.anderson_ksamp <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.anderson_ksamp.html>`__ can be done using :func:`compare` kwargs.
    - p-values are bounded between 0.001 and 0.25 according to `scipy.stats.anderson_ksamp <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.anderson_ksamp.html>`__.

    :References:

    .. [scholz1987k] Scholz, Fritz W., and Michael A. Stephens.
        "K-sample Anderson–Darling tests."
        Journal of the American Statistical Association 82.399 (1987): 918-924.

    :Example:

    >>> from frouros.detectors.data_drift import AndersonDarlingTest
    >>> import numpy as np
    >>> np.random.seed(seed=31)
    >>> X = np.random.normal(loc=0, scale=1, size=100)
    >>> Y = np.random.normal(loc=1, scale=1, size=100)
    >>> detector = AndersonDarlingTest()
    >>> _ = detector.fit(X=X)
    >>> detector.compare(X=Y)[0]
    StatisticalResult(statistic=32.40316586267425, p_value=0.001)
    """  # noqa: E501  # pylint: disable=line-too-long

    def __init__(  # noqa: D107
        self,
        callbacks: Optional[Union[BaseCallbackBatch, list[BaseCallbackBatch]]] = None,
    ) -> None:
        super().__init__(
            data_type=NumericalData(),
            statistical_type=UnivariateData(),
            callbacks=callbacks,
        )

    @staticmethod
    def _statistical_test(
        X_ref: np.ndarray,  # noqa: N803
        X: np.ndarray,
        **kwargs: Any,
    ) -> StatisticalResult:
        """Run the k-sample Anderson-Darling test on the two samples.

        :param X_ref: reference data
        :type X_ref: numpy.ndarray
        :param X: data to compare against the reference
        :type X: numpy.ndarray
        :param kwargs: extra arguments for scipy.stats.anderson_ksamp
        :return: statistic and p-value of the test
        :rtype: StatisticalResult
        """
        samples = [X_ref, X]
        outcome = anderson_ksamp(samples=samples, **kwargs)
        return StatisticalResult(
            statistic=outcome.statistic,
            p_value=outcome.pvalue,
        )
73 |
--------------------------------------------------------------------------------
/frouros/datasets/real.py:
--------------------------------------------------------------------------------
1 | """Real datasets module."""
2 |
3 | from typing import Any, Optional
4 |
5 | import numpy as np
6 | from scipy.io import arff
7 |
8 | from frouros.datasets.base import BaseDatasetDownload
9 |
10 |
class Elec2(BaseDatasetDownload):
    """Elec2 dataset [harries1999splice]_.

    :param file_path: file path for the downloaded file, defaults to None. If None, the file will be downloaded to a temporary file.
    :type file_path: Optional[str]

    :Note:
    Dataset can be downloaded from the following sources (in order of preference):

    - https://nextcloud.ifca.es/index.php/s/2coqgBEpa82boLS/download
    - https://www.openml.org/data/download/2419/electricity-normalized.arff

    :References:

    .. [harries1999splice] Harries, Michael.
        "Splice-2 comparative evaluation: Electricity pricing." (1999).

    :Example:

    >>> from frouros.datasets.real import Elec2
    >>> elec2 = Elec2()
    >>> elec2.download()
    INFO:frouros:Trying to download data from https://nextcloud.ifca.es/index.php/s/2coqgBEpa82boLS/download to /tmp/tmpro3ienx0
    >>> dataset = elec2.load()
    >>> dataset
    array([(0.    , b'2', 0.      , 0.056443, 0.439155, 0.003467, 0.422915, 0.414912, b'UP'),
           (0.    , b'2', 0.021277, 0.051699, 0.415055, 0.003467, 0.422915, 0.414912, b'UP'),
           (0.    , b'2', 0.042553, 0.051489, 0.385004, 0.003467, 0.422915, 0.414912, b'UP'),
           ...,
           (0.9158, b'7', 0.957447, 0.043593, 0.34097 , 0.002983, 0.247799, 0.362281, b'DOWN'),
           (0.9158, b'7', 0.978723, 0.066651, 0.329366, 0.00463 , 0.345417, 0.206579, b'UP'),
           (0.9158, b'7', 1.      , 0.050679, 0.288753, 0.003542, 0.355256, 0.23114 , b'DOWN')],
          dtype=[('date', '<f8'), ('day', 'S1'), ('period', '<f8'), ('nswprice', '<f8'), ('nswdemand', '<f8'), ('vicprice', '<f8'), ('vicdemand', '<f8'), ('transfer', '<f8'), ('class', 'S4')])
    """  # noqa: E501  # pylint: disable=line-too-long

    # NOTE(review): the dtype line above and the ``__init__`` signature below
    # were reconstructed after an extraction step dropped the text between
    # ``<`` and ``>``; verify both against the upstream file.
    def __init__(  # noqa: D107
        self,
        file_path: Optional[str] = None,
    ) -> None:
        super().__init__(
            url=[
                "https://nextcloud.ifca.es/index.php/s/2coqgBEpa82boLS/download",
                "https://www.openml.org/data/download/2419/electricity-normalized.arff",
            ],
            file_path=file_path,
        )

    def read_file(self, **kwargs: Any) -> np.ndarray:
        """Read the downloaded ARFF file.

        :param kwargs: additional arguments; ``index`` (default 0) selects
            which element of the ``scipy.io.arff.loadarff`` result is returned
        :type kwargs: Any
        :return: read file
        :rtype: numpy.ndarray
        """
        index = kwargs.get("index", 0)
        dataset = arff.loadarff(f=self.file_path)[index]
        return dataset
69 |
--------------------------------------------------------------------------------
/frouros/detectors/data_drift/batch/distance_based/hellinger_distance.py:
--------------------------------------------------------------------------------
1 | """Hellinger distance module."""
2 |
3 | from typing import Optional, Union
4 |
5 | import numpy as np
6 |
7 | from frouros.callbacks.batch.base import BaseCallbackBatch
8 | from frouros.detectors.data_drift.batch.distance_based.base import (
9 | BaseDistanceBasedBins,
10 | )
11 |
12 |
class HellingerDistance(BaseDistanceBasedBins):
    """Hellinger distance [hellinger1909neue]_ detector.

    :param num_bins: number of bins in which to divide probabilities, defaults to 10
    :type num_bins: int
    :param callbacks: callbacks, defaults to None
    :type callbacks: Optional[Union[BaseCallbackBatch, list[BaseCallbackBatch]]]

    :References:

    .. [hellinger1909neue] Hellinger, Ernst.
        "Neue begründung der theorie quadratischer formen von unendlichvielen
        veränderlichen."
        Journal für die reine und angewandte Mathematik 1909.136 (1909): 210-271.

    :Example:

    >>> from frouros.detectors.data_drift import HellingerDistance
    >>> import numpy as np
    >>> np.random.seed(seed=31)
    >>> X = np.random.normal(loc=0, scale=1, size=100)
    >>> Y = np.random.normal(loc=1, scale=1, size=100)
    >>> detector = HellingerDistance(num_bins=20)
    >>> _ = detector.fit(X=X)
    >>> detector.compare(X=Y)[0]
    DistanceResult(distance=0.467129645775421)
    """

    def __init__(  # noqa: D107
        self,
        num_bins: int = 10,
        callbacks: Optional[Union[BaseCallbackBatch, list[BaseCallbackBatch]]] = None,
    ) -> None:
        # sqrt(2) is computed once here and reused on every distance call.
        sqrt_div = np.sqrt(2)
        super().__init__(
            statistical_method=self._hellinger,
            statistical_kwargs={
                "num_bins": num_bins,
                "sqrt_div": sqrt_div,
            },
            callbacks=callbacks,
        )
        self.num_bins = num_bins
        self.sqrt_div = sqrt_div

    def _distance_measure_bins(
        self,
        X_ref: np.ndarray,  # noqa: N803
        X: np.ndarray,  # noqa: N803
    ) -> float:
        """Compute the Hellinger distance between reference and test data.

        :param X_ref: reference data
        :type X_ref: numpy.ndarray
        :param X: data to compare against the reference
        :type X: numpy.ndarray
        :return: distance value
        :rtype: float
        """
        return self._hellinger(
            X=X_ref,
            Y=X,
            num_bins=self.num_bins,
            sqrt_div=self.sqrt_div,
        )

    @staticmethod
    def _hellinger(
        X: np.ndarray,  # noqa: N803
        Y: np.ndarray,
        *,
        num_bins: int,
        sqrt_div: float,
    ) -> float:
        """Return the Hellinger distance between the binned values of both samples.

        :param X: first sample
        :type X: numpy.ndarray
        :param Y: second sample
        :type Y: numpy.ndarray
        :param num_bins: number of bins used to bin both samples
        :type num_bins: int
        :param sqrt_div: divisor applied to the root of the summed squares
        :type sqrt_div: float
        :return: distance value
        :rtype: float
        """
        (  # noqa: N806
            X_percents,
            Y_percents,
        ) = BaseDistanceBasedBins._calculate_bins_values(
            X_ref=X, X=Y, num_bins=num_bins
        )
        root_diff = np.sqrt(X_percents) - np.sqrt(Y_percents)
        return np.sqrt(np.sum(root_diff**2)) / sqrt_div
89 |
--------------------------------------------------------------------------------
/frouros/detectors/data_drift/batch/distance_based/kl.py:
--------------------------------------------------------------------------------
1 | """KL (Kullback-Leibler divergence distance) module."""
2 |
3 | from typing import Any, Optional, Union
4 |
5 | import numpy as np
6 | from scipy.special import rel_entr
7 |
8 | from frouros.callbacks.batch.base import BaseCallbackBatch
9 | from frouros.detectors.data_drift.batch.distance_based.base import (
10 | BaseDistanceBasedProbability,
11 | DistanceResult,
12 | )
13 |
14 |
class KL(BaseDistanceBasedProbability):
    """KL (Kullback-Leibler divergence) [kullback1951information]_ detector.

    :param num_bins: number of bins in which to divide probabilities, defaults to 10
    :type num_bins: int
    :param callbacks: callbacks, defaults to None
    :type callbacks: Optional[Union[BaseCallbackBatch, list[BaseCallbackBatch]]]
    :param kwargs: additional keyword arguments to pass to scipy.special.rel_entr
    :type kwargs: dict[str, Any]

    :References:

    .. [kullback1951information] Kullback, Solomon, and Richard A. Leibler.
        "On information and sufficiency."
        The annals of mathematical statistics 22.1 (1951): 79-86.

    :Example:

    >>> from frouros.detectors.data_drift import KL
    >>> import numpy as np
    >>> np.random.seed(seed=31)
    >>> X = np.random.normal(loc=0, scale=1, size=100)
    >>> Y = np.random.normal(loc=1, scale=1, size=100)
    >>> detector = KL(num_bins=20)
    >>> _ = detector.fit(X=X)
    >>> detector.compare(X=Y)[0]
    DistanceResult(distance=inf)
    """

    def __init__(  # noqa: D107
        self,
        num_bins: int = 10,
        callbacks: Optional[Union[BaseCallbackBatch, list[BaseCallbackBatch]]] = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(
            statistical_method=self._kl,
            statistical_kwargs={**kwargs, "num_bins": num_bins},
            callbacks=callbacks,
        )
        self.num_bins = num_bins
        self.kwargs = kwargs

    def _distance_measure(
        self,
        X_ref: np.ndarray,  # noqa: N803
        X: np.ndarray,  # noqa: N803
        **kwargs: Any,
    ) -> DistanceResult:
        """Compute the KL divergence between reference and test data.

        The keyword arguments stored at construction time (``self.kwargs``) are
        the ones forwarded; the ``kwargs`` received here are not used.

        :param X_ref: reference data
        :type X_ref: numpy.ndarray
        :param X: data to compare against the reference
        :type X: numpy.ndarray
        :return: distance result wrapping the computed value
        :rtype: DistanceResult
        """
        kl = self._kl(X=X_ref, Y=X, num_bins=self.num_bins, **self.kwargs)
        return DistanceResult(distance=kl)

    @staticmethod
    def _kl(
        X: np.ndarray,  # noqa: N803
        Y: np.ndarray,
        *,
        num_bins: int,
        **kwargs: Any,
    ) -> float:
        """Sum the element-wise relative entropy of the two probability vectors.

        :param X: reference sample
        :type X: numpy.ndarray
        :param Y: sample compared against the reference
        :type Y: numpy.ndarray
        :param num_bins: number of bins used to estimate the probabilities
        :type num_bins: int
        :param kwargs: extra arguments for scipy.special.rel_entr
        :return: divergence value
        :rtype: float
        """
        (  # noqa: N806
            X_ref_rvs,
            X_rvs,
        ) = BaseDistanceBasedProbability._calculate_probabilities(
            X_ref=X,
            X=Y,
            num_bins=num_bins,
        )
        # Argument order matters: the probabilities of Y come first and those
        # of the reference X second.
        return np.sum(rel_entr(X_rvs, X_ref_rvs, **kwargs))
86 |
--------------------------------------------------------------------------------
/frouros/detectors/data_drift/batch/distance_based/js.py:
--------------------------------------------------------------------------------
1 | """JS (Jensen-Shannon distance) module."""
2 |
3 | from typing import Any, Optional, Union
4 |
5 | import numpy as np
6 | from scipy.spatial.distance import jensenshannon
7 |
8 | from frouros.callbacks.batch.base import BaseCallbackBatch
9 | from frouros.detectors.data_drift.batch.distance_based.base import (
10 | BaseDistanceBasedProbability,
11 | DistanceResult,
12 | )
13 |
14 |
class JS(BaseDistanceBasedProbability):
    """JS (Jensen-Shannon distance) [lin1991divergence]_ detector.

    :param num_bins: number of bins in which to divide probabilities, defaults to 10
    :type num_bins: int
    :param callbacks: callbacks, defaults to None
    :type callbacks: Optional[Union[BaseCallbackBatch, list[BaseCallbackBatch]]]
    :param kwargs: additional keyword arguments to pass to scipy.spatial.distance.jensenshannon
    :type kwargs: dict[str, Any]

    :References:

    .. [lin1991divergence] Lin, Jianhua.
        "Divergence measures based on the Shannon entropy."
        IEEE Transactions on Information theory 37.1 (1991): 145-151.

    :Example:

    >>> from frouros.detectors.data_drift import JS
    >>> import numpy as np
    >>> np.random.seed(seed=31)
    >>> X = np.random.normal(loc=0, scale=1, size=100)
    >>> Y = np.random.normal(loc=1, scale=1, size=100)
    >>> detector = JS(num_bins=20)
    >>> _ = detector.fit(X=X)
    >>> detector.compare(X=Y)[0]
    DistanceResult(distance=0.41702877367162156)
    """  # noqa: E501

    def __init__(  # noqa: D107
        self,
        num_bins: int = 10,
        callbacks: Optional[Union[BaseCallbackBatch, list[BaseCallbackBatch]]] = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(
            statistical_method=self._js,
            statistical_kwargs={
                "num_bins": num_bins,
                **kwargs,
            },
            callbacks=callbacks,
        )
        self.num_bins = num_bins
        self.kwargs = kwargs

    def _distance_measure(
        self,
        X_ref: np.ndarray,  # noqa: N803
        X: np.ndarray,  # noqa: N803
        **kwargs: Any,
    ) -> DistanceResult:
        """Compute the Jensen-Shannon distance between reference and test data.

        The keyword arguments stored at construction time (``self.kwargs``) are
        the ones forwarded; the ``kwargs`` received here are not used.

        :param X_ref: reference data
        :type X_ref: numpy.ndarray
        :param X: data to compare against the reference
        :type X: numpy.ndarray
        :return: distance result wrapping the computed value
        :rtype: DistanceResult
        """
        js = self._js(X=X_ref, Y=X, num_bins=self.num_bins, **self.kwargs)
        return DistanceResult(distance=js)

    @staticmethod
    def _js(
        X: np.ndarray,  # noqa: N803
        Y: np.ndarray,
        *,
        num_bins: int,
        **kwargs: Any,
    ) -> float:
        """Return the Jensen-Shannon distance of the two probability vectors.

        :param X: reference sample
        :type X: numpy.ndarray
        :param Y: sample compared against the reference
        :type Y: numpy.ndarray
        :param num_bins: number of bins used to estimate the probabilities
        :type num_bins: int
        :param kwargs: extra arguments for scipy.spatial.distance.jensenshannon
        :return: distance value
        :rtype: float
        """
        (  # noqa: N806
            X_ref_rvs,
            X_rvs,
        ) = BaseDistanceBasedProbability._calculate_probabilities(
            X_ref=X,
            X=Y,
            num_bins=num_bins,
        )
        return jensenshannon(p=X_ref_rvs, q=X_rvs, **kwargs)
89 |
--------------------------------------------------------------------------------
/frouros/detectors/data_drift/batch/distance_based/hi_normalized_complement.py:
--------------------------------------------------------------------------------
1 | """HI (Histogram intersection) normalized complement module."""
2 |
3 | from typing import Optional, Union
4 |
5 | import numpy as np
6 |
7 | from frouros.callbacks.batch.base import BaseCallbackBatch
8 | from frouros.detectors.data_drift.batch.distance_based.base import (
9 | BaseDistanceBasedBins,
10 | )
11 |
12 |
class HINormalizedComplement(BaseDistanceBasedBins):
    """HI (Histogram intersection) normalized complement [swain1991color]_ detector.

    :param num_bins: number of bins in which to divide probabilities, defaults to 10
    :type num_bins: int
    :param callbacks: callbacks, defaults to None
    :type callbacks: Optional[Union[BaseCallbackBatch, list[BaseCallbackBatch]]]

    :References:

    .. [swain1991color] Swain, M. J., and D. H. Ballard.
        "Color Indexing International Journal of Computer
        Vision 7." (1991): 11-32.

    :Example:

    >>> from frouros.detectors.data_drift import HINormalizedComplement
    >>> import numpy as np
    >>> np.random.seed(seed=31)
    >>> X = np.random.normal(loc=0, scale=1, size=100)
    >>> Y = np.random.normal(loc=1, scale=1, size=100)
    >>> detector = HINormalizedComplement(num_bins=20)
    >>> _ = detector.fit(X=X)
    >>> detector.compare(X=Y)[0]
    DistanceResult(distance=0.53)
    """

    def __init__(  # noqa: D107
        self,
        num_bins: int = 10,
        callbacks: Optional[Union[BaseCallbackBatch, list[BaseCallbackBatch]]] = None,
    ) -> None:
        super().__init__(
            statistical_method=self._hi_normalized_complement,
            statistical_kwargs={
                "num_bins": num_bins,
            },
            callbacks=callbacks,
        )
        self.num_bins = num_bins

    def _distance_measure_bins(
        self,
        X_ref: np.ndarray,  # noqa: N803
        X: np.ndarray,  # noqa: N803
    ) -> float:
        """Compute the normalized complement of the histogram intersection.

        :param X_ref: reference data
        :type X_ref: numpy.ndarray
        :param X: data to compare against the reference
        :type X: numpy.ndarray
        :return: distance value
        :rtype: float
        """
        return self._hi_normalized_complement(
            X=X_ref, Y=X, num_bins=self.num_bins
        )

    @staticmethod
    def _hi_normalized_complement(
        X: np.ndarray,  # noqa: N803
        Y: np.ndarray,
        *,
        num_bins: int,
    ) -> float:
        """Return one minus the intersection of the two normalized histograms.

        Both histograms share the same range (global minimum to global maximum
        of the two samples) and each one is divided by the first dimension of
        its sample before intersecting.

        :param X: first sample
        :type X: numpy.ndarray
        :param Y: second sample
        :type Y: numpy.ndarray
        :param num_bins: number of histogram bins
        :type num_bins: int
        :return: distance value
        :rtype: float
        """
        # np.minimum / np.maximum propagate NaN just like np.min / np.max
        # applied to the pair.
        lower = np.minimum(np.min(X), np.min(Y))
        upper = np.maximum(np.max(X), np.max(Y))
        hist_range = (lower, upper)

        X_counts, _ = np.histogram(X, bins=num_bins, range=hist_range)  # noqa: N806
        Y_counts, _ = np.histogram(Y, bins=num_bins, range=hist_range)  # noqa: N806
        X_hist = X_counts / X.shape[0]  # noqa: N806
        Y_hist = Y_counts / Y.shape[0]  # noqa: N806

        return 1 - np.sum(np.minimum(X_hist, Y_hist))
86 |
--------------------------------------------------------------------------------
/frouros/tests/unit/utils/test_checks.py:
--------------------------------------------------------------------------------
1 | """Test checks module."""
2 |
3 | from typing import Any
4 |
5 | import pytest
6 |
7 | from frouros.callbacks.base import BaseCallback
8 | from frouros.callbacks.batch import PermutationTestDistanceBased, ResetStatisticalTest
9 | from frouros.callbacks.batch.base import BaseCallbackBatch
10 | from frouros.callbacks.streaming import HistoryConceptDrift
11 | from frouros.callbacks.streaming.base import BaseCallbackStreaming
12 | from frouros.utils.checks import check_callbacks
13 |
14 |
@pytest.mark.parametrize(
    "callbacks, expected_cls",
    [
        # No callbacks at all, checked against the batch base class.
        (
            None,
            BaseCallbackBatch,
        ),
        # A single batch callback checked against the batch base class.
        (
            PermutationTestDistanceBased(
                num_permutations=10,
            ),
            BaseCallbackBatch,
        ),
        # No callbacks at all, checked against the streaming base class.
        (
            None,
            BaseCallbackStreaming,
        ),
        # A list of batch callbacks checked against the batch base class.
        (
            [
                PermutationTestDistanceBased(
                    num_permutations=10,
                ),
                ResetStatisticalTest(
                    alpha=0.05,
                ),
            ],
            BaseCallbackBatch,
        ),
        # A single streaming callback checked against the streaming base class.
        (
            HistoryConceptDrift(),
            BaseCallbackStreaming,
        ),
    ],
)
def test_check_callbacks(
    callbacks: Any,
    expected_cls: type[BaseCallback],
) -> None:
    """Test check_callbacks function.

    The test passes when ``check_callbacks`` returns without raising for
    every parametrized combination.

    :param callbacks: callbacks
    :type callbacks: Any
    :param expected_cls: expected callback class
    :type expected_cls: type[BaseCallback]
    """
    check_callbacks(
        callbacks=callbacks,
        expected_cls=expected_cls,
    )
64 |
65 |
@pytest.mark.parametrize(
    "callbacks, expected_cls",
    [
        # A single batch callback checked against the streaming base class.
        (
            PermutationTestDistanceBased(
                num_permutations=10,
            ),
            BaseCallbackStreaming,
        ),
        # A list of batch callbacks checked against the streaming base class.
        (
            [
                PermutationTestDistanceBased(
                    num_permutations=10,
                ),
                ResetStatisticalTest(
                    alpha=0.05,
                ),
            ],
            BaseCallbackStreaming,
        ),
        # A single streaming callback checked against the batch base class.
        (
            HistoryConceptDrift(),
            BaseCallbackBatch,
        ),
    ],
)
def test_check_callbacks_exceptions(
    callbacks: Any,
    expected_cls: type[BaseCallback],
) -> None:
    """Test check_callbacks function exceptions.

    Every parametrized combination is expected to make ``check_callbacks``
    raise ``TypeError``.

    :param callbacks: callbacks
    :type callbacks: Any
    :param expected_cls: expected callback class
    :type expected_cls: type[BaseCallback]
    """
    with pytest.raises(TypeError):
        check_callbacks(
            callbacks=callbacks,
            expected_cls=expected_cls,
        )
108 |
--------------------------------------------------------------------------------
/frouros/detectors/data_drift/batch/distance_based/psi.py:
--------------------------------------------------------------------------------
1 | """PSI (Population Stability Index) module."""
2 |
3 | import sys
4 | from typing import Any, Optional, Union
5 |
6 | import numpy as np
7 |
8 | from frouros.callbacks.batch.base import BaseCallbackBatch
9 | from frouros.detectors.data_drift.batch.distance_based.base import (
10 | BaseDistanceBasedBins,
11 | DistanceResult,
12 | )
13 |
14 |
class PSI(BaseDistanceBasedBins):
    """PSI (Population Stability Index) [wu2010enterprise]_ detector.

    :param num_bins: number of bins in which to divide probabilities, defaults to 10
    :type num_bins: int
    :param callbacks: callbacks, defaults to None
    :type callbacks: Optional[Union[BaseCallbackBatch, list[BaseCallbackBatch]]]

    :References:

    .. [wu2010enterprise] Wu, Desheng, and David L. Olson.
        "Enterprise risk management: coping with model risk in a large bank."
        Journal of the Operational Research Society 61.2 (2010): 179-190.

    :Example:

    >>> from frouros.detectors.data_drift import PSI
    >>> import numpy as np
    >>> np.random.seed(seed=31)
    >>> X = np.random.normal(loc=0, scale=1, size=100)
    >>> Y = np.random.normal(loc=1, scale=1, size=100)
    >>> detector = PSI(num_bins=20)
    >>> _ = detector.fit(X=X)
    >>> detector.compare(X=Y)[0]
    DistanceResult(distance=134.95409065116183)
    """

    def __init__(  # noqa: D107
        self,
        num_bins: int = 10,
        callbacks: Optional[Union[BaseCallbackBatch, list[BaseCallbackBatch]]] = None,
    ) -> None:
        method_kwargs = {"num_bins": num_bins}
        super().__init__(
            statistical_method=self._psi,
            statistical_kwargs=method_kwargs,
            callbacks=callbacks,
        )
        self.num_bins = num_bins

    def _apply_method(
        self,
        X_ref: np.ndarray,  # noqa: N803
        X: np.ndarray,
        **kwargs: Any,
    ) -> DistanceResult:
        """Delegate to ``_distance_measure`` and return its result unchanged.

        :param X_ref: reference data
        :type X_ref: numpy.ndarray
        :param X: data to compare against the reference
        :type X: numpy.ndarray
        :param kwargs: keyword arguments forwarded to ``_distance_measure``
        :type kwargs: Any
        :return: distance result
        :rtype: DistanceResult
        """
        return self._distance_measure(X_ref=X_ref, X=X, **kwargs)

    def _distance_measure_bins(
        self,
        X_ref: np.ndarray,  # noqa: N803
        X: np.ndarray,  # noqa: N803
    ) -> float:
        """Return the PSI value using the configured number of bins.

        :param X_ref: reference data
        :type X_ref: numpy.ndarray
        :param X: data to compare against the reference
        :type X: numpy.ndarray
        :return: PSI value
        :rtype: float
        """
        return self._psi(X=X_ref, Y=X, num_bins=self.num_bins)

    @staticmethod
    def _psi(
        X: np.ndarray,  # noqa: N803
        Y: np.ndarray,  # noqa: N803
        num_bins: int,
    ) -> float:
        """Compute ``sum((q - p) * log(q / p))`` over the binned values.

        ``p`` and ``q`` are the two arrays returned by
        ``BaseDistanceBasedBins._calculate_bins_values`` for ``X`` and ``Y``.

        :param X: reference data
        :type X: numpy.ndarray
        :param Y: data to compare against the reference
        :type Y: numpy.ndarray
        :param num_bins: number of bins
        :type num_bins: int
        :return: PSI value
        :rtype: float
        """
        ref_percents, test_percents = BaseDistanceBasedBins._calculate_bins_values(
            X_ref=X, X=Y, num_bins=num_bins
        )
        # Empty bins are overwritten in place with ``sys.float_info.min`` so
        # that neither the ratio nor the logarithm divides by or takes zero.
        floor_value = sys.float_info.min
        ref_percents[ref_percents == 0.0] = floor_value
        test_percents[test_percents == 0.0] = floor_value
        difference = test_percents - ref_percents
        log_ratio = np.log(test_percents / ref_percents)
        return np.sum(difference * log_ratio)
91 |
--------------------------------------------------------------------------------
/frouros/detectors/concept_drift/streaming/change_detection/cusum.py:
--------------------------------------------------------------------------------
1 | """CUSUM module."""
2 |
3 | from typing import Optional, Union
4 |
5 | import numpy as np
6 |
7 | from frouros.callbacks.streaming.base import BaseCallbackStreaming
8 | from frouros.detectors.concept_drift.streaming.change_detection.base import (
9 | BaseCUSUM,
10 | BaseCUSUMConfig,
11 | DeltaConfig,
12 | )
13 |
14 |
class CUSUMConfig(BaseCUSUMConfig, DeltaConfig):
    """CUSUM [page1954continuous]_ configuration.

    :param delta: delta value, defaults to 0.005
    :type delta: float
    :param lambda_: lambda value, defaults to 50.0
    :type lambda_: float
    :param min_num_instances: minimum numbers of instances to start looking for changes, defaults to 30
    :type min_num_instances: int

    :References:

    .. [page1954continuous] Page, Ewan S.
        "Continuous inspection schemes."
        Biometrika 41.1/2 (1954): 100-115.
    """  # noqa: E501  # pylint: disable=line-too-long

    def __init__(  # noqa: D107
        self,
        delta: float = 0.005,
        lambda_: float = 50.0,
        min_num_instances: int = 30,
    ) -> None:
        # Each base class is initialised explicitly with only the arguments it
        # takes, instead of going through a single cooperative super() call.
        BaseCUSUMConfig.__init__(
            self, lambda_=lambda_, min_num_instances=min_num_instances
        )
        DeltaConfig.__init__(self, delta=delta)
42 |
43 |
class CUSUM(BaseCUSUM):
    """CUSUM [page1954continuous]_ detector.

    :param config: configuration object of the detector, defaults to None. If None, the default configuration of :class:`CUSUMConfig` is used.
    :type config: Optional[CUSUMConfig]
    :param callbacks: callbacks, defaults to None
    :type callbacks: Optional[Union[BaseCallbackStreaming, list[BaseCallbackStreaming]]]

    :References:

    .. [page1954continuous] Page, Ewan S.
        "Continuous inspection schemes."
        Biometrika 41.1/2 (1954): 100-115.

    :Example:

    >>> from frouros.detectors.concept_drift import CUSUM
    >>> import numpy as np
    >>> np.random.seed(seed=31)
    >>> dist_a = np.random.normal(loc=0.2, scale=0.01, size=1000)
    >>> dist_b = np.random.normal(loc=0.8, scale=0.04, size=1000)
    >>> stream = np.concatenate((dist_a, dist_b))
    >>> detector = CUSUM()
    >>> for i, value in enumerate(stream):
    ...     _ = detector.update(value=value)
    ...     if detector.drift:
    ...         print(f"Change detected at step {i}")
    ...         break
    Change detected at step 1086
    """  # noqa: E501  # pylint: disable=line-too-long

    config_type = CUSUMConfig  # type: ignore

    def __init__(  # noqa: D107
        self,
        config: Optional[CUSUMConfig] = None,
        callbacks: Optional[
            Union[BaseCallbackStreaming, list[BaseCallbackStreaming]]
        ] = None,
    ) -> None:
        super().__init__(config=config, callbacks=callbacks)

    def _update_sum(self, error_rate: float) -> None:
        """Update the cumulative sum with a new error rate, floored at zero.

        :param error_rate: new error rate value
        :type error_rate: float
        """
        # Add the deviation of the new value from the running mean, less delta.
        shifted_sum = (
            self.sum_ + error_rate - self.mean_error_rate.mean - self.config.delta  # type: ignore # noqa: E501
        )
        self.sum_ = np.maximum(0, shifted_sum)
94 |
--------------------------------------------------------------------------------
/frouros/detectors/data_drift/streaming/base.py:
--------------------------------------------------------------------------------
1 | """Base data drift streaming module."""
2 |
3 | import abc
4 | from typing import Any, Optional, Tuple, Union
5 |
6 | import numpy as np
7 |
8 | from frouros.callbacks.streaming.base import BaseCallbackStreaming
9 | from frouros.detectors.data_drift.base import (
10 | BaseDataDrift,
11 | BaseDataType,
12 | BaseResult,
13 | BaseStatisticalType,
14 | )
15 | from frouros.utils.checks import check_callbacks
16 |
17 |
class BaseDataDriftStreaming(BaseDataDrift):
    """Abstract class representing a data drift streaming detector.

    Subclasses implement ``_fit``, ``_reset`` and ``_update``; this class
    handles callback validation, the instance counter and the callback hooks
    invoked around each update.
    """

    def __init__(
        self,
        data_type: BaseDataType,
        statistical_type: BaseStatisticalType,
        callbacks: Optional[
            Union[BaseCallbackStreaming, list[BaseCallbackStreaming]]
        ] = None,
    ) -> None:
        """Init method.

        :param data_type: data type
        :type data_type: BaseDataType
        :param statistical_type: statistical type
        :type statistical_type: BaseStatisticalType
        :param callbacks: callbacks
        :type callbacks: Optional[Union[BaseCallbackStreaming,
            list[BaseCallbackStreaming]]]
        """
        # Validate the callbacks against the streaming callback base class
        # before handing them to the parent constructor.
        check_callbacks(
            callbacks=callbacks,
            expected_cls=BaseCallbackStreaming,  # type: ignore
        )
        super().__init__(
            callbacks=callbacks,  # type: ignore
            data_type=data_type,
            statistical_type=statistical_type,
        )
        # Number of values processed by ``update`` since creation or last reset.
        self.num_instances = 0
        # Give every callback a reference back to this detector.
        for callback in self.callbacks:  # type: ignore
            callback.set_detector(detector=self)

    def reset(self) -> None:
        """Reset method.

        Runs the parent reset, sets ``num_instances`` back to 0 and then calls
        the subclass-specific ``_reset``.
        """
        super().reset()
        self.num_instances = 0
        self._reset()

    def update(
        self,
        value: Union[int, float],
    ) -> Tuple[Optional[BaseResult], dict[str, Any]]:
        """Update detector.

        :param value: value to use to update the detector
        :type value: Union[int, float]
        :return: update result and callbacks logs
        :rtype: Tuple[Optional[BaseResult], dict[str, Any]]
        """
        self._common_checks()  # noqa: N806
        self._specific_checks(X=value)  # noqa: N806
        # The counter is only incremented once both checks have passed.
        self.num_instances += 1

        # Callbacks see the raw input value before the update ...
        for callback in self.callbacks:  # type: ignore
            callback.on_update_start(  # type: ignore
                value=value,
            )
        result = self._update(value=value)
        # ... and the result of the update afterwards.
        for callback in self.callbacks:  # type: ignore
            callback.on_update_end(  # type: ignore
                value=result,
            )

        callbacks_logs = self._get_callbacks_logs()
        return result, callbacks_logs

    def _specific_checks(self, X: np.ndarray) -> None:  # noqa: N803
        # No detector-specific validation at this level.
        pass

    @abc.abstractmethod
    def _fit(self, X: np.ndarray) -> None:  # noqa: N803
        # Subclass hook; must be implemented by concrete detectors.
        pass

    @abc.abstractmethod
    def _reset(self) -> None:
        # Subclass hook called at the end of ``reset``.
        pass

    @abc.abstractmethod
    def _update(self, value: Union[int, float]) -> Optional[BaseResult]:
        # Subclass hook called by ``update`` between the two callback passes.
        pass
100 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [project]
2 | name = "frouros"
3 | version = "0.9.0"
4 | description = "An open-source Python library for drift detection in machine learning systems"
5 | authors = [
6 | {name = "Jaime Céspedes Sisniega", email = "cespedes@ifca.unican.es"}
7 | ]
8 | maintainers = [
9 | {name = "Jaime Céspedes Sisniega", email = "cespedes@ifca.unican.es"}
10 | ]
11 | license = {text = "BSD-3-Clause"}
12 | readme = "README.md"
13 | keywords = [
14 | "drift-detection",
15 | "concept-drift",
16 | "data-drift",
17 | "machine-learning",
18 | "data-science",
19 | "machine-learning-operations",
20 | "machine-learning-systems",
21 | ]
22 | classifiers = [
23 | "Development Status :: 5 - Production/Stable",
24 | "Intended Audience :: Developers",
25 | "Intended Audience :: Science/Research",
26 | "License :: OSI Approved :: BSD License",
27 | "Topic :: Scientific/Engineering",
28 | "Topic :: Scientific/Engineering :: Artificial Intelligence",
29 | "Topic :: Scientific/Engineering :: Mathematics",
30 | "Topic :: Software Development",
31 | "Topic :: Software Development :: Libraries :: Python Modules",
32 | "Programming Language :: Python :: 3.9",
33 | "Programming Language :: Python :: 3.10",
34 | "Programming Language :: Python :: 3.11",
35 | "Programming Language :: Python :: 3.12",
36 | "Programming Language :: Python :: 3 :: Only",
37 | ]
38 | requires-python = ">=3.9,<3.13"
39 | dependencies = [
40 | "matplotlib>=3.8.2,<3.10",
41 | "numpy>=1.26.3,<2.3",
42 | "requests>=2.31.0,<2.33",
43 | "scipy>=1.12.0,<1.16",
44 | "tqdm>=4.66.1,<5.0",
45 | ]
46 |
47 | [project.optional-dependencies]
48 | docs = [
49 | "sphinx>=7.2.6,<8.2",
50 | "sphinx-book-theme>=1.1.0,<1.2",
51 | "sphinxcontrib-bibtex>=2.6.2,<2.7",
52 | "myst-parser>=2.0.0,<4.1",
53 | "myst-nb>=1.0.0,<1.2",
54 | ]
55 | notebooks = [
56 | "scikit-learn>=1.5.1,<1.6",
57 | "torch>=2.1.2,<2.6",
58 | "torchvision>=0.16.2,<0.22",
59 | "ipywidgets>=8.1.1,<8.2",
60 | ]
61 | dev-tests = [
62 | "pytest>=8.3.1,<8.4",
63 | "pytest-cov>=6.0.0,<6.1",
64 | "pytest-mock>=3.14.0,<3.15",
65 | "scikit-learn>=1.5.1,<1.6",
66 | ]
67 | dev-ruff = [
68 | "ruff>=0.8.1,<0.9",
69 | ]
70 | dev-mypy = [
71 | "mypy>=1.13.0,<1.14",
72 | "types-requests>=2.32.0,<2.33",
73 | "types-toml>=0.10.0,<0.11",
74 | "types-tqdm>=4.66,<4.68",
75 | ]
76 | dev = [
77 | "frouros[docs,notebooks,dev-tests,dev-ruff,dev-mypy]",
78 | "tox>=4.23.2,<4.24",
79 | ]
80 |
81 | [project.urls]
82 | homepage = "https://frouros.readthedocs.io"
83 | repository = "https://github.com/IFCA-Advanced-Computing/frouros"
84 | documentation = "https://frouros.readthedocs.io"
85 | download = "https://pypi.org/project/frouros/"
86 |
87 | [build-system]
88 | requires = [
89 | "setuptools>=61.0,<76.0",
90 | "wheel>=0.42.0,<0.46",
91 | "toml>=0.10.2,<0.11",
92 | "build>=1.0.3,<1.3",
93 | ]
94 | build-backend = "setuptools.build_meta"
95 |
96 | [tool.ruff]
97 | extend-include = ["*.ipynb"]
98 |
99 | [tool.ruff.lint]
100 | select = [
101 | "E", # pycodestyle
102 | "F", # Pyflakes
103 | "B", # flake8-bugbear
104 | "SIM", # flake8-simplify
105 | "I", # isort
106 | ]
107 |
108 | [tool.mypy]
109 | disable_error_code = [
110 | "misc",
111 | "no-any-return",
112 | ]
113 | ignore_missing_imports = true
114 | strict = true
115 |
--------------------------------------------------------------------------------
/frouros/tests/unit/utils/test_kernels.py:
--------------------------------------------------------------------------------
1 | """Test kernels module."""
2 |
3 | import numpy as np
4 | import pytest
5 |
6 | from frouros.utils.kernels import rbf_kernel
7 |
8 | # TODO: Create fixtures for the matrices and the expected kernel values
9 |
10 |
@pytest.mark.parametrize(
    "X, Y, sigma, expected_kernel_value",
    [
        (np.array([[1, 2, 3]]), np.array([[1, 2, 3]]), 0.5, np.array([[1.0]])),
        (np.array([[1, 2, 3]]), np.array([[1, 2, 3]]), 1.0, np.array([[1.0]])),
        (np.array([[1, 2, 3]]), np.array([[1, 2, 3]]), 2.0, np.array([[1.0]])),
        (
            np.array([[1, 2, 3]]),
            np.array([[4, 5, 6]]),
            0.5,
            np.array([[3.53262857e-24]]),
        ),
        (
            np.array([[1, 2, 3]]),
            np.array([[4, 5, 6]]),
            1.0,
            np.array([[1.37095909e-06]]),
        ),
        (np.array([[1, 2, 3]]), np.array([[4, 5, 6]]), 2.0, np.array([[0.03421812]])),
        (
            np.array([[1, 2, 3], [4, 5, 6]]),
            np.array([[1, 2, 3], [4, 5, 6]]),
            0.5,
            np.array(
                [[1.00000000e00, 3.53262857e-24], [3.53262857e-24, 1.00000000e00]]
            ),
        ),
        (
            np.array([[1, 2, 3], [4, 5, 6]]),
            np.array([[1, 2, 3], [4, 5, 6]]),
            1.0,
            np.array(
                [[1.00000000e00, 1.37095909e-06], [1.37095909e-06, 1.00000000e00]]
            ),
        ),
        (
            np.array([[1, 2, 3], [4, 5, 6]]),
            np.array([[1, 2, 3], [4, 5, 6]]),
            2.0,
            np.array([[1.00000000e00, 0.03421812], [0.03421812, 1.00000000e00]]),
        ),
        (
            np.array([[1, 2, 3], [4, 5, 6]]),
            np.array([[1.5, 2.5, 3.5], [4.5, 5.5, 6.5]]),
            0.5,
            np.array(
                [[2.23130160e-01, 1.20048180e-32], [5.17555501e-17, 2.23130160e-01]]
            ),
        ),
        (
            np.array([[1, 2, 3], [4, 5, 6]]),
            np.array([[1.5, 2.5, 3.5], [4.5, 5.5, 6.5]]),
            1.0,
            np.array(
                [[6.87289279e-01, 1.04674018e-08], [8.48182352e-05, 6.87289279e-01]]
            ),
        ),
        (
            np.array([[1, 2, 3], [4, 5, 6]]),
            np.array([[1.5, 2.5, 3.5], [4.5, 5.5, 6.5]]),
            2.0,
            np.array([[0.91051036, 0.01011486], [0.09596709, 0.91051036]]),
        ),
    ],
)
def test_rbf_kernel(
    X: np.ndarray,  # noqa: N803
    Y: np.ndarray,
    sigma: float,
    expected_kernel_value: np.ndarray,
) -> None:
    """Check ``rbf_kernel`` against precomputed kernel matrices.

    :param X: X values
    :type X: numpy.ndarray
    :param Y: Y values
    :type Y: numpy.ndarray
    :param sigma: sigma value
    :type sigma: float
    :param expected_kernel_value: expected kernel value
    :type expected_kernel_value: numpy.ndarray
    """
    kernel_value = rbf_kernel(X=X, Y=Y, sigma=sigma)
    # Element-wise comparison using numpy's default tolerances.
    assert np.allclose(kernel_value, expected_kernel_value)
103 |
--------------------------------------------------------------------------------
/frouros/detectors/data_drift/batch/statistical_test/cvm.py:
--------------------------------------------------------------------------------
1 | """CVMTest (Cramér-von Mises test) module."""
2 |
3 | from typing import Any, Optional, Union
4 |
5 | import numpy as np
6 | from scipy.stats import cramervonmises_2samp
7 |
8 | from frouros.callbacks.batch.base import BaseCallbackBatch
9 | from frouros.detectors.data_drift.base import NumericalData, UnivariateData
10 | from frouros.detectors.data_drift.batch.statistical_test.base import (
11 | BaseStatisticalTest,
12 | StatisticalResult,
13 | )
14 | from frouros.detectors.data_drift.exceptions import InsufficientSamplesError
15 |
16 |
class CVMTest(BaseStatisticalTest):
    """CVMTest (Cramér-von Mises test) [cramer1928composition]_ detector.

    :param callbacks: callbacks, defaults to None
    :type callbacks: Optional[Union[BaseCallbackBatch, list[BaseCallbackBatch]]]

    :Note:
    - Passing additional arguments to `scipy.stats.cramervonmises_2samp <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.cramervonmises_2samp.html>`__ can be done using :func:`compare` kwargs.

    :References:

    .. [cramer1928composition] Cramér, Harald.
        "On the composition of elementary errors: First paper: Mathematical deductions."
        Scandinavian Actuarial Journal 1928.1 (1928): 13-74.

    :Example:

    >>> from frouros.detectors.data_drift import CVMTest
    >>> import numpy as np
    >>> np.random.seed(seed=31)
    >>> X = np.random.normal(loc=0, scale=1, size=100)
    >>> Y = np.random.normal(loc=1, scale=1, size=100)
    >>> detector = CVMTest()
    >>> _ = detector.fit(X=X)
    >>> detector.compare(X=Y)[0]
    StatisticalResult(statistic=5.331699999999998, p_value=1.7705426014202885e-10)
    """  # noqa: E501  # pylint: disable=line-too-long

    def __init__(  # noqa: D107
        self,
        callbacks: Optional[Union[BaseCallbackBatch, list[BaseCallbackBatch]]] = None,
    ) -> None:
        super().__init__(
            data_type=NumericalData(),
            statistical_type=UnivariateData(),
            callbacks=callbacks,
        )

    @BaseStatisticalTest.X_ref.setter  # type: ignore[attr-defined]
    def X_ref(self, value: Optional[np.ndarray]) -> None:  # noqa: N802
        """Reference data setter.

        A non-None value must contain at least two samples; None is stored
        without any check.

        :param value: value to be set
        :type value: Optional[numpy.ndarray]
        """
        if value is not None:
            self._check_sufficient_samples(X=value)
        self._X_ref = value

    def _specific_checks(self, X: np.ndarray) -> None:  # noqa: N803
        """Require at least two samples in the data being compared.

        :param X: data to check
        :type X: numpy.ndarray
        """
        self._check_sufficient_samples(X=X)

    @staticmethod
    def _check_sufficient_samples(X: np.ndarray) -> None:  # noqa: N803
        """Raise if the first dimension of ``X`` holds fewer than two samples.

        :param X: data to check
        :type X: numpy.ndarray
        :raises InsufficientSamplesError: if ``X.shape[0]`` is less than 2
        """
        if X.shape[0] < 2:
            raise InsufficientSamplesError("Number of samples must be at least 2.")

    @staticmethod
    def _statistical_test(
        X_ref: np.ndarray,  # noqa: N803
        X: np.ndarray,
        **kwargs: Any,
    ) -> StatisticalResult:
        """Run ``cramervonmises_2samp`` and wrap its statistic and p-value.

        :param X_ref: reference data
        :type X_ref: numpy.ndarray
        :param X: data to compare against the reference
        :type X: numpy.ndarray
        :param kwargs: keyword arguments forwarded to ``cramervonmises_2samp``
        :type kwargs: Any
        :return: statistic and p-value of the test
        :rtype: StatisticalResult
        """
        cvm_result = cramervonmises_2samp(x=X_ref, y=X, **kwargs)
        return StatisticalResult(
            statistic=cvm_result.statistic,
            p_value=cvm_result.pvalue,
        )
93 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.yml:
--------------------------------------------------------------------------------
1 | name: Bug Report
2 | description: Create a report to help us reproduce and correct the bug
3 | labels: ["bug", "needs triage"]
4 |
5 | body:
6 | - type: markdown
7 | attributes:
8 | value: >
9 | #### Before submitting a bug, please make sure the issue hasn't been already
10 | addressed by searching through [the past issues](https://github.com/IFCA-Advanced-Computing/frouros/issues).
11 | - type: textarea
12 | attributes:
13 | label: Describe the bug
14 | description: >
15 | A clear and concise description of what the bug is.
16 | validations:
17 | required: true
18 | - type: textarea
19 | attributes:
20 | label: Steps/Code to Reproduce
21 | description: |
22 | Please add a minimal code example that can reproduce the error when running it. Be as succinct as possible, **do not depend on external data files**: instead you can generate synthetic data using `numpy.random`, [frouros.datasets](https://frouros.readthedocs.io/en/stable/api_reference/datasets.html) or a few lines of Python code. Example:
23 |
24 | ```python
25 | from frouros.detectors.data_drift.streaming import MMD
26 | import numpy as np
27 |
28 | np.random.seed(seed=31)
29 | group_A = np.random.normal(loc=1, scale=1, size=1000)
30 | group_B = np.concatenate(
31 | [
32 | np.random.normal(loc=1, scale=1, size=1000),
33 | np.random.normal(loc=5, scale=0.1, size=1000),
34 | ],
35 | )
36 | detector = MMD(
37 | chunk_size=100,
38 | )
39 | ```
40 | placeholder: |
41 | ```
42 | Sample code to reproduce the problem
43 | ```
44 | validations:
45 | required: true
46 | - type: textarea
47 | attributes:
48 | label: Expected Results
49 | description: >
50 | Please paste or describe the expected results.
51 | placeholder: >
52 | Example: No error is thrown.
53 | validations:
54 | required: true
55 | - type: textarea
56 | attributes:
57 | label: Actual Results
58 | description: |
59 | Please paste or describe the results you observe instead of the expected results. If you observe an error, please paste the error message including the **full traceback** of the exception. For instance the code above raises the following exception:
60 | ```python-traceback
61 | ---------------------------------------------------------------------------
62 | Traceback (most recent call last)
63 | File "/frouros/mmd.py", line 12, in <module>
64 | detector = MMD(
65 | TypeError: __init__() missing 1 required positional argument: 'window_size'
66 | ```
67 | placeholder: >
68 | Please paste or specifically describe the actual output or traceback.
69 | validations:
70 | required: true
71 | - type: textarea
72 | attributes:
73 | label: Python version
74 | render: shell
75 | description: |
76 | Please run the following and paste the output below.
77 | ```python
78 | import platform; print(f"'{platform.python_version()}'")
79 | ```
80 | validations:
81 | required: true
82 | - type: textarea
83 | attributes:
84 | label: Frouros version
85 | render: shell
86 | description: |
87 | Please run the following and paste the output below.
88 | ```python
89 | from importlib.metadata import version; print(version("frouros"))
90 | ```
91 | validations:
92 | required: true
93 | - type: markdown
94 | attributes:
95 | value: >
96 | Thanks for contributing 🎉! We will review your bug report as soon as possible.
97 |
--------------------------------------------------------------------------------
/frouros/detectors/concept_drift/streaming/change_detection/page_hinkley.py:
--------------------------------------------------------------------------------
1 | """Page Hinkley module."""
2 |
3 | from typing import Optional, Union
4 |
5 | from frouros.callbacks.streaming.base import BaseCallbackStreaming
6 | from frouros.detectors.concept_drift.streaming.change_detection.base import (
7 | AlphaConfig,
8 | BaseCUSUM,
9 | BaseCUSUMConfig,
10 | DeltaConfig,
11 | )
12 |
13 |
class PageHinkleyConfig(BaseCUSUMConfig, DeltaConfig, AlphaConfig):
    """Page Hinkley [page1954continuous]_ configuration.

    :param delta: delta value, defaults to 0.005
    :type delta: float
    :param lambda_: lambda value, defaults to 50.0
    :type lambda_: float
    :param alpha: forgetting factor value, defaults to 0.9999
    :type alpha: float
    :param min_num_instances: minimum numbers of instances to start looking for changes, defaults to 30
    :type min_num_instances: int

    :References:

    .. [page1954continuous] Page, Ewan S.
        "Continuous inspection schemes."
        Biometrika 41.1/2 (1954): 100-115.
    """  # noqa: E501  # pylint: disable=line-too-long

    def __init__(  # noqa: D107
        self,
        delta: float = 0.005,
        lambda_: float = 50.0,
        alpha: float = 0.9999,
        min_num_instances: int = 30,
    ) -> None:
        # Each base class is initialised explicitly with only the arguments it
        # takes, instead of going through a single cooperative super() call.
        BaseCUSUMConfig.__init__(
            self, min_num_instances=min_num_instances, lambda_=lambda_
        )
        DeltaConfig.__init__(self, delta=delta)
        AlphaConfig.__init__(self, alpha=alpha)
45 |
46 |
class PageHinkley(BaseCUSUM):
    """Page Hinkley [page1954continuous]_ detector.

    :param config: configuration object of the detector, defaults to None. If None, the default configuration of :class:`PageHinkleyConfig` is used.
    :type config: Optional[PageHinkleyConfig]
    :param callbacks: callbacks, defaults to None
    :type callbacks: Optional[Union[BaseCallbackStreaming, list[BaseCallbackStreaming]]]

    :References:

    .. [page1954continuous] Page, Ewan S.
        "Continuous inspection schemes."
        Biometrika 41.1/2 (1954): 100-115.

    :Example:

    >>> from frouros.detectors.concept_drift import PageHinkley
    >>> import numpy as np
    >>> np.random.seed(seed=31)
    >>> dist_a = np.random.normal(loc=0.2, scale=0.01, size=1000)
    >>> dist_b = np.random.normal(loc=0.8, scale=0.04, size=1000)
    >>> stream = np.concatenate((dist_a, dist_b))
    >>> detector = PageHinkley()
    >>> for i, value in enumerate(stream):
    ...     _ = detector.update(value=value)
    ...     if detector.drift:
    ...         print(f"Change detected at step {i}")
    ...         break
    Change detected at step 1094
    """  # noqa: E501  # pylint: disable=line-too-long

    config_type = PageHinkleyConfig  # type: ignore

    def __init__(  # noqa: D107
        self,
        config: Optional[PageHinkleyConfig] = None,
        callbacks: Optional[
            Union[BaseCallbackStreaming, list[BaseCallbackStreaming]]
        ] = None,
    ) -> None:
        super().__init__(config=config, callbacks=callbacks)

    def _update_sum(self, error_rate: float) -> None:
        """Scale the previous sum by alpha and add the new deviation.

        :param error_rate: new error rate value
        :type error_rate: float
        """
        # Deviation of the new value from the running mean, less delta.
        deviation = (
            error_rate - self.mean_error_rate.mean - self.config.delta  # type: ignore
        )
        self.sum_ = self.config.alpha * self.sum_ + deviation  # type: ignore
96 |
--------------------------------------------------------------------------------
/frouros/callbacks/batch/reset.py:
--------------------------------------------------------------------------------
1 | """Reset batch callback module."""
2 |
3 | from typing import Any, Optional
4 |
5 | import numpy as np
6 |
7 | from frouros.callbacks.batch.base import BaseCallbackBatch
8 | from frouros.utils.logger import logger
9 |
10 |
class ResetStatisticalTest(BaseCallbackBatch):
    """Reset callback class that can be applied to :mod:`data_drift.batch.statistical_test <frouros.detectors.data_drift.batch.statistical_test>` detectors.

    :param alpha: significance value
    :type alpha: float
    :param name: name value, defaults to None. If None, the name will be set to `ResetStatisticalTest`.
    :type name: Optional[str]

    :Example:

    >>> from frouros.callbacks import ResetStatisticalTest
    >>> from frouros.detectors.data_drift import KSTest
    >>> import numpy as np
    >>> np.random.seed(seed=31)
    >>> X = np.random.normal(loc=0, scale=1, size=100)
    >>> Y = np.random.normal(loc=1, scale=1, size=100)
    >>> detector = KSTest(callbacks=ResetStatisticalTest(alpha=0.01))
    >>> _ = detector.fit(X=X)
    >>> detector.compare(X=Y)[0]
    INFO:frouros:Drift detected. Resetting detector...
    StatisticalResult(statistic=0.55, p_value=3.0406585087050305e-14)
    """  # noqa: E501  # pylint: disable=line-too-long

    def __init__(  # noqa: D107
        self,
        alpha: float,
        name: Optional[str] = None,
    ) -> None:
        super().__init__(name=name)
        # Assigning through the property runs the validation in the setter.
        self.alpha = alpha

    @property
    def alpha(self) -> float:
        """Alpha property.

        :return: significance value
        :rtype: float
        """
        return self._alpha

    @alpha.setter
    def alpha(self, value: float) -> None:
        """Alpha setter.

        :param value: value to be set
        :type value: float
        :raises ValueError: if value is less than or equal to 0
        """
        if value <= 0.0:
            raise ValueError("value must be greater than 0.")
        self._alpha = value

    def on_compare_end(
        self,
        result: Any,
        X_ref: np.ndarray,  # noqa: N803
        X_test: np.ndarray,
    ) -> None:
        """On compare end method.

        Resets the attached detector when the p-value of ``result`` is less
        than or equal to ``alpha``.

        :param result: result obtained from the `compare` method
        :type result: Any
        :param X_ref: reference data
        :type X_ref: numpy.ndarray
        :param X_test: test data
        :type X_test: numpy.ndarray
        """
        drift_detected = result.p_value <= self.alpha
        if not drift_detected:
            return
        logger.info("Drift detected. Resetting detector...")
        self.detector.reset()  # type: ignore

    # FIXME: set_detector method as a workaround to  # pylint: disable=fixme
    # avoid circular import problem. Make it an abstract method and
    # uncomment commented code when it is solved

    # def set_detector(self, detector) -> None:
    #     """Set detector method.
    #
    #     :raises TypeError: Type error exception
    #     """
    #     if not isinstance(detector, BaseDataDriftBatch):
    #         raise TypeError(
    #             f"callback {self.__class__.__name__} cannot be used with detector"
    #             f" {detector.__class__.__name__}. Must be used with a detector of "
    #             f"type BaseDataDriftBatch."
    #         )
    #     self.detector = detector

    def reset(self) -> None:
        """Reset method."""
102 |
--------------------------------------------------------------------------------
/frouros/detectors/concept_drift/streaming/change_detection/geometric_moving_average.py:
--------------------------------------------------------------------------------
1 | """Geometric Moving Average module."""
2 |
3 | from typing import Optional, Union
4 |
5 | from frouros.callbacks.streaming.base import BaseCallbackStreaming
6 | from frouros.detectors.concept_drift.streaming.change_detection.base import (
7 | AlphaConfig,
8 | BaseCUSUM,
9 | BaseCUSUMConfig,
10 | )
11 |
12 |
class GeometricMovingAverageConfig(BaseCUSUMConfig, AlphaConfig):
    """Geometric Moving Average [robertst1959control]_ configuration.

    Combines the parameters handled by :class:`BaseCUSUMConfig` (``lambda_`` and
    ``min_num_instances``) with the ``alpha`` handled by :class:`AlphaConfig`.

    :param alpha: forgetting factor value, defaults to 0.99
    :type alpha: float
    :param lambda_: lambda value, forwarded to :class:`BaseCUSUMConfig`, defaults to 1.0
    :type lambda_: float
    :param min_num_instances: minimum numbers of instances to start looking for changes, defaults to 30
    :type min_num_instances: int

    :References:

    .. [robertst1959control] Roberts, S. W.
        “Control Chart Tests Based on Geometric Moving Averages.”
        Technometrics, vol. 1, no. 3, 1959, pp. 239–50.
        JSTOR, https://doi.org/10.2307/1266443.
    """  # noqa: E501  # pylint: disable=line-too-long

    def __init__(  # noqa: D107
        self,
        alpha: float = 0.99,
        lambda_: float = 1.0,
        min_num_instances: int = 30,
    ) -> None:
        # Each base takes a different set of arguments, so both initialisers
        # are called explicitly instead of relying on a single super() chain.
        BaseCUSUMConfig.__init__(
            self, lambda_=lambda_, min_num_instances=min_num_instances
        )
        AlphaConfig.__init__(self, alpha=alpha)
41 |
42 |
class GeometricMovingAverage(BaseCUSUM):
    """Geometric Moving Average [robertst1959control]_ detector.

    The running sum is an exponentially weighted average of the deviation of
    each incoming value from the current mean, weighted by ``config.alpha``.

    :param config: configuration object of the detector, defaults to None. If None, the default configuration of :class:`GeometricMovingAverageConfig` is used.
    :type config: Optional[GeometricMovingAverageConfig]
    :param callbacks: callbacks, defaults to None
    :type callbacks: Optional[Union[BaseCallbackStreaming, list[BaseCallbackStreaming]]]

    :References:

    .. [robertst1959control] Roberts, S. W.
        “Control Chart Tests Based on Geometric Moving Averages.”
        Technometrics, vol. 1, no. 3, 1959, pp. 239–50.
        JSTOR, https://doi.org/10.2307/1266443.

    :Example:

    >>> from frouros.detectors.concept_drift import GeometricMovingAverage, GeometricMovingAverageConfig
    >>> import numpy as np
    >>> np.random.seed(seed=31)
    >>> dist_a = np.random.normal(loc=0.2, scale=0.01, size=1000)
    >>> dist_b = np.random.normal(loc=0.8, scale=0.04, size=1000)
    >>> stream = np.concatenate((dist_a, dist_b))
    >>> detector = GeometricMovingAverage(config=GeometricMovingAverageConfig(lambda_=0.3))
    >>> for i, value in enumerate(stream):
    ...     _ = detector.update(value=value)
    ...     if detector.drift:
    ...         print(f"Change detected at step {i}")
    ...         break
    Change detected at step 1071
    """  # noqa: E501  # pylint: disable=line-too-long

    config_type = GeometricMovingAverageConfig  # type: ignore

    def __init__(  # noqa: D107
        self,
        config: Optional[GeometricMovingAverageConfig] = None,
        callbacks: Optional[
            Union[BaseCallbackStreaming, list[BaseCallbackStreaming]]
        ] = None,
    ) -> None:
        super().__init__(config=config, callbacks=callbacks)

    def _update_sum(self, error_rate: float) -> None:
        """Fold the latest deviation from the mean into the weighted sum.

        :param error_rate: latest value received by the detector
        :type error_rate: float
        """
        forgetting = self.config.alpha  # type: ignore
        previous = forgetting * self.sum_
        deviation = error_rate - self.mean_error_rate.mean
        # sum_t = alpha * sum_{t-1} + (1 - alpha) * (x_t - mean)
        self.sum_ = previous + (1 - forgetting) * deviation
93 |
--------------------------------------------------------------------------------
/frouros/detectors/data_drift/batch/statistical_test/chisquare.py:
--------------------------------------------------------------------------------
1 | """ChiSquareTest (Chi-square test) module."""
2 |
3 | import collections
4 | import typing
5 | from typing import (
6 | Any,
7 | Optional,
8 | Union,
9 | )
10 |
11 | import numpy as np
12 | from scipy.stats import chi2_contingency
13 |
14 | from frouros.callbacks.batch.base import BaseCallbackBatch
15 | from frouros.detectors.data_drift.base import (
16 | CategoricalData,
17 | UnivariateData,
18 | )
19 | from frouros.detectors.data_drift.batch.statistical_test.base import (
20 | BaseStatisticalTest,
21 | StatisticalResult,
22 | )
23 |
24 |
class ChiSquareTest(BaseStatisticalTest):
    """ChiSquareTest (Chi-square test) [pearson1900x]_ detector.

    :param callbacks: callbacks, defaults to None
    :type callbacks: Optional[Union[BaseCallbackBatch, list[BaseCallbackBatch]]]

    :Note:
    - Passing additional arguments to `scipy.stats.chi2_contingency <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.chi2_contingency.html>`__ can be done using :func:`compare` kwargs.

    :References:

    .. [pearson1900x] Pearson, Karl.
        "X. On the criterion that a given system of deviations from the probable in the
        case of a correlated system of variables is such that it can be reasonably
        supposed to have arisen from random sampling."
        The London, Edinburgh, and Dublin Philosophical Magazine and Journal of
        Science 50.302 (1900): 157-175.

    :Example:

    >>> from frouros.detectors.data_drift import ChiSquareTest
    >>> import numpy as np
    >>> np.random.seed(seed=31)
    >>> X = np.random.choice(a=[0, 1], size=100, p=[0.5, 0.5])
    >>> Y = np.random.choice(a=[0, 1], size=100, p=[0.8, 0.2])
    >>> detector = ChiSquareTest()
    >>> _ = detector.fit(X=X)
    >>> detector.compare(X=Y)[0]
    StatisticalResult(statistic=9.81474665685192, p_value=0.0017311812135839511)
    """  # noqa: E501  # pylint: disable=line-too-long

    def __init__(  # noqa: D107
        self,
        callbacks: Optional[Union[BaseCallbackBatch, list[BaseCallbackBatch]]] = None,
    ) -> None:
        super().__init__(
            data_type=CategoricalData(),
            statistical_type=UnivariateData(),
            callbacks=callbacks,
        )

    @staticmethod
    def _statistical_test(
        X_ref: np.ndarray,  # noqa: N803
        X: np.ndarray,
        **kwargs: Any,
    ) -> StatisticalResult:
        """Run the chi-square contingency test on the category counts.

        :param X_ref: reference data
        :type X_ref: numpy.ndarray
        :param X: test data
        :type X: numpy.ndarray
        :param kwargs: extra arguments forwarded to ``chi2_contingency``
        :return: statistic and p-value of the test
        :rtype: StatisticalResult
        """
        expected, observed = ChiSquareTest._calculate_frequencies(  # type: ignore
            X_ref=X_ref,
            X=X,
        )
        # Contingency table: first row is the test data, second the reference.
        contingency_table = np.array([observed, expected])
        statistic, p_value, _, _ = chi2_contingency(
            observed=contingency_table,
            **kwargs,
        )
        return StatisticalResult(
            statistic=statistic,
            p_value=p_value,
        )

    @staticmethod
    @typing.no_type_check  # FIXME: X_ref_counter and X_counter cause mypy errors  # pylint: disable=fixme  # noqa: E501
    def _calculate_frequencies(
        X_ref: np.ndarray,  # noqa: N803
        X: np.ndarray,
    ) -> tuple[list[int], list[int]]:
        """Count every category in both samples over a shared category set.

        A category that is missing from one of the samples gets a count of 0
        there, so both returned lists have the same length and ordering.

        :param X_ref: reference data
        :type X_ref: numpy.ndarray
        :param X: test data
        :type X: numpy.ndarray
        :return: reference counts and test counts, aligned by category
        :rtype: tuple[list[int], list[int]]
        """
        ref_counts = collections.Counter(X_ref)
        test_counts = collections.Counter(X)
        categories = set(list(ref_counts.keys()) + list(test_counts.keys()))
        # Both lists walk the same set, so position i is the same category.
        f_exp_values = [ref_counts.get(category, 0) for category in categories]
        f_obs_values = [test_counts.get(category, 0) for category in categories]
        return f_exp_values, f_obs_values
103 |
--------------------------------------------------------------------------------
/frouros/datasets/synthetic.py:
--------------------------------------------------------------------------------
1 | """Synthetic datasets module."""
2 |
3 | from typing import Iterator, Optional, Tuple
4 |
5 | import numpy as np
6 |
7 | from frouros.datasets.base import BaseDatasetGenerator
8 | from frouros.datasets.exceptions import InvalidBlockError
9 |
10 |
class SEA(BaseDatasetGenerator):
    """SEA generator [street2001streaming]_.

    Samples have three uniform features in [0, 10). The class is 1 when the
    sum of the first two features does not exceed the block threshold, and 0
    otherwise. A ``noise`` fraction of samples gets a random class instead.

    :param seed: seed value, defaults to None
    :type seed: Optional[int]

    :References:

    .. [street2001streaming] Street, W. Nick, and YongSeog Kim.
        "A streaming ensemble algorithm (SEA) for large-scale classification."
        Proceedings of the seventh ACM SIGKDD international conference on Knowledge
        discovery and data mining. 2001.

    :Example:

    >>> from frouros.datasets.synthetic import SEA
    >>> sea = SEA(seed=31)
    >>> dataset = sea.generate_dataset(block=1, noise=0.1, num_samples=5)
    >>> for X, y in dataset:
    ...     print(X, y)
    [2.86053822 9.58105567 7.70312932] 0
    [2.08165462 1.36917049 9.08373802] 0
    [8.36483632 1.12172604 8.3489916 ] 0
    [2.44680795 1.36231348 7.22094455] 1
    [1.28477715 2.20364007 5.19211202] 1
    """

    def __init__(  # noqa: D107
        self,
        seed: Optional[int] = None,
    ) -> None:
        super().__init__(seed=seed)
        # Decision threshold on X[0] + X[1] for each block.
        self._block_map = {1: 8.0, 2: 9.0, 3: 7.0, 4: 9.5}

    @staticmethod
    def _generate_sample(threshold: float, noise: float) -> Tuple[np.ndarray, int]:
        """Draw one sample and label it.

        :param threshold: threshold applied to the sum of the first two features
        :type threshold: float
        :param noise: probability of assigning a random class
        :type noise: float
        :return: features and class
        :rtype: Tuple[np.ndarray, int]
        """
        features = np.random.uniform(low=0.0, high=10.0, size=(3,))
        if np.random.random() < noise:
            # Noisy sample: the class ignores the features.
            return features, np.random.randint(2)
        label = int(features[0] + features[1] <= threshold)
        return features, label

    def generate_dataset(
        self, block: int, noise: float = 0.1, num_samples: int = 12500
    ) -> Iterator[Tuple[np.ndarray, int]]:
        """Generate dataset.

        Arguments are validated when the method is called; samples are only
        drawn as the returned generator is consumed.

        :param block: block to generate samples from, must be 1, 2, 3 or 4
        :type block: int
        :param noise: ratio of samples with a noisy class, defaults to 0.1
        :type noise: float
        :param num_samples: number of samples to generate, defaults to 12500
        :type num_samples: int
        :raises InvalidBlockError: if block is not 1, 2, 3 or 4
        :raises ValueError: if num_samples is lower than 1 or noise is outside [0, 1]
        :return: generator with the samples
        :rtype: Iterator[Tuple[np.ndarray, int]]
        """
        try:
            threshold = self._block_map[block]
        except KeyError as e:
            raise InvalidBlockError("block must be 1, 2, 3 or 4.") from e
        if num_samples < 1:
            raise ValueError("num_samples must be greater than 0.")
        if not 0 <= noise <= 1:
            raise ValueError("noise must be in the range [0, 1].")
        return (
            self._generate_sample(threshold=threshold, noise=noise)
            for _ in range(num_samples)
        )
83 |
84 |
class Dummy(BaseDatasetGenerator):
    """Dummy generator class.

    Samples have two uniform features in [0, 10). A sample gets ``class_`` when
    the features sum to less than 10 and the opposite class otherwise.
    """

    @staticmethod
    def _generate_sample(class_: int) -> Tuple[np.ndarray, int]:
        """Draw one sample and label it.

        :param class_: class assigned when the features sum to less than 10
        :type class_: int
        :return: features and class
        :rtype: Tuple[np.ndarray, int]
        """
        features = np.random.uniform(low=0.0, high=10.0, size=(2,))
        if features[0] + features[1] < 10.0:
            return features, class_
        return features, 1 - class_

    def generate_dataset(
        self, class_: int = 1, num_samples: int = 12500
    ) -> Iterator[Tuple[np.ndarray, int]]:
        """Generate dataset.

        Arguments are validated when the method is called; samples are only
        drawn as the returned generator is consumed.

        :param class_: class value, must be 1 or 0
        :type class_: int
        :param num_samples: number of samples to generate
        :type num_samples: int
        :raises ValueError: if class_ is not 1 or 0, or num_samples is lower than 1
        :return: generator with the samples
        :rtype: Iterator[Tuple[np.ndarray, int]]
        """
        if class_ not in (1, 0):
            raise ValueError("class_ must be 1 or 0.")
        if num_samples < 1:
            raise ValueError("num_samples must be greater than 0.")
        return (self._generate_sample(class_=class_) for _ in range(num_samples))
112 |
--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | #
3 | # This file only contains a selection of the most common options. For a full
4 | # list see the documentation:
5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
6 |
7 | # -- Path setup --------------------------------------------------------------
8 |
9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
import os
import sys

# The repository root must be on sys.path *before* frouros is imported.
# Otherwise the insertion has no effect on which frouros gets loaded: the
# import either fails (package not installed) or silently picks up an
# installed copy instead of the checked-out sources being documented.
sys.path.insert(0, os.path.abspath("../.."))
# sys.path.insert(0, os.path.abspath("../../examples"))

import frouros  # noqa: E402  # pylint: disable=wrong-import-position
20 |
21 |
# -- Project information -----------------------------------------------------

project = "Frouros"
copyright = "2023, Spanish National Research Council (CSIC)"
author = "Spanish National Research Council (CSIC)"

# The full version, including alpha/beta/rc tags
# NOTE(review): Sphinx itself reads `version` / `release`; CURRENT_VERSION is
# not a Sphinx option, so it only has an effect if something else reads it.
# Confirm whether `release` was intended.
CURRENT_VERSION = f"v{frouros.__version__}"

# -- General configuration ---------------------------------------------------

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    "sphinx.ext.autodoc",
    "sphinx.ext.autosectionlabel",
    "sphinx.ext.autosummary",
    "sphinx.ext.duration",
    "sphinxcontrib.bibtex",
    "myst_nb",
]

# Use bootstrap CSS from theme.
# NOTE(review): this looks like a sphinx-panels option, but sphinx_panels is
# not listed in `extensions` above. Confirm it is still needed.
panels_add_bootstrap_css = False

# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]

# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
source_suffix = [".md", ".ipynb"]

# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = "en"

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", ".venv"]

# NOTE(review): numpydoc is not listed in `extensions` above, so the two
# numpydoc_* options below look like leftovers. Confirm before removing.
# see http://stackoverflow.com/q/12206334/562769
numpydoc_show_class_members = True
# Workaround for the class-members toctree problem described in
# https://github.com/numpy/numpydoc/issues/69
numpydoc_class_members_toctree = False

# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
#
html_theme = "sphinx_book_theme"
html_title = "Frouros"
html_logo = "../../images/logo.png"

# Theme options

html_theme_options = {
    "repository_url": "https://github.com/IFCA/frouros",
    "use_repository_button": True,
    "use_issues_button": True,
    "show_prev_next": False,
    "logo": {
        "alt_text": "frouros_logo",
    },
}

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ["_static"]

# generate autosummary even if no references
autosummary_generate = True

# autodoc: keep members in source order, include inherited members and skip
# private ones; only the class docstring is used as class content.
autodoc_default_options = {
    "member-order": "bysource",
    "inherited-members": True,
    "private-members": False,
}
autoclass_content = "class"

# MyST (Markdown) parser configuration
myst_enable_extensions = [
    "amsmath",
    "colon_fence",
    "deflist",
    "dollarmath",
    "html_image",
]
myst_url_schemes = ("http", "https", "mailto")
myst_heading_anchors = 3
myst_all_links_external = True

# MyST-NB configuration
nb_execution_timeout = 480
nb_execution_raise_on_error = True
nb_merge_streams = True

# sphinxcontrib-bibtex configuration
bibtex_bibfiles = ["references.bib"]
bibtex_default_style = "plain"
125 |
--------------------------------------------------------------------------------
/frouros/detectors/data_drift/batch/base.py:
--------------------------------------------------------------------------------
1 | """Base data drift batch module."""
2 |
3 | import abc
4 | from typing import Any, Optional, Tuple, Union
5 |
6 | import numpy as np
7 |
8 | from frouros.callbacks.batch.base import BaseCallbackBatch
9 | from frouros.detectors.data_drift.base import (
10 | BaseDataDrift,
11 | BaseDataType,
12 | BaseStatisticalType,
13 | )
14 | from frouros.detectors.data_drift.exceptions import (
15 | MismatchDimensionError,
16 | )
17 | from frouros.utils.checks import check_callbacks
18 |
19 |
class BaseDataDriftBatch(BaseDataDrift):
    """Abstract class representing a data drift batch detector.

    Stores the reference data at fit time and compares it against test data in
    :func:`compare`, notifying the registered callbacks before and after the
    comparison.
    """

    def __init__(
        self,
        data_type: BaseDataType,
        statistical_type: BaseStatisticalType,
        callbacks: Optional[Union[BaseCallbackBatch, list[BaseCallbackBatch]]] = None,
    ) -> None:
        """Init method.

        :param data_type: data type
        :type data_type: BaseDataType
        :param statistical_type: statistical type
        :type statistical_type: BaseStatisticalType
        :param callbacks: callbacks
        :type callbacks: Optional[Union[BaseCallbackBatch], list[BaseCallbackBatch]]
        """
        check_callbacks(
            callbacks=callbacks,
            expected_cls=BaseCallbackBatch,  # type: ignore
        )
        super().__init__(
            callbacks=callbacks,  # type: ignore
            data_type=data_type,
            statistical_type=statistical_type,
        )
        # Give every callback a handle on this detector.
        for callback in self.callbacks:  # type: ignore
            callback.set_detector(detector=self)

    def _fit(
        self,
        X: np.ndarray,  # noqa: N803
    ) -> None:
        """Store the reference data.

        :param X: reference data
        :type X: numpy.ndarray
        """
        self.X_ref = X

    def compare(
        self,
        X: np.ndarray,  # noqa: N803
        **kwargs: Any,
    ) -> Tuple[np.ndarray, dict[str, Any]]:
        """Compare values.

        :param X: test data
        :type X: numpy.ndarray
        :param kwargs: extra arguments forwarded to ``_compare``
        :return: compare result and callbacks logs
        :rtype: Tuple[numpy.ndarray, dict[str, Any]]
        """
        for callback in self.callbacks:  # type: ignore
            callback.on_compare_start(  # type: ignore
                X_ref=self.X_ref,
                X_test=X,
            )
        result = self._compare(X=X, **kwargs)
        for callback in self.callbacks:  # type: ignore
            callback.on_compare_end(  # type: ignore
                result=result,
                X_ref=self.X_ref,
                X_test=X,
            )

        callbacks_logs = self._get_callbacks_logs()
        return result, callbacks_logs

    def _check_compare_dimensions(self, X: np.ndarray) -> None:  # noqa: N803
        """Check that the test data is dimensionally compatible with X_ref.

        When both arrays have a second axis its size must match. When at least
        one of them lacks it, both must have the same number of dimensions.

        :param X: test data
        :type X: numpy.ndarray
        :raises MismatchDimensionError: if the dimensions do not match
        """
        try:
            ref_features = self.X_ref.shape[1]  # type: ignore
            test_features = X.shape[1]
        except IndexError as e:
            # At least one array has no second axis: compare the number of
            # dimensions instead.
            if self.X_ref.ndim != X.ndim:  # type: ignore
                raise MismatchDimensionError(
                    f"Number of dimensions of X_ref ({self.X_ref.ndim}) "  # type: ignore
                    f"and X ({X.ndim}) must be equal"
                ) from e
        else:
            # Raised outside the try body so it can never be swallowed by the
            # IndexError handler; the message reports the axis that was
            # actually compared.
            if ref_features != test_features:
                raise MismatchDimensionError(
                    f"Dimensions of X_ref ({ref_features}) "
                    f"and X ({test_features}) must be equal"
                )

    def _specific_checks(self, X: np.ndarray) -> None:  # noqa: N803
        """Run the checks specific to batch detectors.

        :param X: test data
        :type X: numpy.ndarray
        """
        self._check_compare_dimensions(X=X)

    @abc.abstractmethod
    def _apply_method(
        self,
        X_ref: np.ndarray,  # noqa: N803
        X: np.ndarray,
        **kwargs: Any,
    ) -> Any:
        pass

    @abc.abstractmethod
    def _compare(
        self,
        X: np.ndarray,  # noqa: N803
        **kwargs: Any,
    ) -> np.ndarray:
        pass

    def _get_result(
        self,
        X: np.ndarray,  # noqa: N803
        **kwargs: Any,
    ) -> Union[list[float], list[Tuple[float, float]], Tuple[float, float]]:
        """Apply the detector method to the reference data and the test data.

        :param X: test data
        :type X: numpy.ndarray
        :param kwargs: extra arguments forwarded to ``_apply_method``
        :return: value returned by ``_apply_method``
        """
        result = self._apply_method(  # pylint: disable=not-callable
            X_ref=self.X_ref,
            X=X,
            **kwargs,
        )
        return result
126 |
--------------------------------------------------------------------------------
/frouros/detectors/data_drift/streaming/distance_based/mmd.py:
--------------------------------------------------------------------------------
1 | """MMD (Maximum Mean Discrepancy) module."""
2 |
3 | from typing import Any, Callable, Optional, Tuple, Union
4 |
5 | import numpy as np
6 |
7 | from frouros.callbacks.streaming.base import BaseCallbackStreaming
8 | from frouros.detectors.data_drift.base import MultivariateData, NumericalData
9 | from frouros.detectors.data_drift.batch import MMD as MMDBatch # noqa: N811
10 | from frouros.detectors.data_drift.batch.distance_based.mmd import ( # type: ignore
11 | rbf_kernel,
12 | )
13 | from frouros.detectors.data_drift.streaming.distance_based.base import (
14 | BaseDistanceBased,
15 | DistanceResult,
16 | )
17 | from frouros.utils.data_structures import CircularQueue
18 |
19 |
class MMD(BaseDistanceBased):
    """MMD (Maximum Mean Discrepancy) [gretton2012kernel]_ detector.

    Streaming wrapper around the batch MMD detector: incoming values are kept
    in a circular queue of ``window_size`` elements and, once enough instances
    have been received, the queue content is compared against the reference
    data on every update.

    :param window_size: window size value
    :type window_size: int
    :param kernel: kernel function, defaults to :func:`rbf_kernel() <frouros.utils.kernels.rbf_kernel>`
    :type kernel: Callable
    :param chunk_size: chunk size value, defaults to None
    :type chunk_size: Optional[int]
    :param callbacks: callbacks, defaults to None
    :type callbacks: Optional[Union[BaseCallbackStreaming,
        list[BaseCallbackStreaming]]]

    :References:

    .. [gretton2012kernel] Gretton, Arthur, et al.
        "A kernel two-sample test."
        The Journal of Machine Learning Research 13.1 (2012): 723-773.

    :Example:

    >>> from functools import partial
    >>> from frouros.detectors.data_drift import MMDStreaming
    >>> from frouros.utils.kernels import rbf_kernel
    >>> import numpy as np
    >>> np.random.seed(seed=31)
    >>> X = np.random.multivariate_normal(mean=[1, 1], cov=[[2, 0], [0, 2]], size=100)
    >>> Y = np.random.multivariate_normal(mean=[0, 0], cov=[[2, 1], [1, 2]], size=100)
    >>> detector = MMDStreaming(window_size=10, kernel=partial(rbf_kernel, sigma=0.5))
    >>> _ = detector.fit(X=X)
    >>> for sample in Y:
    ...     distance, _ = detector.update(value=sample)
    ...     if distance is not None:
    ...         print(distance)
    """  # noqa: E501  # pylint: disable=line-too-long

    def __init__(  # noqa: D107
        self,
        window_size: int,
        kernel: Callable = rbf_kernel,  # type: ignore
        chunk_size: Optional[int] = None,
        callbacks: Optional[
            Union[BaseCallbackStreaming, list[BaseCallbackStreaming]]
        ] = None,
    ) -> None:
        super().__init__(
            data_type=NumericalData(),
            statistical_type=MultivariateData(),
            callbacks=callbacks,
        )
        # The batch detector does the actual distance computation.
        self.mmd = MMDBatch(kernel=kernel, chunk_size=chunk_size)
        self.window_size = window_size
        self.X_queue = CircularQueue(max_len=self.window_size)

    @property
    def window_size(self) -> int:
        """Number of most recent values compared against the reference data.

        :return: window size
        :rtype: int
        """
        return self._window_size

    @window_size.setter
    def window_size(self, value: int) -> None:
        """Validate and store the window size.

        :param value: value to be set
        :type value: int
        :raises ValueError: if the value is lower than 1
        """
        if value < 1:
            raise ValueError("window_size value must be greater than 0.")
        self._window_size = value

    def _fit(self, X: np.ndarray) -> None:  # noqa: N803
        """Fit the wrapped batch detector and mirror its reference data.

        :param X: reference data
        :type X: numpy.ndarray
        """
        batch_detector = self.mmd
        batch_detector.fit(X=X)
        self.X_ref = batch_detector.X_ref

    def _reset(self) -> None:
        """Reset the wrapped batch detector."""
        self.mmd.reset()

    def _update(self, value: Union[int, float]) -> Optional[DistanceResult]:
        """Queue the value and compare the window once it is large enough.

        :param value: value used to update the detector
        :type value: Union[int, float]
        :return: distance result, or None while fewer than ``window_size``
            instances have been received
        :rtype: Optional[DistanceResult]
        """
        self.X_queue.enqueue(value=value)

        if self.num_instances < self.window_size:
            return None

        window = np.array(self.X_queue)
        # FIXME: Handle callback logs. Now are ignored.  # pylint: disable=fixme
        distance, _ = self.mmd.compare(X=window)
        return distance

    def _compare(
        self,
        X: np.ndarray,  # noqa: N803
    ) -> Tuple[Optional[DistanceResult], dict[str, Any]]:  # noqa: N803
        """Delegate the comparison to the wrapped batch detector.

        :param X: test data
        :type X: numpy.ndarray
        :return: value returned by the batch detector ``compare`` method
        :rtype: Tuple[Optional[DistanceResult], dict[str, Any]]
        """
        comparison = self.mmd.compare(X=X)
        return comparison
120 |
--------------------------------------------------------------------------------
/frouros/tests/unit/utils/test_persistence.py:
--------------------------------------------------------------------------------
1 | """Test persistence module."""
2 |
3 | import pickle
4 |
5 | import pytest
6 |
7 | from frouros.callbacks import HistoryConceptDrift, PermutationTestDistanceBased
8 | from frouros.callbacks.base import BaseCallback
9 | from frouros.detectors.base import BaseDetector
10 | from frouros.detectors.concept_drift import DDM, DDMConfig
11 | from frouros.detectors.data_drift import MMD # type: ignore
12 | from frouros.utils import load, save
13 | from frouros.utils.decorators import set_os_filename
14 |
15 |
# The detectors in `params` are built once, when this module is imported, and
# the fixture is session scoped, so every test using a given parameter gets
# the very same detector instance.
@pytest.fixture(
    scope="session",
    params=[
        DDM(
            config=DDMConfig(),
        ),
        MMD(),
    ],
)
def detector(
    request: pytest.FixtureRequest,
) -> BaseDetector:
    """Fixture for detector.

    Parametrized so that each dependent test runs once per detector listed in
    ``params``.

    :param request: Request
    :type request: pytest.FixtureRequest
    :return: Detector
    :rtype: BaseDetector
    """
    return request.param
36 |
37 |
# The callbacks in `params` are built once, when this module is imported, and
# the fixture is session scoped, so every test using a given parameter gets
# the very same callback instance.
@pytest.fixture(
    scope="session",
    params=[
        HistoryConceptDrift(),
        PermutationTestDistanceBased(
            num_permutations=2,
        ),
    ],
)
def callback(
    request: pytest.FixtureRequest,
) -> BaseCallback:
    """Fixture for callback.

    Parametrized so that each dependent test runs once per callback listed in
    ``params``.

    :param request: Request
    :type request: pytest.FixtureRequest
    :return: Callback
    :rtype: BaseCallback
    """
    return request.param
58 |
59 |
@set_os_filename("detector.pkl")
def test_save_load_with_valid_detector(
    request: pytest.FixtureRequest,
    detector: BaseDetector,
) -> None:
    """Check that a saved detector is loaded back as the same class.

    :param request: Request
    :type request: pytest.FixtureRequest
    :param detector: Detector
    :type detector: BaseDetector
    """
    # The target filename is read from the "filename" marker on this test.
    marker = request.node.get_closest_marker("filename")
    filename = marker.args[0]
    save(obj=detector, filename=filename)
    loaded_detector = load(filename=filename)
    expected_cls = detector.__class__
    assert isinstance(loaded_detector, expected_cls)
81 |
82 |
@set_os_filename("callback.pkl")
def test_save_load_with_valid_callback(
    request: pytest.FixtureRequest,
    callback: BaseCallback,
) -> None:
    """Test save and load with valid callback.

    The loaded object must be an instance of the concrete class that was
    saved, mirroring the detector round-trip test. Checking only against
    ``BaseCallback`` would also pass if a different callback type came back.

    :param request: Request
    :type request: pytest.FixtureRequest
    :param callback: Callback
    :type callback: BaseCallback
    """
    # The target filename is read from the "filename" marker on this test.
    filename = request.node.get_closest_marker("filename").args[0]
    save(
        obj=callback,
        filename=filename,
    )
    loaded_callback = load(
        filename=filename,
    )
    assert isinstance(loaded_callback, callback.__class__)
104 |
105 |
@set_os_filename("invalid.pkl")
def test_save_with_invalid_object(
    request: pytest.FixtureRequest,
) -> None:
    """Check that saving an unsupported object is rejected with TypeError.

    :param request: Request
    :type request: pytest.FixtureRequest
    """
    marker = request.node.get_closest_marker("filename")
    filename = marker.args[0]
    # A plain string is used as the unsupported object.
    invalid_object = "invalid"
    with pytest.raises(TypeError):
        save(invalid_object, filename)
120 |
121 |
@set_os_filename("detector.pkl")
def test_save_with_invalid_protocol(
    request: pytest.FixtureRequest,
    detector: BaseDetector,
) -> None:
    """Check that an unsupported pickle protocol is rejected with ValueError.

    :param request: Request
    :type request: pytest.FixtureRequest
    :param detector: Detector
    :type detector: BaseDetector
    """
    marker = request.node.get_closest_marker("filename")
    filename = marker.args[0]
    # One past the highest protocol supported by this interpreter.
    invalid_protocol = pickle.HIGHEST_PROTOCOL + 1
    with pytest.raises(ValueError):
        save(detector, filename, invalid_protocol)
139 |
140 |
@set_os_filename("non_existent.pkl")
def test_load_with_non_existent_file(
    request: pytest.FixtureRequest,
) -> None:
    """Check that loading a missing file fails with FileNotFoundError.

    :param request: Request
    :type request: pytest.FixtureRequest
    """
    marker = request.node.get_closest_marker("filename")
    missing_file = marker.args[0]
    with pytest.raises(FileNotFoundError):
        load(missing_file)
154 |
--------------------------------------------------------------------------------
/frouros/callbacks/streaming/history.py:
--------------------------------------------------------------------------------
1 | """History callback module."""
2 |
3 | from typing import Any, Optional, Union
4 |
5 | from frouros.callbacks.streaming.base import BaseCallbackStreaming
6 | from frouros.utils.stats import BaseStat
7 |
8 |
9 | class HistoryConceptDrift(BaseCallbackStreaming):
10 | """HistoryConceptDrift callback class that can be applied to :mod:`concept_drift.streaming ` detectors.
11 |
12 | :param name: name value, defaults to None. If None, the name will be set to `HistoryConceptDrift`.
13 | :type name: Optional[str]
14 |
15 | :Note:
16 | By default the following variables are stored:
17 |
18 | - `value`: list of values received by the detector
19 | - `drift`: list of drift flags
20 | - `num_instances`: list of number of instances received by the detector
21 | Each detector may store additional variables if they are defined in an `additional_vars` dictionary in the detectors `__init__` method.
22 | The user can add additional variables by calling the :func:`add_additional_vars` method.
23 |
24 | :Example:
25 |
26 | >>> from frouros.callbacks import HistoryConceptDrift
27 | >>> from frouros.detectors.concept_drift import ADWIN
28 | >>> import numpy as np
29 | >>> np.random.seed(seed=31)
30 | >>> dist_a = np.random.normal(loc=0.2, scale=0.01, size=1000)
31 | >>> dist_b = np.random.normal(loc=0.8, scale=0.04, size=1000)
32 | >>> stream = np.concatenate((dist_a, dist_b))
33 | >>> detector = ADWIN(callbacks=[HistoryConceptDrift(name="history")])
34 | >>> for i, value in enumerate(stream):
35 | ... callbacks_log = detector.update(value=value)
36 | ... if detector.drift:
37 | ... print(f"Change detected at step {i}")
38 | ... break
39 | Change detected at step 1055
40 | >>> callbacks_log["history"]["drift"]
41 | [False, False, ..., True]
42 | """ # noqa: E501 # pylint: disable=line-too-long
43 |
44 | def __init__( # noqa: D107
45 | self,
46 | name: Optional[str] = None,
47 | ) -> None:
48 | super().__init__(name=name)
49 | self.additional_vars: list[str] = []
50 | self.history: dict[str, list[Any]] = {
51 | "value": [],
52 | "num_instances": [],
53 | "drift": [],
54 | }
55 |
def add_additional_vars(self, vars_: list[str]) -> None:
    """Add additional variables to track.

    Variables that are already tracked are left untouched, so calling this
    method more than once neither discards the values recorded so far nor
    registers the same variable twice.

    :param vars_: list of variables
    :type vars_: list[str]
    """
    for var in vars_:
        if var not in self.additional_vars:
            self.additional_vars.append(var)
        # Create the history list only when it is missing: rebuilding it
        # would throw away the values already stored for this variable.
        self.history.setdefault(var, [])
64 |
def on_update_end(self, value: Union[int, float]) -> None:
    """Record the state of the detector once an update has finished.

    :param value: value used to update the detector
    :type value: Union[int, float]
    """
    history = self.history
    history["value"].append(value)
    history["num_instances"].append(self.detector.num_instances)  # type: ignore
    history["drift"].append(self.detector.drift)  # type: ignore
    for name in self.additional_vars:
        tracked = self.detector.additional_vars[name]  # type: ignore
        # FIXME: Extract isinstance check to be done when  # pylint: disable=fixme
        # add_additional_vars is called (avoid the same computation)
        if isinstance(tracked, BaseStat):
            # Stat objects are stored through the value returned by get().
            tracked = tracked.get()
        history[name].append(tracked)

    # The logs receive the very same list objects that history holds.
    self.logs.update(**history)
87 |
88 | # FIXME: set_detector method as a workaround to # pylint: disable=fixme
89 | # avoid circular import problem. Make it an abstract method and
90 | # uncomment commented code when it is solved
91 |
92 | # def set_detector(self, detector) -> None:
93 | # """Set detector method.
94 | #
95 | # :raises TypeError: Type error exception
96 | # """
97 | # if not isinstance(detector, BaseConceptDrift):
98 | # raise TypeError(
99 | # f"callback {self.__class__.name} cannot be used with detector"
100 | # f" {detector.__class__name}. Must be used with a detector of "
101 | # f"type BaseConceptDrift."
102 | # )
103 | # self.detector = detector
104 |
def reset(self) -> None:
    """Empty every history list in place, keeping the tracked keys."""
    for recorded in self.history.values():
        recorded.clear()
109 |
--------------------------------------------------------------------------------
/docs/source/references.bib:
--------------------------------------------------------------------------------
1 | @inproceedings{dos2016fast,
2 | title={Fast unsupervised online drift detection using incremental kolmogorov-smirnov test},
3 | author={dos Reis, Denis Moreira and Flach, Peter and Matwin, Stan and Batista, Gustavo},
4 | booktitle={Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
5 | pages={1545--1554},
6 | year={2016}
7 | }
8 |
9 | @article{JMLR:v13:gretton12a,
10 | author = {Arthur Gretton and Karsten M. Borgwardt and Malte J. Rasch and Bernhard Sch{{\"o}}lkopf and Alexander Smola},
11 | title = {A Kernel Two-Sample Test},
12 | journal = {Journal of Machine Learning Research},
13 | year = {2012},
14 | volume = {13},
15 | number = {25},
16 | pages = {723--773},
17 | url = {http://jmlr.org/papers/v13/gretton12a.html}
18 | }
19 |
20 | @article{moreno2012unifying,
21 | title={A unifying view on dataset shift in classification},
22 | author={Moreno-Torres, Jose G and Raeder, Troy and Alaiz-Rodr{\'\i}guez, Roc{\'\i}o and Chawla, Nitesh V and Herrera, Francisco},
23 | journal={Pattern recognition},
24 | volume={45},
25 | number={1},
26 | pages={521--530},
27 | year={2012},
28 | publisher={Elsevier}
29 | }
30 |
31 | @article{gama2014survey,
32 | title={A survey on concept drift adaptation},
33 | author={Gama, Jo{\~a}o and {\v{Z}}liobait{\.e}, Indr{\.e} and Bifet, Albert and Pechenizkiy, Mykola and Bouchachia, Abdelhamid},
34 | journal={ACM computing surveys (CSUR)},
35 | volume={46},
36 | number={4},
37 | pages={1--37},
38 | year={2014},
39 | publisher={ACM New York, NY, USA}
40 | }
41 |
42 | @article{rabanser2019failing,
43 | title={Failing loudly: An empirical study of methods for detecting dataset shift},
44 | author={Rabanser, Stephan and G{\"u}nnemann, Stephan and Lipton, Zachary},
45 | journal={Advances in Neural Information Processing Systems},
46 | volume={32},
47 | year={2019}
48 | }
49 |
50 | @article{ramirez2017survey,
51 | title={A survey on data preprocessing for data stream mining: Current status and future directions},
52 | author={Ram{\'\i}rez-Gallego, Sergio and Krawczyk, Bartosz and Garc{\'\i}a, Salvador and Wo{\'z}niak, Micha{\l} and Herrera, Francisco},
53 | journal={Neurocomputing},
54 | volume={239},
55 | pages={39--57},
56 | year={2017},
57 | publisher={Elsevier}
58 | }
59 |
60 | @inproceedings{street2001streaming,
61 | title={A streaming ensemble algorithm (SEA) for large-scale classification},
62 | author={Street, W Nick and Kim, YongSeog},
63 | booktitle={Proceedings of the seventh ACM SIGKDD international conference on Knowledge discovery and data mining},
64 | pages={377--382},
65 | year={2001}
66 | }
67 |
68 | @inproceedings{gama2004learning,
69 | title={Learning with drift detection},
70 | author={Gama, Joao and Medas, Pedro and Castillo, Gladys and Rodrigues, Pedro},
71 | booktitle={Brazilian symposium on artificial intelligence},
72 | pages={286--295},
73 | year={2004},
74 | organization={Springer}
75 | }
76 |
77 | @techreport{harries1999splice,
78 | title={Splice-2 comparative evaluation: Electricity pricing},
79 | author={Harries, Michael},
80 | year={1999},
81 | institution={The University of New South Wales}
82 | }
83 |
84 | @inproceedings{rifai2011contractive,
85 | title={Contractive auto-encoders: Explicit invariance during feature extraction},
86 | author={Rifai, Salah and Vincent, Pascal and Muller, Xavier and Glorot, Xavier and Bengio, Yoshua},
87 | booktitle={Proceedings of the 28th international conference on international conference on machine learning},
88 | pages={833--840},
89 | year={2011}
90 | }
91 |
92 | @article{kingma2014adam,
93 | title={Adam: A method for stochastic optimization},
94 | author={Kingma, Diederik P and Ba, Jimmy},
95 | journal={arXiv preprint arXiv:1412.6980},
96 | year={2014}
97 | }
98 |
99 | @article{massey1951kolmogorov,
100 | title={The Kolmogorov-Smirnov test for goodness of fit},
101 | author={Massey Jr, Frank J},
102 | journal={Journal of the American statistical Association},
103 | volume={46},
104 | number={253},
105 | pages={68--78},
106 | year={1951},
107 | publisher={Taylor \& Francis}
108 | }
109 |
110 | @article{johari2022always,
111 | title={Always valid inference: Continuous monitoring of a/b tests},
112 | author={Johari, Ramesh and Koomen, Pete and Pekelis, Leonid and Walsh, David},
113 | journal={Operations Research},
114 | volume={70},
115 | number={3},
116 | pages={1806--1821},
117 | year={2022},
118 | publisher={INFORMS}
119 | }
120 |
121 | @article{storkey2009training,
122 | title={When training and test sets are different: characterizing learning transfer},
123 | author={Storkey, Amos J},
124 | journal={Dataset shift in machine learning},
125 | volume={30},
126 | number={3-28},
127 | pages={6},
128 | year={2009},
129 | publisher={Citeseer}
130 | }
131 |
--------------------------------------------------------------------------------
/frouros/metrics/prequential_error.py:
--------------------------------------------------------------------------------
1 | """Prequential error using fading factor metric module."""
2 |
3 | from typing import Optional, Union
4 |
5 | from frouros.metrics.base import BaseMetric
6 |
7 |
class PrequentialError(BaseMetric):
    """Prequential error [dawid1984present]_ using fading factor [gama2009issues]_ metric.

    :param alpha: fading factor value, defaults to 1.0
    :type alpha: Union[int, float]
    :param name: name value, defaults to None. If None, the name will be set to `PrequentialError`.
    :type name: Optional[str]

    :References:

    .. [dawid1984present] Dawid, A. Philip.
        "Present position and potential developments:
        Some personal views statistical theory the prequential approach."
        Journal of the Royal Statistical Society:
        Series A (General) 147.2 (1984): 278-290.
    .. [gama2009issues] Gama, Joao, Raquel Sebastiao, and Pedro Pereira Rodrigues.
        "Issues in evaluation of stream learning algorithms."
        Proceedings of the 15th ACM SIGKDD international conference on Knowledge
        discovery and data mining. 2009.

    :Example:

    >>> from frouros.metrics import PrequentialError
    >>> metric = PrequentialError(alpha=0.9)
    >>> X = [1, 1, 0, 1, 0, 0]
    >>> Y = [1, 0, 0, 0, 1, 1]
    >>> for i, (X_sample, Y_sample) in enumerate(zip(X, Y)):
    ...     error_value = 1 - (X_sample == Y_sample)
    ...     prequential_error = metric(error_value=error_value)
    ...     print(f"Metric={prequential_error:.5f} at step {i}")
    Metric=0.00000 at step 0
    Metric=0.52632 at step 1
    Metric=0.33210 at step 2
    Metric=0.52632 at step 3
    Metric=0.64199 at step 4
    Metric=0.71839 at step 5
    """  # noqa: E501  # pylint: disable=line-too-long

    def __init__(  # noqa: D107
        self,
        alpha: Union[int, float] = 1.0,
        name: Optional[str] = None,
    ) -> None:
        super().__init__(name=name)
        self.alpha = alpha
        # Faded sum of the error values received so far.
        self.cumulative_error = 0.0
        # Faded count of the instances received so far (the normaliser).
        self.cumulative_instances = 0.0
        # Plain (non-faded) number of error values received so far.
        self.num_instances = 0

    @property
    def alpha(self) -> Union[int, float]:
        """Fading factor property.

        :return: fading factor value
        :rtype: Union[int, float]
        """
        return self._alpha

    @alpha.setter
    def alpha(self, value: Union[int, float]) -> None:
        """Fading factor setter.

        :param value: value to be set
        :type value: Union[int, float]
        :raises TypeError: if value is not an int or a float
        :raises ValueError: if value is not in the range (0, 1]
        """
        if not isinstance(value, (int, float)):
            raise TypeError("value must be of type int or float.")
        if not 0.0 < value <= 1.0:
            raise ValueError("value must be in the range (0, 1].")
        self._alpha = value

    @property
    def cumulative_instances(self) -> Union[int, float]:
        """Cumulative instances' property.

        :return: cumulative (faded) number of instances
        :rtype: Union[int, float]
        """
        return self._cumulative_instances

    @cumulative_instances.setter
    def cumulative_instances(self, value: Union[int, float]) -> None:
        """Cumulative instances' setter.

        :param value: value to be set
        :type value: Union[int, float]
        :raises TypeError: if value is not an int or a float
        """
        if not isinstance(value, (int, float)):
            raise TypeError("value must be of type int or float.")
        self._cumulative_instances = value

    @property
    def cumulative_fading_error(self) -> Union[int, float]:
        """Cumulative fading error property.

        :return: cumulative fading error value, 0.0 while no instance has been accumulated
        :rtype: Union[int, float]
        """  # noqa: E501  # pylint: disable=line-too-long
        if not self.cumulative_instances:
            # Nothing accumulated yet (new or reset metric): report 0.0
            # instead of failing with a ZeroDivisionError.
            return 0.0
        return self.cumulative_error / self.cumulative_instances

    def __call__(
        self,
        error_value: float,
    ) -> Union[int, float]:
        """__call__ method that updates the prequential error using fading factor.

        :param error_value: error value
        :type error_value: float
        :return: cumulative fading error
        :rtype: Union[int, float]
        """
        # Older contributions are scaled by alpha before the new one is added.
        self.cumulative_error = self.cumulative_error * self.alpha + error_value
        self.cumulative_instances = self.cumulative_instances * self.alpha + 1
        self.num_instances += 1
        return self.cumulative_fading_error

    def reset(self) -> None:
        """Reset method."""
        self.cumulative_error = 0.0
        self.cumulative_instances = 0.0
        self.num_instances = 0

    def __repr__(self) -> str:
        """Repr method.

        :return: repr value
        :rtype: str
        """
        # Drop the closing parenthesis of the parent repr to append alpha.
        return f"{super().__repr__()[:-1]}, alpha={self.alpha})"
136 |
--------------------------------------------------------------------------------
/docs/source/concepts.md:
--------------------------------------------------------------------------------
1 | # Concepts
2 |
3 | Some concepts related to the drift detection field must be explained in order to use `frouros` correctly and to its full potential.
4 |
5 | ## What is drift detection?
6 |
7 | Drift detection can be defined as the process of trying to detect a significant change in the concept previously learned by a model (*concept drift*), or a change related to the feature/covariate distributions (*data drift*), either of which can end up degrading the model's performance.
8 |
9 | Traditionally there has been little consensus on the terminology and definitions of the
10 | different types of drift, as stated in {cite}`moreno2012unifying`. In order to adopt some
11 | clear definitions, we apply those used in {cite}`gama2014survey` for the *concept drift* part, in combination with those used in {cite}`rabanser2019failing`'s work
12 | for detecting *dataset shift* using only the feature/covariate distributions.
13 |
14 | Therefore, the problem statement can be defined as follows:
15 |
16 | Given a time period ${[0, t]}$, a set of sample-pairs ${D=\{(X_{0}, y_{0}),...,(X_{t}, y_{t})\}}$, where ${X_{i} \in \mathbb{R}^{m}}$ is the ${m}$-dimensional feature vector and ${y_{i} \in \mathbb{R}^{k}}$ is the ${k}$-class vector (using *one-hot encoding*) if we are dealing with a classification problem or ${y_{i} \in \mathbb{R}}$ is a scalar if it is a regression problem, ${D}$ is used to fit ${\hat{f} \colon X \to Y}$ (known as model) to be as close as possible to the unknown ${{f} \colon X \to Y}$. *Machine learning* algorithms are typically used for this fitting procedure.
17 | ${(X_{i}, y_{i}) \notin D}$ samples obtained in ${[t+1, \infty)}$ and used by ${\hat{f}}$ may start to differ with respect to ${D}$ pairs from a statistical point of view. It is also possible that some changes occur in terms of concept of the problem (change in ${f}$).
18 |
19 | Since ${P(y, X) = P(y|X) P(X)}$ {cite}`moreno2012unifying`, a change in the joint distribution between two different times that can produce some performance degradation can be described as follows:
20 |
21 | $$
22 | P_{[0, t]}(X, y) \neq P_{[t+1, \infty)}(X, y)
23 | $$
24 |
25 | The different types of changes that are considered as a form of drift can be categorized in the following types:
26 |
27 | - **Concept drift**: There is a change in the conditional probability $P(y|X)$ with or without a change in ${P(X)}$. Thus, it can be defined as ${P_{[0, t]}(y|X) \neq P_{[t+1, \infty)}(y|X)}$. [Concept drift methods](#concept-drift) aim to detect this type of drift. Also known as *real concept drift* {cite}`gama2014survey`.
28 |
29 | - **Data drift**: There is a change in ${P(X)}$. Therefore, this type of drift focuses only on the distribution of the covariates ${P(X)}$, so
30 | ${P_{[0, t]}(X) \neq P_{[t+1, \infty)}(X)}$. [Data drift methods](#data-drift) are designed to try to detect this type of drift. Unlike *concept drift*, the presence of *data drift* does not guarantee that the model's performance is being affected, but it is highly probable that it is. We have renamed *dataset shift* {cite}`rabanser2019failing` to *data drift*
31 | in order to maintain consistency with the *concept drift* definition. These *data drift* methods can also be used to detect *label drift*, also known as *prior probability shift* {cite}`storkey2009training`, where the label distribution ${P(Y)}$ is the one that changes over time, in such a way that ${P_{[0, t]}(Y) \neq P_{[t+1, \infty)}(Y)}$.
32 |
33 | ## Verification latency or delay
34 |
35 | According to {cite}`dos2016fast`, *verification latency* is defined as the period between a model's prediction and the availability of the ground-truth label (in case of a classification problem) or the target value (in case of a regression problem).
36 | In real-world cases, the *verification latency* is highly dependent on the application domain, and in some problems it is not even possible to obtain the ground-truth/target value at all, which makes it impossible to detect *concept drift* using concept drift methods. In those cases other techniques have to be used, such as [data drift methods](#data-drift), which only focus on covariate distributions.
37 |
38 | ## Drift detection methods
39 |
40 | Drift detection methods can be classified according to the type of drift they can detect and how they detect it.
41 |
42 | ### Concept drift
43 |
44 | Their main objective is to **detect concept drift**. They are closely related to data stream mining, online and incremental learning.
45 |
46 | At the time of writing this, Frouros only implements *concept drift* detectors that work in a {doc}`streaming ` manner. This means that the detector can only be updated with a single sample each time.
47 |
48 | ### Data drift
49 |
50 | On the other hand, there are problems where it is very costly or even impossible to obtain labels in a reasonable amount of time (see [verification latency](#verification-latency-or-delay)). In this case, it is not possible to directly check whether *concept drift* is occurring, so **detecting data drift** becomes the main objective of this type of method.
51 |
52 | At the time of writing this, Frouros implements detectors that are capable of working in {doc}`batch ` or {doc}`streaming ` mode. In addition, we can differentiate between univariate and multivariate data drift detectors, according to the type of feature/covariate distributions used.
53 | ```{bibliography}
54 | :filter: docname in docnames
55 | ```
56 |
--------------------------------------------------------------------------------
/frouros/detectors/concept_drift/streaming/statistical_process_control/ddm.py:
--------------------------------------------------------------------------------
1 | """DDM (Drift detection method) module."""
2 |
3 | from contextlib import suppress
4 | from typing import Any, Optional, Union
5 |
6 | from frouros.callbacks.streaming.base import BaseCallbackStreaming
7 | from frouros.detectors.concept_drift.streaming.statistical_process_control.base import (
8 | BaseSPCConfig,
9 | BaseSPCError,
10 | )
11 |
12 |
class DDMConfig(BaseSPCConfig):
    """DDM (Drift detection method) [gama2004learning]_ configuration.

    :param warning_level: warning level factor, defaults to 2.0
    :type warning_level: float
    :param drift_level: drift level factor, defaults to 3.0
    :type drift_level: float
    :param min_num_instances: minimum numbers of instances to start looking for changes, defaults to 30
    :type min_num_instances: int

    :References:

    .. [gama2004learning] Gama, Joao, et al.
        "Learning with drift detection."
        Advances in Artificial Intelligence–SBIA 2004: 17th Brazilian Symposium on
        Artificial Intelligence, Sao Luis, Maranhao, Brazil, September 29-October 1,
        2004. Proceedings 17. Springer Berlin Heidelberg, 2004.
    """  # noqa: E501  # pylint: disable=line-too-long

    def __init__(  # noqa: D107
        self,
        warning_level: float = 2.0,
        drift_level: float = 3.0,
        min_num_instances: int = 30,
    ) -> None:
        # This class only supplies the DDM defaults; the values themselves
        # are handed over unchanged to the base configuration.
        super().__init__(
            warning_level=warning_level,
            drift_level=drift_level,
            min_num_instances=min_num_instances,
        )
43 |
44 |
class DDM(BaseSPCError):
    """DDM (Drift detection method) [gama2004learning]_ detector.

    :param config: configuration object of the detector, defaults to None. If None, the default configuration of :class:`DDMConfig` is used.
    :type config: Optional[DDMConfig]
    :param callbacks: callbacks, defaults to None
    :type callbacks: Optional[Union[BaseCallbackStreaming, list[BaseCallbackStreaming]]]

    :Note:
    :func:`update` method expects to receive a value of 0 if the instance is correctly classified (no error) and 1 otherwise (error).

    :References:

    .. [gama2004learning] Gama, Joao, et al.
        "Learning with drift detection."
        Advances in Artificial Intelligence–SBIA 2004: 17th Brazilian Symposium on
        Artificial Intelligence, Sao Luis, Maranhao, Brazil, September 29-October 1,
        2004. Proceedings 17. Springer Berlin Heidelberg, 2004.

    :Example:

    >>> from frouros.detectors.concept_drift import DDM
    >>> import numpy as np
    >>> np.random.seed(seed=31)
    >>> dist_a = np.random.binomial(n=1, p=0.6, size=1000)
    >>> dist_b = np.random.binomial(n=1, p=0.8, size=1000)
    >>> stream = np.concatenate((dist_a, dist_b))
    >>> detector = DDM()
    >>> warning_flag = False
    >>> for i, value in enumerate(stream):
    ...     _ = detector.update(value=value)
    ...     if detector.drift:
    ...         print(f"Change detected at step {i}")
    ...         break
    ...     if not warning_flag and detector.warning:
    ...         print(f"Warning detected at step {i}")
    ...         warning_flag = True
    Warning detected at step 1049
    Change detected at step 1131
    """  # noqa: E501  # pylint: disable=line-too-long

    config_type = DDMConfig

    def __init__(  # noqa: D107
        self,
        config: Optional[DDMConfig] = None,
        callbacks: Optional[
            Union[BaseCallbackStreaming, list[BaseCallbackStreaming]]
        ] = None,
    ) -> None:
        super().__init__(
            config=config,
            callbacks=callbacks,
        )

    def _update(self, value: Union[int, float], **kwargs: Any) -> None:
        """Update the detector with a new value and refresh its flags.

        The drift level is checked first; the warning level is only checked
        when no drift has been flagged.

        :param value: value used to update the detector
        :type value: Union[int, float]
        :param kwargs: keyword arguments forwarded to the `on_warning_detected` hook of the callbacks
        :type kwargs: Any
        """  # noqa: E501  # pylint: disable=line-too-long
        self.num_instances += 1
        self.error_rate.update(value=value)

        if self.num_instances < self.config.min_num_instances:
            # Too few instances to start looking for changes.
            self.drift, self.warning = False, False
            return

        error_rate_plus_std, std = self._calculate_error_rate_plus_std()

        self._update_min_values(error_rate_plus_std=error_rate_plus_std, std=std)

        drift_flag = self._check_threshold(
            error_rate_plus_std=error_rate_plus_std,
            min_error_rate=self.min_error_rate,
            min_std=self.min_std,
            level=self.config.drift_level,  # type: ignore
        )
        if drift_flag:
            # Out-of-Control
            self.drift = True
            self.warning = False
            return

        warning_flag = self._check_threshold(
            error_rate_plus_std=error_rate_plus_std,
            min_error_rate=self.min_error_rate,
            min_std=self.min_std,
            level=self.config.warning_level,  # type: ignore
        )
        if not warning_flag:
            # In-Control
            self.warning = False
            self.drift = False
            return

        # Warning
        self.warning = True
        for callback in self.callbacks:  # type: ignore
            # Callbacks are not required to implement the warning hook. Only
            # the attribute lookup is guarded, so that an AttributeError
            # raised inside a hook is reported instead of being discarded.
            on_warning_detected = None
            with suppress(AttributeError):
                on_warning_detected = callback.on_warning_detected  # type: ignore
            if on_warning_detected is not None:
                on_warning_detected(**kwargs)
138 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | We as members, contributors, and leaders pledge to make participation in our
6 | community a harassment-free experience for everyone, regardless of age, body
7 | size, visible or invisible disability, ethnicity, sex characteristics, gender
8 | identity and expression, level of experience, education, socio-economic status,
9 | nationality, personal appearance, race, caste, color, religion, or sexual
10 | identity and orientation.
11 |
12 | We pledge to act and interact in ways that contribute to an open, welcoming,
13 | diverse, inclusive, and healthy community.
14 |
15 | ## Our Standards
16 |
17 | Examples of behavior that contributes to a positive environment for our
18 | community include:
19 |
20 | * Demonstrating empathy and kindness toward other people
21 | * Being respectful of differing opinions, viewpoints, and experiences
22 | * Giving and gracefully accepting constructive feedback
23 | * Accepting responsibility and apologizing to those affected by our mistakes,
24 | and learning from the experience
25 | * Focusing on what is best not just for us as individuals, but for the overall
26 | community
27 |
28 | Examples of unacceptable behavior include:
29 |
30 | * The use of sexualized language or imagery, and sexual attention or advances of
31 | any kind
32 | * Trolling, insulting or derogatory comments, and personal or political attacks
33 | * Public or private harassment
34 | * Publishing others' private information, such as a physical or email address,
35 | without their explicit permission
36 | * Other conduct which could reasonably be considered inappropriate in a
37 | professional setting
38 |
39 | ## Enforcement Responsibilities
40 |
41 | Community leaders are responsible for clarifying and enforcing our standards of
42 | acceptable behavior and will take appropriate and fair corrective action in
43 | response to any behavior that they deem inappropriate, threatening, offensive,
44 | or harmful.
45 |
46 | Community leaders have the right and responsibility to remove, edit, or reject
47 | comments, commits, code, wiki edits, issues, and other contributions that are
48 | not aligned to this Code of Conduct, and will communicate reasons for moderation
49 | decisions when appropriate.
50 |
51 | ## Scope
52 |
53 | This Code of Conduct applies within all community spaces, and also applies when
54 | an individual is officially representing the community in public spaces.
55 | Examples of representing our community include using an official e-mail address,
56 | posting via an official social media account, or acting as an appointed
57 | representative at an online or offline event.
58 |
59 | ## Enforcement
60 |
61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
62 | reported to the community leaders responsible for enforcement at
63 | cespedes@ifca.unican.es.
64 | All complaints will be reviewed and investigated promptly and fairly.
65 |
66 | All community leaders are obligated to respect the privacy and security of the
67 | reporter of any incident.
68 |
69 | ## Enforcement Guidelines
70 |
71 | Community leaders will follow these Community Impact Guidelines in determining
72 | the consequences for any action they deem in violation of this Code of Conduct:
73 |
74 | ### 1. Correction
75 |
76 | **Community Impact**: Use of inappropriate language or other behavior deemed
77 | unprofessional or unwelcome in the community.
78 |
79 | **Consequence**: A private, written warning from community leaders, providing
80 | clarity around the nature of the violation and an explanation of why the
81 | behavior was inappropriate. A public apology may be requested.
82 |
83 | ### 2. Warning
84 |
85 | **Community Impact**: A violation through a single incident or series of
86 | actions.
87 |
88 | **Consequence**: A warning with consequences for continued behavior. No
89 | interaction with the people involved, including unsolicited interaction with
90 | those enforcing the Code of Conduct, for a specified period of time. This
91 | includes avoiding interactions in community spaces as well as external channels
92 | like social media. Violating these terms may lead to a temporary or permanent
93 | ban.
94 |
95 | ### 3. Temporary Ban
96 |
97 | **Community Impact**: A serious violation of community standards, including
98 | sustained inappropriate behavior.
99 |
100 | **Consequence**: A temporary ban from any sort of interaction or public
101 | communication with the community for a specified period of time. No public or
102 | private interaction with the people involved, including unsolicited interaction
103 | with those enforcing the Code of Conduct, is allowed during this period.
104 | Violating these terms may lead to a permanent ban.
105 |
106 | ### 4. Permanent Ban
107 |
108 | **Community Impact**: Demonstrating a pattern of violation of community
109 | standards, including sustained inappropriate behavior, harassment of an
110 | individual, or aggression toward or disparagement of classes of individuals.
111 |
112 | **Consequence**: A permanent ban from any sort of public interaction within the
113 | community.
114 |
115 | ## Attribution
116 |
117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage],
118 | version 2.1, available at
119 | [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
120 |
121 | Community Impact Guidelines were inspired by
122 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC].
123 |
124 | For answers to common questions about this code of conduct, see the FAQ at
125 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
126 | [https://www.contributor-covenant.org/translations][translations].
127 |
128 | [homepage]: https://www.contributor-covenant.org
129 | [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
130 | [Mozilla CoC]: https://github.com/mozilla/diversity
131 | [FAQ]: https://www.contributor-covenant.org/faq
132 | [translations]: https://www.contributor-covenant.org/translations
133 |
--------------------------------------------------------------------------------