├── .binder ├── apt.txt └── requirements.txt ├── .flake8 ├── .github ├── labeler.yml ├── release-drafter.yml ├── semantic.yml └── workflows │ ├── check-docs.yml │ ├── pr-labeler.yml │ ├── publish-doc-to-remote.yml │ ├── release-drafter.yml │ ├── run-test.yml │ └── upload-to-pypi.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CHANGELOG.md ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── docs ├── code-reference │ ├── base-reference.md │ ├── costs │ │ ├── costautoregressive-reference.md │ │ ├── costclinear-reference.md │ │ ├── costcosine-reference.md │ │ ├── costl1-reference.md │ │ ├── costl2-reference.md │ │ ├── costlinear-reference.md │ │ ├── costml-reference.md │ │ ├── costnormal-reference.md │ │ ├── costrank-reference.md │ │ └── costrbf-reference.md │ ├── datasets │ │ ├── pw_constant-reference.md │ │ ├── pw_linear-reference.md │ │ ├── pw_normal-reference.md │ │ └── pw_wavy-reference.md │ ├── detection │ │ ├── binseg-reference.md │ │ ├── bottomup-reference.md │ │ ├── dynp-reference.md │ │ ├── kernelcpd-reference.md │ │ ├── pelt-reference.md │ │ └── window-reference.md │ ├── index.md │ ├── metrics │ │ ├── hausdorff.md │ │ ├── precisionrecall.md │ │ └── randindex.md │ └── show │ │ └── display.md ├── contributing.md ├── custom-cost-function.md ├── data │ └── text-segmentation-data.txt ├── examples │ ├── basic-usage.ipynb │ ├── introduction.md │ ├── kernel-cpd-performance-comparison.ipynb │ ├── merging-cost-functions.ipynb │ ├── music-segmentation.ipynb │ └── text-segmentation.ipynb ├── fit-and-predict.md ├── getting-started │ └── basic-usage.ipynb ├── images │ ├── Jupyter_logo.svg │ ├── correlation_shift.png │ ├── example-display.png │ ├── example_readme.png │ ├── hausdorff.png │ ├── precision_recall.png │ ├── randindex.png │ ├── schema_binseg.png │ ├── schema_fenetre.png │ ├── schema_tree.png │ └── sum_of_sines.png ├── index.md ├── install.md ├── javascripts │ ├── configs.js │ └── mathjax.js ├── license.md ├── release-notes.md ├── user-guide │ 
├── costs │ │ ├── costautoregressive.md │ │ ├── costclinear.md │ │ ├── costcosine.md │ │ ├── costcustom.md │ │ ├── costl1.md │ │ ├── costl2.md │ │ ├── costlinear.md │ │ ├── costml.md │ │ ├── costnormal.md │ │ ├── costrank.md │ │ └── costrbf.md │ ├── datasets │ │ ├── pw_constant.md │ │ ├── pw_linear.md │ │ ├── pw_normal.md │ │ └── pw_wavy.md │ ├── detection │ │ ├── binseg.md │ │ ├── bottomup.md │ │ ├── dynp.md │ │ ├── kernelcpd.md │ │ ├── pelt.md │ │ └── window.md │ ├── evaluation.md │ ├── index.md │ ├── metrics │ │ ├── hausdorff.md │ │ ├── precisionrecall.md │ │ └── randindex.md │ └── show │ │ └── display.md └── what-is-cpd.md ├── images ├── example_readme.png ├── pw_constant.png ├── pw_constantdp.png ├── pw_linear.png └── pw_linearpelt.png ├── mkdocs.yml ├── mkdocs_macros.py ├── pyproject.toml ├── setup.cfg ├── setup.py ├── src └── ruptures │ ├── __init__.py │ ├── base.py │ ├── costs │ ├── __init__.py │ ├── costautoregressive.py │ ├── costclinear.py │ ├── costcosine.py │ ├── costl1.py │ ├── costl2.py │ ├── costlinear.py │ ├── costml.py │ ├── costnormal.py │ ├── costrank.py │ ├── costrbf.py │ └── factory.py │ ├── datasets │ ├── __init__.py │ ├── pw_constant.py │ ├── pw_linear.py │ ├── pw_normal.py │ └── pw_wavy.py │ ├── detection │ ├── __init__.py │ ├── _detection │ │ ├── __init__.py │ │ ├── ekcpd.pxd │ │ ├── ekcpd.pyx │ │ ├── ekcpd_computation.c │ │ ├── ekcpd_computation.h │ │ ├── ekcpd_pelt_computation.c │ │ ├── ekcpd_pelt_computation.h │ │ ├── kernels.c │ │ └── kernels.h │ ├── binseg.py │ ├── bottomup.py │ ├── dynp.py │ ├── kernelcpd.py │ ├── pelt.py │ └── window.py │ ├── exceptions.py │ ├── metrics │ ├── __init__.py │ ├── hamming.py │ ├── hausdorff.py │ ├── precisionrecall.py │ ├── randindex.py │ ├── sanity_check.py │ └── timeerror.py │ ├── show │ ├── __init__.py │ └── display.py │ └── utils │ ├── __init__.py │ ├── _utils │ ├── __init__.py │ ├── convert_path_matrix.pxd │ ├── convert_path_matrix.pyx │ ├── convert_path_matrix_c.c │ └── convert_path_matrix_c.h │ 
├── bnode.py │ ├── drawbkps.py │ └── utils.py └── tests ├── __init__.py ├── test_bnode.py ├── test_costs.py ├── test_datasets.py ├── test_detection.py ├── test_display.py └── test_metrics.py /.binder/apt.txt: -------------------------------------------------------------------------------- 1 | libsndfile1-dev 2 | -------------------------------------------------------------------------------- /.binder/requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | ruptures 3 | librosa 4 | nltk 5 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 127 3 | max-complexity = 10 4 | select = E9,F63,F7,F82,F401 5 | per-file-ignores = __init__.py:F401 -------------------------------------------------------------------------------- /.github/labeler.yml: -------------------------------------------------------------------------------- 1 | # Config file for the labeler Github Action 2 | # https://hub.docker.com/r/jimschubert/labeler-action 3 | # labeler "full" schema 4 | 5 | # enable labeler on issues, prs, or both. 6 | enable: 7 | issues: true 8 | prs: true 9 | 10 | # comments object allows you to specify a different message for issues and prs 11 | 12 | # comments: 13 | # issues: | 14 | # Thanks for opening this issue! 15 | # I have applied any labels matching special text in your title and description. 16 | 17 | # Please review the labels and make any necessary changes. 18 | # prs: | 19 | # Thanks for the contribution! 20 | # I have applied any labels matching special text in your title and description. 21 | 22 | # Please review the labels and make any necessary changes. 
23 | 24 | # Labels is an object where: 25 | # - keys are labels 26 | # - values are objects of { include: [ pattern ], exclude: [ pattern ] } 27 | # - pattern must be a valid regex, and is applied globally to 28 | # title + description of issues and/or prs (see enabled config above) 29 | # - 'include' patterns will associate a label if any of these patterns match 30 | # - 'exclude' patterns will ignore this label if any of these patterns match 31 | labels: 32 | 'Type: Fix': 33 | include: 34 | - '^(bug|fix)(\(.*\))?:(.*)' 35 | exclude: [] 36 | 'Type: Feature': 37 | include: 38 | - '^feat(\(.*\))?:(.*)' 39 | exclude: [] 40 | 'Type: Build': 41 | include: 42 | - '^build(\(.*\))?:(.*)' 43 | exclude: [] 44 | 'Type: Documentation': 45 | include: 46 | - '^docs(\(.*\))?:(.*)' 47 | exclude: [] 48 | 'Type: Refactoring': 49 | include: 50 | - '^(refactor|style)(\(.*\))?:(.*)' 51 | exclude: [] 52 | 'Type: Testing': 53 | include: 54 | - '^test(\(.*\))?:(.*)' 55 | exclude: [] 56 | 'Type: Maintenance': 57 | include: 58 | - '^(chore|mnt)(\(.*\))?:(.*)' 59 | exclude: [] 60 | 'Type: CI': 61 | include: 62 | - '^ci(\(.*\))?:(.*)' 63 | exclude: [] 64 | 'Type: Performance': 65 | include: 66 | - '^perf(\(.*\))?:(.*)' 67 | exclude: [] 68 | 'Type: Revert': 69 | include: 70 | - '^revert(\(.*\))?:(.*)' 71 | exclude: [] 72 | 'skip-changelog': 73 | include: 74 | - '^(chore: pre-commit autoupdate)' 75 | exclude: [] 76 | -------------------------------------------------------------------------------- /.github/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name-template: 'v$RESOLVED_VERSION 🌈' 2 | tag-template: 'v$RESOLVED_VERSION' 3 | categories: 4 | - title: '🚀 Features' 5 | labels: 6 | - 'Type: Feature' 7 | - 'Type: Performance' 8 | - title: '🐛 Bug Fixes' 9 | labels: 10 | - 'Type: Fix' 11 | - title: '📚 Documentation' 12 | label: 'Type: Documentation' 13 | - title: '🧰 Maintenance' 14 | label: 15 | - 'Type: Maintenance' 16 | - 'Type: 
Build' 17 | - 'Type: Refactoring' 18 | - 'Type: CI' 19 | change-template: '- $TITLE @$AUTHOR (#$NUMBER)' 20 | change-title-escapes: '\<*_&' # You can add # and @ to disable mentions, and add ` to disable code blocks. 21 | version-resolver: 22 | major: 23 | labels: 24 | - 'major' 25 | minor: 26 | labels: 27 | - 'minor' 28 | patch: 29 | labels: 30 | - 'patch' 31 | default: patch 32 | exclude-labels: 33 | - 'skip-changelog' 34 | template: | 35 | ## Changes 36 | 37 | $CHANGES 38 | -------------------------------------------------------------------------------- /.github/semantic.yml: -------------------------------------------------------------------------------- 1 | # Always validate the PR title, and ignore the commits 2 | titleOnly: true 3 | 4 | # By default types specified in commitizen/conventional-commit-types is used. 5 | # See: https://github.com/commitizen/conventional-commit-types/blob/v3.0.0/index.json 6 | # You can override the valid types 7 | types: 8 | - feat 9 | - fix 10 | - docs 11 | - style 12 | - refactor 13 | - perf 14 | - test 15 | - build 16 | - ci 17 | - chore 18 | - revert 19 | -------------------------------------------------------------------------------- /.github/workflows/check-docs.yml: -------------------------------------------------------------------------------- 1 | name: "docs" 2 | on: 3 | pull_request: 4 | branches: [ master ] 5 | paths-ignore: 6 | - '.pre-commit-config.yaml' 7 | 8 | jobs: 9 | docs: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v2 13 | - name: Set up Python 3.8 14 | uses: actions/setup-python@v2 15 | with: 16 | python-version: '3.8' 17 | - name: Install sndfile library # for librosa, see https://github.com/deepcharles/ruptures/pull/121 18 | run: | 19 | sudo apt-get install libsndfile1-dev 20 | - name: Install ruptures and dependencies 21 | run: | 22 | python -m pip install --upgrade pip 23 | python -m pip install .[docs] 24 | - name: Run notebooks 25 | run: | 26 | find ./docs -name '*.ipynb' | 
xargs -P 3 -I % jupyter nbconvert --inplace --to notebook --ExecutePreprocessor.kernel_name=python --execute % 27 | - name: Build documentation 28 | run: | 29 | mkdocs build 30 | - uses: actions/upload-artifact@v4 31 | with: 32 | name: DocumentationHTML 33 | path: site/ 34 | -------------------------------------------------------------------------------- /.github/workflows/pr-labeler.yml: -------------------------------------------------------------------------------- 1 | name: Label PRs and issues 2 | on: 3 | issues: 4 | types: [opened, edited, milestoned] 5 | pull_request_target: 6 | types: [opened] 7 | 8 | jobs: 9 | 10 | labeler: 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - name: Check Labels 15 | id: labeler 16 | uses: jimschubert/labeler-action@v2 17 | with: 18 | GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} -------------------------------------------------------------------------------- /.github/workflows/publish-doc-to-remote.yml: -------------------------------------------------------------------------------- 1 | # Publish docs to website on new release (or manual trigger) 2 | name: "Publish docs online" 3 | 4 | on: 5 | release: 6 | types: [created] 7 | workflow_dispatch: 8 | 9 | jobs: 10 | docs: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v2 14 | - name: Set up Python 3.x 15 | uses: actions/setup-python@v2 16 | with: 17 | python-version: '3.8' 18 | - name: Install sndfile library # for librosa, see https://github.com/deepcharles/ruptures/pull/121 19 | run: | 20 | sudo apt-get install libsndfile1-dev 21 | - name: Install ruptures and dependencies 22 | run: | 23 | python -m pip install --upgrade pip 24 | python -m pip install .[docs] 25 | - name: Run notebooks 26 | run: | 27 | find ./docs -name '*.ipynb' | xargs -P 3 -I % jupyter nbconvert --inplace --to notebook --ExecutePreprocessor.kernel_name=python --execute % 28 | - name: Build documentation 29 | run: | 30 | mkdocs build 31 | - name: Publish documentation to remote repo 32 | 
uses: selenehyun/gh-push@master 33 | env: 34 | GITHUB_TOKEN: ${{ secrets.DOC_PUSHER_ACCESS_TOKEN }} 35 | COMMIT_FILES: site/* 36 | REPO_FULLNAME: centre-borelli/ruptures-docs 37 | BRANCH: master 38 | -------------------------------------------------------------------------------- /.github/workflows/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name: Release drafter 2 | 3 | on: 4 | push: 5 | # branches to consider in the event; optional, defaults to all 6 | branches: 7 | - master 8 | 9 | jobs: 10 | update_release_draft: 11 | runs-on: ubuntu-latest 12 | steps: 13 | # Drafts your next Release notes as Pull Requests are merged into "master" 14 | - uses: release-drafter/release-drafter@v5 15 | # with: 16 | # (Optional) specify config name to use, relative to .github/. Default: release-drafter.yml 17 | # config-name: my-config.yml 18 | env: 19 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 20 | -------------------------------------------------------------------------------- /.github/workflows/run-test.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: build 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | pull_request: 10 | branches: [ master ] 11 | paths-ignore: 12 | - '.pre-commit-config.yaml' 13 | - 'docs/**' 14 | - 'images/**' 15 | - '**.md' 16 | - 'mkdocs.yml' 17 | 18 | jobs: 19 | tests: 20 | strategy: 21 | matrix: 22 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] 23 | os: [ubuntu-latest, windows-latest, macos-latest] 24 | runs-on: ${{ matrix.os }} 25 | steps: 26 | - uses: actions/checkout@v4 27 | - name: Set up Python ${{ matrix.python-version }} 28 | uses: actions/setup-python@v2 29 | with: 30 | python-version: ${{ 
matrix.python-version }} 31 | - name: Install ruptures 32 | run: | 33 | python -m pip install --upgrade pip 34 | python -m pip install .[test] 35 | - name: Test with pytest 36 | run: | 37 | python -m pytest --no-cov 38 | 39 | coverage: 40 | runs-on: ubuntu-latest 41 | steps: 42 | - uses: actions/checkout@v2 43 | - name: Set up Python 3.10 44 | uses: actions/setup-python@v2 45 | with: 46 | python-version: '3.10' 47 | - name: Install ruptures 48 | run: | 49 | python -m pip install --upgrade pip 50 | python -m pip install .[test,display] 51 | - name: Test with pytest 52 | run: | 53 | python -m pytest --cov --cov-report=xml --cov-report=term:skip-covered 54 | - name: Upload coverage to Codecov 55 | uses: codecov/codecov-action@v5 56 | # env: 57 | # CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} 58 | with: 59 | files: ./coverage.xml 60 | flags: unittests 61 | fail_ci_if_error: true 62 | - uses: actions/upload-artifact@v4 63 | with: 64 | path: coverage.xml 65 | -------------------------------------------------------------------------------- /.github/workflows/upload-to-pypi.yml: -------------------------------------------------------------------------------- 1 | name: Upload package to Pypi 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | overrideVersion: 7 | description: Manually force a version 8 | 9 | env: 10 | CIBW_BUILD_VERBOSITY: 3 11 | SETUPTOOLS_SCM_PRETEND_VERSION: ${{ github.event.inputs.overrideVersion }} 12 | # Run the package tests using `pytest` 13 | CIBW_TEST_REQUIRES: pytest 14 | CIBW_TEST_COMMAND: pytest {project}/tests 15 | 16 | jobs: 17 | make_sdist: 18 | name: Make SDist 19 | runs-on: ubuntu-latest 20 | steps: 21 | - uses: actions/checkout@v2 22 | - name: Setup Python 23 | uses: actions/setup-python@v2 24 | with: 25 | python-version: "3.10" 26 | - name: Install deps 27 | run: python -m pip install build twine 28 | - name: Build SDist 29 | run: python -m build --sdist 30 | - uses: actions/upload-artifact@v2 31 | with: 32 | path: dist/*.tar.gz 33 | - 
name: Check metadata 34 | run: twine check dist/* 35 | 36 | build_wheels: 37 | name: Build wheels on ${{ matrix.os }} 38 | runs-on: ${{ matrix.os }} 39 | strategy: 40 | matrix: 41 | os: [ubuntu-latest, windows-latest, macos-latest] 42 | steps: 43 | - uses: actions/checkout@v2 44 | 45 | # Used to host cibuildwheel 46 | - uses: actions/setup-python@v2 47 | 48 | - name: Install cibuildwheel 49 | run: python -m pip install cibuildwheel 50 | 51 | - name: Build wheels 52 | run: python -m cibuildwheel --output-dir wheelhouse 53 | env: 54 | # Disable explicitly building PyPI wheels for specific configurations 55 | CIBW_SKIP: pp* cp{38,39,310,311,312}-manylinux_i686 *-musllinux_* cp{38,39,310,311,312}-win32 56 | CIBW_PRERELEASE_PYTHONS: False 57 | # Manually force a version (and avoid building local wheels) 58 | CIBW_ENVIRONMENT: "SETUPTOOLS_SCM_PRETEND_VERSION=${{ github.event.inputs.overrideVersion }}" 59 | CIBW_ARCHS_MACOS: x86_64 arm64 60 | 61 | - uses: actions/upload-artifact@v2 62 | with: 63 | path: wheelhouse/*.whl 64 | 65 | build_aarch64_wheels: 66 | name: Build wheels manylinux_aarch64 67 | runs-on: ubuntu-latest 68 | strategy: 69 | matrix: 70 | python: [36, 37, 38, 39, 310, 311, 312] 71 | include: 72 | - os: ubuntu-latest 73 | arch: aarch64 74 | platform_id: manylinux_aarch64 75 | steps: 76 | - uses: actions/checkout@v2 77 | 78 | - name: Set up QEMU 79 | uses: docker/setup-qemu-action@v1 80 | 81 | - name: Install cibuildwheel 82 | run: python -m pip install cibuildwheel 83 | 84 | - name: Build wheels 85 | run: python -m cibuildwheel --output-dir wheelhouse 86 | env: 87 | CIBW_ARCHS_LINUX: ${{matrix.arch}} 88 | CIBW_BUILD: cp${{ matrix.python }}-${{ matrix.platform_id }} 89 | # Manually force a version (and avoid building local wheels) 90 | CIBW_ENVIRONMENT: "SETUPTOOLS_SCM_PRETEND_VERSION=${{ github.event.inputs.overrideVersion }}" 91 | - uses: actions/upload-artifact@v2 92 | with: 93 | path: wheelhouse/*.whl 94 | 95 | upload_all: 96 | needs: [build_wheels, 
build_aarch64_wheels, make_sdist] 97 | runs-on: ubuntu-latest 98 | steps: 99 | - uses: actions/download-artifact@v2 100 | with: 101 | name: artifact 102 | path: dist 103 | - uses: pypa/gh-action-pypi-publish@release/v1 104 | with: 105 | user: ${{ secrets.PYPI_USERNAME }} 106 | password: ${{ secrets.PYPI_PASSWORD }} 107 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | 55 | # Sphinx documentation 56 | docs/_build/ 57 | 58 | # PyBuilder 59 | target/ 60 | 61 | # Custom 62 | .coverage_conflict-20151112-152034 63 | .ipynb_checkpoints/ 64 | Untitled.ipynb 65 | mem 66 | .vscode/* 67 | Demonstration.ipynb 68 | docs.zip 69 | make.bat 70 | ruptures.sublime-project 71 | ruptures.sublime-workspace 72 | test.ipynb 73 | ruptures/detection/.circleci/run-build-locally.sh 74 | .circleci/run-build-locally.sh 75 | docs/build/* 76 | 77 | # OS generated files 78 | .DS_Store 79 | ./**/.DS_Store 80 | .DS_Store? 
81 | .Spotlight-V100 82 | .Trashes 83 | ehthumbs.db 84 | Thumbs.db 85 | site/ 86 | src/ruptures/version.py 87 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/psf/black 3 | rev: 25.1.0 4 | hooks: 5 | - id: black 6 | language_version: python3 7 | - repo: https://github.com/pre-commit/pre-commit-hooks 8 | rev: v5.0.0 9 | hooks: 10 | - id: trailing-whitespace # This hook trims trailing whitespace. 11 | - id: check-docstring-first # Checks a common error of defining a docstring after code. 12 | - id: check-merge-conflict # Check for files that contain merge conflict strings. 13 | - id: check-yaml # This hook checks yaml files for parseable syntax. 14 | - id: detect-private-key # Detects the presence of private keys. 15 | - id: check-symlinks 16 | - id: check-toml 17 | - repo: https://github.com/pre-commit/pygrep-hooks 18 | rev: v1.10.0 19 | hooks: 20 | - id: python-no-eval # A quick check for the eval() built-in function. 
21 | - repo: https://github.com/PyCQA/docformatter 22 | rev: eb1df347edd128b30cd3368dddc3aa65edcfac38 # Don't autoupdate until https://github.com/PyCQA/docformatter/issues/293 is fixed 23 | hooks: 24 | - id: docformatter 25 | exclude: mkdocs_macros.py 26 | args: [--in-place] 27 | - repo: https://github.com/PyCQA/flake8 28 | rev: 7.2.0 29 | hooks: 30 | - id: flake8 31 | # additional_dependencies: [flake8-docstrings, flake8-bugbear, flake8-spellcheck, flake8-import-order] 32 | - repo: https://github.com/kynan/nbstripout 33 | rev: 0.8.1 34 | hooks: 35 | - id: nbstripout 36 | - repo: https://github.com/asottile/blacken-docs 37 | rev: 1.19.1 38 | hooks: 39 | - id: blacken-docs 40 | - repo: https://github.com/nbQA-dev/nbQA 41 | rev: 1.9.1 42 | hooks: 43 | - id: nbqa-black 44 | args: [--nbqa-mutate] 45 | 46 | ci: 47 | autoupdate_commit_msg: 'chore: pre-commit autoupdate' 48 | autoupdate_schedule: 'quarterly' 49 | 50 | 51 | # https://github.com/PyCQA/docformatter/pull/287 -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | The latest release notes are available directly in Github: [ruptures/releases](https://github.com/deepcharles/ruptures/releases). 4 | 5 | Earlier releases are documented below ⬇️. 
6 | 7 | ## [1.1.2] - 2020-12-01 8 | 9 | ### Added 10 | 11 | - 12cbc9e feat: add piecewise linear cpd (#91) 12 | - a12b215 test: add code coverage badge (#97) 13 | - 2e9b17f docs: add binder for notebooks (#94) 14 | - da7544f docs(costcosine): add entry for CostCosine in docs (#93) 15 | - 8c9aa35 build(setup.py/cfg): add build_ext to setup.py (#88) 16 | - 10ef8e8 build(python39): add py39 to supported versions (#87) 17 | 18 | ### Changed 19 | 20 | - 069bd41 fix(kernelcpd): bug fix in pelt (#95) 21 | - b4abc34 fix: memory leak in KernelCPD (#89) 22 | 23 | ## [1.1.1] - 2020-11-26 24 | 25 | No change to the code compared to the previous version. 26 | The package was only partly published to Pypi because of the failure of one provider in the CI. 27 | Since Pypi's policy prevents re-uploading twice the same version, we have to increment the version number. 28 | 29 | ## [1.1.0] - 2020-11-23 30 | 31 | ### Added 32 | 33 | - modify publishing process to Pypi PR#83 34 | - add cosine kernel (cost function and in KernelCPD)PR#74 35 | - add faster kernel change point detection (`KernelCPD`, C implementation) PR#74 36 | - add manual trigger to publish to Pypi PR#72 37 | 38 | ### Changed 39 | 40 | ## [1.0.6] - 2020-10-23 41 | ### Added 42 | 43 | - Correct minor error in Dynp (about min_size) PR#74 44 | - Fix legacy formatting errors PR#69 45 | - New documentation (from Sphinx to Mkdocs) PR#64 46 | - Separate requirements.txt and requirements-dev.txt PR#64 47 | - A changelog file ([link](https://github.com/deepcharles/ruptures/blob/master/CHANGELOG.md)) 48 | - New Github actions for automatic generation of documentation 49 | - Pre-commit code formatting using [black](https://github.com/psf/black) 50 | 51 | ### Changed 52 | 53 | - Correction of display function test #64 54 | - Add badges in the README (Github repo) PR#62: pypi version, python version, code style, contributor list 55 | - Typo in documentation ([PR#60](https://github.com/deepcharles/ruptures/pull/60)) by @gjaeger 56 | 
- Documentation theme 57 | - Documentation site 58 | 59 | ## [1.0.5] - 2020-07-22 60 | ### Changed 61 | - Link to documentation in PyPi description 62 | 63 | [Unreleased]: https://github.com/deepcharles/ruptures/compare/v1.1.2...HEAD 64 | [1.1.2]: https://github.com/deepcharles/ruptures/compare/v1.1.1...v1.1.2 65 | [1.1.1]: https://github.com/deepcharles/ruptures/compare/v1.1.0...v1.1.1 66 | [1.1.0]: https://github.com/deepcharles/ruptures/compare/v1.0.6...v1.1.0 67 | [1.0.6]: https://github.com/deepcharles/ruptures/compare/v1.0.5...v1.0.6 68 | [1.0.5]: https://github.com/deepcharles/ruptures/compare/v1.0.4...v1.0.5 69 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | 4 | ## Before contributing 5 | 6 | In all following steps, it is highly recommended to use a virtual environment. 7 | Build and installation are performed using `pip` so be sure to have the latest version available. 8 | 9 | ``` 10 | python -m pip install --upgrade pip 11 | ``` 12 | 13 | ### Install the development version 14 | 15 | It is important that you contribute to the latest version of the code. 16 | To that end, start by cloning the Github repository. 17 | 18 | ``` 19 | git clone https://github.com/deepcharles/ruptures 20 | cd ruptures 21 | ``` 22 | 23 | Then install the downloaded package with `pip`. 24 | 25 | ``` 26 | python -m pip install --editable .[dev] 27 | ``` 28 | 29 | Note that `python -m` can be omitted most of the times, but within virtualenvs, it can prevent certain errors. 30 | Also, in certain terminals (such as `zsh`), the square brackets must be escaped, e.g. replace `.[dev]` by `.\[dev\]`. 31 | 32 | In addition to `numpy`, `scipy` and `ruptures`, this command will install all packages needed to develop `ruptures`. 
33 | The exact list of libraries can be found in the [`setup.cfg` file](https://github.com/deepcharles/ruptures/blob/master/setup.cfg) (section `[options.extras_require]`). 34 | 35 | ### Pre-commit hooks 36 | 37 | We use `pre-commit` to run Git hooks before submitting the code to review. 38 | These hook scripts perform simple tasks before each commit (code formatting mostly). 39 | To activate the hooks, simply run the following command in your terminal. 40 | 41 | ``` 42 | pre-commit install 43 | ``` 44 | 45 | If you try to commit a non-compliant (i.e. badly formatted) file, `pre-commit` will modify this file and make the commit fail. 46 | However you need to stage the new changes **yourself** as `pre-commit` will not do that for you (this is by design; see [here](https://github.com/pre-commit/pre-commit/issues/806) or [here](https://github.com/pre-commit/pre-commit/issues/747)). 47 | Fortunately, `pre-commit` outputs useful messages. 48 | 49 | The list of hooks (and their options) can be found in [`.pre-commit-config.yaml`](https://github.com/deepcharles/ruptures/blob/master/.pre-commit-config.yaml). 50 | For more information, see [their website](https://pre-commit.com/). 51 | If you want to manually run all pre-commit hooks on a repository, run `pre-commit run --all-files`. To run individual hooks use `pre-commit run `. 52 | 53 | ## Contribute to the code 54 | 55 | ### Write tests 56 | 57 | The following command executes the test suite. 58 | 59 | ``` 60 | python -m pytest 61 | ``` 62 | 63 | ### Write docstrings 64 | 65 | ## Contribute to the documentation 66 | 67 | Use [MkDocs](https://www.mkdocs.org/). 68 | 69 | Use `mkdocs serve` to preview your changes. 70 | Once you are satisfied, no need to build the documentation, the CI will take care of that and publish it online at the next release of the package (if the pull request has been merged). 
71 | 72 | ### Add examples to the gallery 73 | 74 | An easy way to showcase your work with `ruptures` is to write a narrative example. 75 | To that end, simply put a [Jupyter notebook](https://jupyter.org/) in the `docs/examples` folder. 76 | To make it appear in the documentation, add a reference in `mkdocs.yml` (`nav > Gallery of examples`): if the notebook's name is `my_notebook.ipynb`, it will be available as `examples/my_notebook.ipynb`. 77 | It will be rendered automatically when [MkDocs](https://www.mkdocs.org/) builds the documentation. 78 | 79 | !!! note 80 | To automatically add a [Binder](https://mybinder.org/v2/gh/deepcharles/ruptures/master) link and a download link to your notebook, simply add the following line of code. 81 | ```markdown 82 | {{ '' }} 83 | ``` 84 | Ideally, place this code below the title of the notebook (same cell) and it will be rendered as in [here](examples/kernel-cpd-performance-comparison.ipynb). 85 | 86 | We welcome any interesting work about a new cost function, algorithm, data, calibration method, etc. 87 | Any other package can be used in combination with `ruptures`. 88 | However, each example should be clearly explained with text and figures. 89 | The amount of raw code should also remain limited for readability. 90 | 91 | 92 | ## Miscellaneous 93 | 94 | ### Naming convention 95 | 96 | We try to follow (roughly) a consistent naming convention of modules, classes, functions, etc. 97 | When in doubt, you can refer to the [PEP 8 style guide for Python code](https://www.python.org/dev/peps/pep-0008/#naming-conventions). 98 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2017-2021, ENS Paris-Saclay, CNRS 4 | All rights reserved. 
5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | prune * 2 | graft src 3 | graft tests 4 | 5 | include LICENSE README.md pyproject.toml setup.py setup.cfg 6 | exclude CHANGELOG.md CONTRIBUTING.md mkdocs.yml mkdocs_macros.py 7 | global-exclude __pycache__ *.py[cod] .* 8 | -------------------------------------------------------------------------------- /docs/code-reference/base-reference.md: -------------------------------------------------------------------------------- 1 | # Base classes (ruptures.base) 2 | 3 | ::: ruptures.base -------------------------------------------------------------------------------- /docs/code-reference/costs/costautoregressive-reference.md: -------------------------------------------------------------------------------- 1 | # Autoregressive model change (CostAutoregressive) 2 | 3 | ::: ruptures.costs.costautoregressive 4 | rendering: 5 | show_root_heading: true 6 | -------------------------------------------------------------------------------- /docs/code-reference/costs/costclinear-reference.md: -------------------------------------------------------------------------------- 1 | # Continuous linear change (CostCLinear) 2 | 3 | ::: ruptures.costs.costclinear.CostCLinear 4 | rendering: 5 | show_root_heading: true -------------------------------------------------------------------------------- /docs/code-reference/costs/costcosine-reference.md: -------------------------------------------------------------------------------- 1 | # Kernelized mean change (CostCosine) 2 | 3 | ::: ruptures.costs.costcosine.CostCosine 4 | rendering: 5 | show_root_heading: true 6 | -------------------------------------------------------------------------------- /docs/code-reference/costs/costl1-reference.md: -------------------------------------------------------------------------------- 1 | # CostL1 (least absolute deviation) 2 
| 3 | ::: ruptures.costs.costl1.CostL1 4 | rendering: 5 | show_root_heading: true 6 | 7 | -------------------------------------------------------------------------------- /docs/code-reference/costs/costl2-reference.md: -------------------------------------------------------------------------------- 1 | # CostL2 (least squared deviation) 2 | 3 | ::: ruptures.costs.costl2.CostL2 4 | rendering: 5 | show_root_heading: true 6 | -------------------------------------------------------------------------------- /docs/code-reference/costs/costlinear-reference.md: -------------------------------------------------------------------------------- 1 | # Linear model change (CostLinear) 2 | 3 | ::: ruptures.costs.costlinear.CostLinear 4 | rendering: 5 | show_root_heading: true -------------------------------------------------------------------------------- /docs/code-reference/costs/costml-reference.md: -------------------------------------------------------------------------------- 1 | # Mahalanobis-type change (CostMl) 2 | 3 | ::: ruptures.costs.costml.CostMl 4 | rendering: 5 | show_root_heading: true -------------------------------------------------------------------------------- /docs/code-reference/costs/costnormal-reference.md: -------------------------------------------------------------------------------- 1 | # Gaussian process change (CostNormal) 2 | 3 | ::: ruptures.costs.costnormal.CostNormal 4 | rendering: 5 | show_root_heading: true -------------------------------------------------------------------------------- /docs/code-reference/costs/costrank-reference.md: -------------------------------------------------------------------------------- 1 | # Rank-based change (CostRank) 2 | 3 | ::: ruptures.costs.costrank.CostRank 4 | rendering: 5 | show_root_heading: true 6 | -------------------------------------------------------------------------------- /docs/code-reference/costs/costrbf-reference.md: 
-------------------------------------------------------------------------------- 1 | # Kernelized mean change (CostRbf) 2 | 3 | ::: ruptures.costs.costrbf.CostRbf 4 | rendering: 5 | show_root_heading: true 6 | -------------------------------------------------------------------------------- /docs/code-reference/datasets/pw_constant-reference.md: -------------------------------------------------------------------------------- 1 | # Piecewise constant (pw_constant) 2 | 3 | ::: ruptures.datasets.pw_constant.pw_constant 4 | rendering: 5 | show_root_heading: true -------------------------------------------------------------------------------- /docs/code-reference/datasets/pw_linear-reference.md: -------------------------------------------------------------------------------- 1 | # Piecewise linear (pw_linear) 2 | 3 | ::: ruptures.datasets.pw_linear.pw_linear 4 | rendering: 5 | show_root_heading: true -------------------------------------------------------------------------------- /docs/code-reference/datasets/pw_normal-reference.md: -------------------------------------------------------------------------------- 1 | # Piecewise Gaussian (pw_normal) 2 | 3 | ::: ruptures.datasets.pw_normal.pw_normal 4 | rendering: 5 | show_root_heading: true -------------------------------------------------------------------------------- /docs/code-reference/datasets/pw_wavy-reference.md: -------------------------------------------------------------------------------- 1 | # Piecewise wavy (pw_wavy) 2 | 3 | ::: ruptures.datasets.pw_wavy.pw_wavy 4 | rendering: 5 | show_root_heading: true -------------------------------------------------------------------------------- /docs/code-reference/detection/binseg-reference.md: -------------------------------------------------------------------------------- 1 | # Binary segmentation 2 | 3 | ::: ruptures.detection.binseg.Binseg 4 | rendering: 5 | show_root_heading: true 6 | 7 | 
-------------------------------------------------------------------------------- /docs/code-reference/detection/bottomup-reference.md: -------------------------------------------------------------------------------- 1 | # Bottom-up segmentation 2 | 3 | ::: ruptures.detection.bottomup.BottomUp 4 | rendering: 5 | show_root_heading: true 6 | 7 | -------------------------------------------------------------------------------- /docs/code-reference/detection/dynp-reference.md: -------------------------------------------------------------------------------- 1 | # Dynamic programming 2 | 3 | ::: ruptures.detection.dynp.Dynp 4 | rendering: 5 | show_root_heading: true 6 | -------------------------------------------------------------------------------- /docs/code-reference/detection/kernelcpd-reference.md: -------------------------------------------------------------------------------- 1 | # Efficient kernel change point detection 2 | 3 | ::: ruptures.detection.kernelcpd.KernelCPD 4 | rendering: 5 | show_root_heading: true -------------------------------------------------------------------------------- /docs/code-reference/detection/pelt-reference.md: -------------------------------------------------------------------------------- 1 | # Pelt 2 | 3 | ::: ruptures.detection.pelt.Pelt 4 | rendering: 5 | show_root_heading: true 6 | -------------------------------------------------------------------------------- /docs/code-reference/detection/window-reference.md: -------------------------------------------------------------------------------- 1 | # Window-based change point detection 2 | 3 | ::: ruptures.detection.window.Window 4 | rendering: 5 | show_root_heading: true 6 | -------------------------------------------------------------------------------- /docs/code-reference/index.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | This section describes the API of all functions and classes in the `ruptures` package. 
4 | For a more intuitive description of each method, please refer to the [User guide](../user-guide/index.md). 5 | 6 | Roughly, each module corresponds to a certain type of procedure: 7 | 8 | - `ruptures.base`: base classes; 9 | - `ruptures.detection`: search methods; 10 | - `ruptures.costs`: cost functions; 11 | - `ruptures.datasets`: data set generating utilities; 12 | - `ruptures.metrics`: evaluation metrics; 13 | - `ruptures.show`: display functions. 14 | -------------------------------------------------------------------------------- /docs/code-reference/metrics/hausdorff.md: -------------------------------------------------------------------------------- 1 | # Hausdorff metric (`hausdorff`) 2 | 3 | ::: ruptures.metrics.hausdorff.hausdorff 4 | rendering: 5 | show_root_heading: true -------------------------------------------------------------------------------- /docs/code-reference/metrics/precisionrecall.md: -------------------------------------------------------------------------------- 1 | # Precision and recall (`precision_recall`) 2 | 3 | ::: ruptures.metrics.precisionrecall.precision_recall 4 | rendering: 5 | show_root_heading: true -------------------------------------------------------------------------------- /docs/code-reference/metrics/randindex.md: -------------------------------------------------------------------------------- 1 | # Rand index (`randindex`) 2 | 3 | ::: ruptures.metrics.randindex.randindex 4 | rendering: 5 | show_root_heading: true -------------------------------------------------------------------------------- /docs/code-reference/show/display.md: -------------------------------------------------------------------------------- 1 | # Display (`display`) 2 | 3 | ::: ruptures.show.display.display 4 | rendering: 5 | show_root_heading: true -------------------------------------------------------------------------------- /docs/contributing.md: -------------------------------------------------------------------------------- 1 |
../CONTRIBUTING.md -------------------------------------------------------------------------------- /docs/custom-cost-function.md: -------------------------------------------------------------------------------- 1 | # Creating a custom cost function 2 | 3 | In order to define custom cost functions, simply create a class that inherits from 4 | `ruptures.base.BaseCost` and implement the methods `.fit(signal)` and `.error(start, end)`: 5 | 6 | - The method `.fit(signal)` takes a signal as input and sets parameters. It returns `'self'`. 7 | - The method `.error(start, end)` takes two indexes `'start'` and `'end'` and returns the cost on the segment start:end. 8 | 9 | !!! example 10 | See this [custom cost example](user-guide/costs/costcustom.md). 11 | 12 | -------------------------------------------------------------------------------- /docs/examples/basic-usage.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Basic usage\n", 8 | "\n", 9 | "\n", 10 | "\n", 11 | "Let us start with a simple example to illustrate the use of `ruptures`: generate a 3-dimensional piecewise constant signal with noise and estimate the change points.\n", 12 | "\n", 13 | "## Setup\n", 14 | "First, we make the necessary imports." 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import matplotlib.pyplot as plt # for display purposes\n", 24 | "\n", 25 | "import ruptures as rpt # our package" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "## Generate and display the signal\n", 33 | "\n", 34 | "Let us generate a 3-dimensional piecewise constant signal with Gaussian noise." 
35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "n_samples, n_dims, sigma = 1000, 3, 2\n", 44 | "n_bkps = 4 # number of breakpoints\n", 45 | "signal, bkps = rpt.pw_constant(n_samples, n_dims, n_bkps, noise_std=sigma)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "The true change points of this synthetic signal are available in the `bkps` variable." 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "print(bkps)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "Note that the first four element are change point indexes while the last is simply the number of samples.\n", 69 | "(This is a technical convention so that functions in `ruptures` always know the length of the signal at hand.)\n", 70 | "\n", 71 | "It is also possible to plot our \\(\\mathbb{R}^3\\)-valued signal along with the true change points with the `rpt.display` function.\n", 72 | "In the following image, the color changes whenever the mean of the signal shifts." 
73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "fig, ax_array = rpt.display(signal, bkps)" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "## Change point detection\n", 89 | "We can now perform change point detection, meaning that we find the indexes where the signal mean changes.\n", 90 | "To that end, we minimize the sum of squared errors when approximating the signal by a piecewise constant signal.\n", 91 | "Formally, for a signal \\( y_0 , y_1 , \\dots , y_{T-1} \\) (\\( T \\) samples), we solve the following optimization problem, over all possible change positions \\( t_1 < t_2 < \\dots < t_K \\)\n", 92 | "where the number \\( K \\) of changes is defined by the user:\n", 93 | "\n", 94 | "\\[\n", 95 | " \\hat{t}_1, \\hat{t}_2,\\dots,\\hat{t}_K = \\arg\\min_{t_1,\\dots,t_K} V(t_1,t_2,\\dots,t_K)\n", 96 | "\\]\n", 97 | "\n", 98 | "with\n", 99 | "\n", 100 | "\\[\n", 101 | " V(t_1,t_2,\\dots,t_K) := \\sum_{k=0}^K\\sum_{t=t_k}^{t_{k+1}-1} \\|y_t-\\bar{y}_{t_k..t_{k+1}}\\|^2\n", 102 | "\\]\n", 103 | "\n", 104 | "\n", 105 | "where \\( \\bar{y}_{t_k..t_{k+1}} \\) is the empirical mean of the sub-signal \\( y_{t_k}, y_{t_k+1},\\dots,y_{t_{k+1}-1} \\).\n", 106 | "(By convention \\( t_0=0 \\) and \\( t_{K+1}=T \\).)\n", 107 | "\n", 108 | "This optimization is solved with dynamic programming, using the [`Dynp`](../user-guide/detection/dynp.md) class. 
(More information in the section [What is change point detection?](/what-is-cpd) and the [User guide](/user-guide).)\n" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "# detection\n", 118 | "algo = rpt.Dynp(model=\"l2\").fit(signal)\n", 119 | "result = algo.predict(n_bkps=4)\n", 120 | "\n", 121 | "print(result)" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "Again the first elements are change point indexes and the last is the number of samples." 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "## Display the results" 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": {}, 141 | "source": [ 142 | "To visually compare the true segmentation (`bkps`) and the estimated one (`result`), we can resort to `rpt.display` a second time.\n", 143 | "In the following image, the alternating colors indicate the true breakpoints and the dashed vertical lines, the estimated breakpoints." 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "# display\n", 153 | "rpt.display(signal, bkps, result)\n", 154 | "plt.show()" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "In this simple example, both are quite similar and almost indistinguishable."
162 | ] 163 | } 164 | ], 165 | "metadata": { 166 | "kernelspec": { 167 | "display_name": "Python 3", 168 | "language": "python", 169 | "name": "python3" 170 | }, 171 | "language_info": { 172 | "codemirror_mode": { 173 | "name": "ipython", 174 | "version": 3 175 | }, 176 | "file_extension": ".py", 177 | "mimetype": "text/x-python", 178 | "name": "python", 179 | "nbconvert_exporter": "python", 180 | "pygments_lexer": "ipython3", 181 | "version": "3.9.0" 182 | } 183 | }, 184 | "nbformat": 4, 185 | "nbformat_minor": 4 186 | } 187 | -------------------------------------------------------------------------------- /docs/examples/introduction.md: -------------------------------------------------------------------------------- 1 | # Gallery of examples 2 | 3 | These examples illustrate the main features of the `ruptures` package. 4 | Simple examples are direct applications of the library's functions on simulated data. 5 | Advanced examples deal with more complex tasks, such as calibration and real-world data. -------------------------------------------------------------------------------- /docs/fit-and-predict.md: -------------------------------------------------------------------------------- 1 | # Fitting and prediction: estimator basics 2 | 3 | `ruptures` has an object-oriented modelling approach (largely inspired by [scikit-learn](https://scikit-learn.org/stable/getting_started.html)): change point detection algorithms are broken down into two conceptual objects that inherits from base classes: `BaseEstimator` and 4 | `BaseCost`. 5 | 6 | 7 | ## Initializing a new estimator 8 | 9 | Each change point detection algorithm inherits from the base class `ruptures.base.BaseEstimator`. 10 | When a class that inherits from the base estimator is created, the `.__init__()` method initializes 11 | an estimator with the following arguments: 12 | 13 | * `model`: "l1", "l2", "normal", "rbf", "linear", etc. Cost function to use to compute the approximation error. 
14 | * `cost`: a custom cost function to the detection algorithm. Should be a `BaseCost` instance. 15 | * `jump`: reduce the set of possible change point indexes; predicted change points can only be a multiple of `jump`. 16 | * `min_size`: minimum number of samples between two change points. 17 | 18 | ## Making a prediction 19 | 20 | The main methods are `.fit()`, `.predict()`, `.fit_predict()`: 21 | 22 | - `.fit()`: generally takes a signal as input and fit the algorithm to the data. 23 | - `.predict()`: performs the change point detection. This method returns a list of indexes corresponding to the end of each regimes. By design, the last element of this list is the number of samples. 24 | - ``.fit_predict()``: helper method which calls ``.fit()`` and ``.predict()`` successively. -------------------------------------------------------------------------------- /docs/getting-started/basic-usage.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Basic usage\n", 8 | "\n", 9 | "\n", 10 | "\n", 11 | "Let us start with a simple example to illustrate the use of `ruptures`: generate a 3-dimensional piecewise constant signal with noise and estimate the change points.\n", 12 | "\n", 13 | "## Setup\n", 14 | "First, we make the necessary imports." 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import matplotlib.pyplot as plt # for display purposes\n", 24 | "\n", 25 | "import ruptures as rpt # our package" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "## Generate and display the signal\n", 33 | "\n", 34 | "Let us generate a 3-dimensional piecewise constant signal with Gaussian noise." 
35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "n_samples, n_dims, sigma = 1000, 3, 2\n", 44 | "n_bkps = 4 # number of breakpoints\n", 45 | "signal, bkps = rpt.pw_constant(n_samples, n_dims, n_bkps, noise_std=sigma)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "The true change points of this synthetic signal are available in the `bkps` variable." 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "print(bkps)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "Note that the first four element are change point indexes while the last is simply the number of samples.\n", 69 | "(This is a technical convention so that functions in `ruptures` always know the length of the signal at hand.)\n", 70 | "\n", 71 | "It is also possible to plot our \\(\\mathbb{R}^3\\)-valued signal along with the true change points with the `rpt.display` function.\n", 72 | "In the following image, the color changes whenever the mean of the signal shifts." 
73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "fig, ax_array = rpt.display(signal, bkps)" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "## Change point detection\n", 89 | "We can now perform change point detection, meaning that we find the indexes where the signal mean changes.\n", 90 | "To that end, we minimize the sum of squared errors when approximating the signal by a piecewise constant signal.\n", 91 | "Formally, for a signal \\( y_0 , y_1 , \\dots , y_{T-1} \\) (\\( T \\) samples), we solve the following optimization problem, over all possible change positions \\( t_1 < t_2 < \\dots < t_K \\)\n", 92 | "where the number \\( K \\) of changes is defined by the user:\n", 93 | "\n", 94 | "\\[\n", 95 | " \\hat{t}_1, \\hat{t}_2,\\dots,\\hat{t}_K = \\arg\\min_{t_1,\\dots,t_K} V(t_1,t_2,\\dots,t_K)\n", 96 | "\\]\n", 97 | "\n", 98 | "with\n", 99 | "\n", 100 | "\\[\n", 101 | " V(t_1,t_2,\\dots,t_K) := \\sum_{k=0}^K\\sum_{t=t_k}^{t_{k+1}-1} \\|y_t-\\bar{y}_{t_k..t_{k+1}}\\|^2\n", 102 | "\\]\n", 103 | "\n", 104 | "\n", 105 | "where \\( \\bar{y}_{t_k..t_{k+1}} \\) is the empirical mean of the sub-signal \\( y_{t_k}, y_{t_k+1},\\dots,y_{t_{k+1}-1} \\).\n", 106 | "(By convention \\( t_0=0 \\) and \\( t_{K+1}=T \\).)\n", 107 | "\n", 108 | "This optimization is solved with dynamic programming, using the [`Dynp`](../user-guide/detection/dynp.md) class. 
(More information in the section [What is change point detection?](/what-is-cpd) and the [User guide](/user-guide).)\n" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "# detection\n", 118 | "algo = rpt.Dynp(model=\"l2\").fit(signal)\n", 119 | "result = algo.predict(n_bkps=4)\n", 120 | "\n", 121 | "print(result)" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "Again the first elements are change point indexes and the last is the number of samples." 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "## Display the results" 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": {}, 141 | "source": [ 142 | "To visually compare the true segmentation (`bkps`) and the estimated one (`result`), we can resort to `rpt.display` a second time.\n", 143 | "In the following image, the alternating colors indicate the true breakpoints and the dashed vertical lines, the estimated breakpoints." 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "# display\n", 153 | "rpt.display(signal, bkps, result)\n", 154 | "plt.show()" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "In this simple example, both are quite similar and almost indistinguishable."
162 | ] 163 | } 164 | ], 165 | "metadata": { 166 | "kernelspec": { 167 | "display_name": "Python 3", 168 | "language": "python", 169 | "name": "python3" 170 | }, 171 | "language_info": { 172 | "codemirror_mode": { 173 | "name": "ipython", 174 | "version": 3 175 | }, 176 | "file_extension": ".py", 177 | "mimetype": "text/x-python", 178 | "name": "python", 179 | "nbconvert_exporter": "python", 180 | "pygments_lexer": "ipython3", 181 | "version": "3.9.0" 182 | } 183 | }, 184 | "nbformat": 4, 185 | "nbformat_minor": 4 186 | } 187 | -------------------------------------------------------------------------------- /docs/images/correlation_shift.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepcharles/ruptures/39a837b7e8d3e08d04d33f14749a1a39d86183d2/docs/images/correlation_shift.png -------------------------------------------------------------------------------- /docs/images/example-display.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepcharles/ruptures/39a837b7e8d3e08d04d33f14749a1a39d86183d2/docs/images/example-display.png -------------------------------------------------------------------------------- /docs/images/example_readme.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepcharles/ruptures/39a837b7e8d3e08d04d33f14749a1a39d86183d2/docs/images/example_readme.png -------------------------------------------------------------------------------- /docs/images/hausdorff.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepcharles/ruptures/39a837b7e8d3e08d04d33f14749a1a39d86183d2/docs/images/hausdorff.png -------------------------------------------------------------------------------- /docs/images/precision_recall.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepcharles/ruptures/39a837b7e8d3e08d04d33f14749a1a39d86183d2/docs/images/precision_recall.png -------------------------------------------------------------------------------- /docs/images/randindex.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepcharles/ruptures/39a837b7e8d3e08d04d33f14749a1a39d86183d2/docs/images/randindex.png -------------------------------------------------------------------------------- /docs/images/schema_binseg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepcharles/ruptures/39a837b7e8d3e08d04d33f14749a1a39d86183d2/docs/images/schema_binseg.png -------------------------------------------------------------------------------- /docs/images/schema_fenetre.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepcharles/ruptures/39a837b7e8d3e08d04d33f14749a1a39d86183d2/docs/images/schema_fenetre.png -------------------------------------------------------------------------------- /docs/images/schema_tree.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepcharles/ruptures/39a837b7e8d3e08d04d33f14749a1a39d86183d2/docs/images/schema_tree.png -------------------------------------------------------------------------------- /docs/images/sum_of_sines.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepcharles/ruptures/39a837b7e8d3e08d04d33f14749a1a39d86183d2/docs/images/sum_of_sines.png -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | ../README.md 
-------------------------------------------------------------------------------- /docs/install.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | This library requires Python >=3.6 and the following packages: `numpy`, `scipy` and `matplotlib` (the last one is optional and only for display purposes). 4 | You can either install the latest stable release or the development version. 5 | 6 | ## Stable release 7 | 8 | To install the latest stable release, use `pip` or `conda`. 9 | 10 | === "With pip" 11 | ``` 12 | python -m pip install ruptures 13 | ``` 14 | 15 | === "With conda" 16 | `ruptures` can be installed from the `conda-forge` channel (run `conda config --add channels conda-forge` to add it): 17 | ``` 18 | conda install ruptures 19 | ``` 20 | 21 | ## Development release 22 | 23 | Alternatively, you can install the development version of `ruptures` which can contain features that have not yet been integrated into the stable release. 24 | To that end, refer to the [contributing guide](contributing.md). 25 | 26 | ## Upgrade 27 | 28 | Show the current version of the package. 29 | 30 | ``` 31 | python -m pip show ruptures 32 | ``` 33 | 34 | In order to upgrade to the latest version, use the following command.
35 | 36 | ``` 37 | python -m pip install -U ruptures 38 | ``` 39 | 40 | -------------------------------------------------------------------------------- /docs/javascripts/configs.js: -------------------------------------------------------------------------------- 1 | window.MathJax = { 2 | tex: { 3 | inlineMath: [ 4 | ["\\(", "\\)"] 5 | ], 6 | displayMath: [ 7 | ["\\[", "\\]"] 8 | ], 9 | processEscapes: true, 10 | processEnvironments: true 11 | }, 12 | options: { 13 | ignoreHtmlClass: ".*|", 14 | processHtmlClass: "arithmatex" 15 | } 16 | }; -------------------------------------------------------------------------------- /docs/javascripts/mathjax.js: -------------------------------------------------------------------------------- 1 | window.MathJax = { 2 | tex: { 3 | inlineMath: [["\\(", "\\)"]], 4 | displayMath: [["\\[", "\\]"]], 5 | processEscapes: true, 6 | processEnvironments: true 7 | }, 8 | options: { 9 | ignoreHtmlClass: ".*|", 10 | processHtmlClass: "arithmatex" 11 | } 12 | }; 13 | 14 | document$.subscribe(() => { 15 | MathJax.startup.output.clearCache() 16 | MathJax.typesetClear() 17 | MathJax.texReset() 18 | MathJax.typesetPromise() 19 | }) -------------------------------------------------------------------------------- /docs/license.md: -------------------------------------------------------------------------------- 1 | # License 2 | 3 | This project is under BSD license. 
4 | 5 | ``` 6 | --8<-- "LICENSE" 7 | ``` 8 | -------------------------------------------------------------------------------- /docs/release-notes.md: -------------------------------------------------------------------------------- 1 | --8<-- "CHANGELOG.md" 2 | -------------------------------------------------------------------------------- /docs/user-guide/costs/costautoregressive.md: -------------------------------------------------------------------------------- 1 | # Autoregressive model change (`CostAR`) 2 | 3 | ## Description 4 | 5 | Let $0 < t_1 < t_2 < \dots < t_K < T$ be unknown change point indexes. 6 | This cost function detects changes in the coefficients of a piecewise autoregressive process: 7 | 8 | $$ 9 | y_t = z_t' \delta_j + \varepsilon_t, \quad \forall t = t_j, \dots, t_{j+1}-1 10 | $$ 11 | 12 | where $j>1$ is the segment number, $z_t=[y_{t-1}, y_{t-2},\dots,y_{t-p}]$ is the lag vector, and $p>0$ is the order of the process. 13 | 14 | The least-squares estimates of the break dates are obtained by minimizing the sum of squared 15 | residuals [[Bai2000]](#Bai2000). 16 | Formally, the associated cost function on an interval $I$ is 17 | 18 | $$ 19 | c(y_{I}) = \min_{\delta\in\mathbb{R}^p} \sum_{t\in I} \|y_t - \delta' z_t \|_2^2. 20 | $$ 21 | 22 | Currently, this function is limited to 1D signals. 23 | 24 | ## Usage 25 | 26 | Start with the usual imports and create a signal with piecewise linear trends.
27 | 28 | ```python 29 | from itertools import cycle 30 | import numpy as np 31 | import matplotlib.pylab as plt 32 | import ruptures as rpt 33 | 34 | # creation of data 35 | n = 2000 36 | n_bkps, sigma = 4, 0.5  # number of change points, noise standard deviation 37 | bkps = [400, 1000, 1300, 1800, n] 38 | f1 = np.array([0.075, 0.1]) 39 | f2 = np.array([0.1, 0.125]) 40 | freqs = np.zeros((n, 2)) 41 | for sub, val in zip(np.split(freqs, bkps[:-1]), cycle([f1, f2])): 42 | sub += val 43 | tt = np.arange(n) 44 | signal = np.sum([np.sin(2 * np.pi * tt * f) for f in freqs.T], axis=0) 45 | signal += np.random.normal(scale=sigma, size=signal.shape) 46 | # display signal 47 | rpt.show.display(signal, bkps, figsize=(10, 6)) 48 | plt.show() 49 | ``` 50 | 51 | Then create a [CostAR][ruptures.costs.costautoregressive.CostAR] instance and print the cost of the sub-signal 52 | `signal[50:150]`. 53 | The autoregressive order can be specified through the keyword ``'order'``. 54 | 55 | ```python 56 | c = rpt.costs.CostAR(order=10).fit(signal) 57 | print(c.error(50, 150)) 58 | ``` 59 | 60 | You can also compute the sum of costs for a given list of change points. 61 | 62 | ```python 63 | print(c.sum_of_costs(bkps)) 64 | print(c.sum_of_costs([10, 100, 200, 250, n])) 65 | ``` 66 | 67 | In order to use this cost class in a change point detection algorithm (inheriting from 68 | [BaseEstimator][ruptures.base.BaseEstimator]), either pass a [CostAR][ruptures.costs.costautoregressive.CostAR] instance (through the argument 69 | ``'custom_cost'``) or set `model="ar"`. 70 | Additional parameters can be passed to the cost instance through the keyword ``'params'``. 71 | 72 | ```python 73 | c = rpt.costs.CostAR(order=10) 74 | algo = rpt.Dynp(custom_cost=c) 75 | # is equivalent to 76 | algo = rpt.Dynp(model="ar", params={"order": 10}) 77 | ``` 78 | 79 | ## Reference 80 | 81 | [Bai2000] 82 | Bai, J. (2000).
Vector autoregressive models with structural changes in regression coefficients and in variance–covariance matrices. Annals of Economics and Finance, 1(2), 301–336. -------------------------------------------------------------------------------- /docs/user-guide/costs/costclinear.md: -------------------------------------------------------------------------------- 1 | # Continuous linear change (`CostCLinear`) 2 | 3 | ## Description 4 | 5 | For a given set of indexes (also called knots) $t_k$ ($k=1,\dots,K$), a linear spline $f$ is such that: 6 | 7 | 1. $f$ is affine on each interval $t_k..t_{k+1}$, i.e. $f(t)=\alpha_k (t-t_k) + \beta_k$ ($\alpha_k, \beta_k \in \mathbb{R}^d$) for all $t=t_k,t_k+1,\dots,t_{k+1}-1$; 8 | 2. $f$ is continuous. 9 | 10 | The cost function [`CostCLinear`][ruptures.costs.costclinear.CostCLinear] measures the error when approximating the signal with a linear spline. 11 | Formally, it is defined for $0[Hearst1994] 64 | Hearst, M. A. (1994). Multi-paragraph segmentation of expository text. In Proceedings of the Annual Meeting of the Association for Computational Linguistics (pp. 9–16). Las Cruces, New Mexico, USA. 65 | 66 | [Cooper2002] 67 | Cooper, M., & Foote, J. (2002). Automatic music summarization via similarity analysis. In Proceedings of the International Conference on Music Information Retrieval (ISMIR) (pp. 81–85). Paris, France. 68 | 69 | [Arlot2019] 70 | Arlot, S., Celisse, A., & Harchaoui, Z. (2019). A kernel multiple change-point algorithm via model selection. Journal of Machine Learning Research, 20(162), 1–56. 71 | -------------------------------------------------------------------------------- /docs/user-guide/costs/costcustom.md: -------------------------------------------------------------------------------- 1 | # Custom cost class 2 | 3 | Users who are interested in detecting a specific type of change can easily do so by creating a custom cost function. 
Provided that they subclass the base cost function [`BaseCost`][ruptures.base.BaseCost], they will be able to seamlessly run the algorithms implemented in `ruptures`.
warning 54 | For compatibility reasons, the static attributes `model` and `min_size` must be explicitly specified: 55 | 56 | - `model` is simply a string containing the name of the cost function (can be empty); 57 | - `min_size` is a positive integer that indicates the minimum segment size (in number of samples) on which the cost function can be applied. 58 | 59 | This cost function can now be used with all algorithms from `ruptures`. 60 | For instance, 61 | 62 | ```python 63 | import numpy as np 64 | import matplotlib.pylab as plt 65 | import ruptures as rpt 66 | 67 | # creation of data 68 | a = np.random.exponential(scale=1, size=100) 69 | b = np.random.exponential(scale=2, size=200) 70 | signal, bkps = np.r_[a, b, a], [100, 300, 400] 71 | # cost 72 | algo = rpt.Pelt(custom_cost=MyCost()).fit(signal) 73 | my_bkps = algo.predict(pen=10) 74 | # display 75 | rpt.display(signal, bkps, my_bkps) 76 | plt.show() 77 | ``` 78 | -------------------------------------------------------------------------------- /docs/user-guide/costs/costl1.md: -------------------------------------------------------------------------------- 1 | # Least absolute deviation (`CostL1`) 2 | 3 | ## Description 4 | 5 | This cost function detects changes in the median of a signal. 6 | Overall, it is a robust estimator of a shift in the central point (mean, median, mode) of a distribution [[Bai1995]](#Bai1995). 7 | Formally, for a signal $\{y_t\}_t$ on an interval $I$, 8 | 9 | $$ 10 | c(y_{I}) = \sum_{t\in I} \|y_t - \bar{y}\|_1 11 | $$ 12 | 13 | where $\bar{y}$ is the componentwise median of $\{y_t\}_{t\in I}$. 14 | 15 | ## Usage 16 | 17 | Start with the usual imports and create a signal. 
18 | 19 | ```python 20 | import numpy as np 21 | import matplotlib.pylab as plt 22 | import ruptures as rpt 23 | 24 | # creation of data 25 | n, dim = 500, 3 # number of samples, dimension 26 | n_bkps, sigma = 3, 5 # number of change points, noise standart deviation 27 | signal, bkps = rpt.pw_constant(n, dim, n_bkps, noise_std=sigma) 28 | ``` 29 | 30 | Then create a [`CostL1`][ruptures.costs.costl1.CostL1] instance and print the cost of the sub-signal `signal[50:150]`. 31 | 32 | ```python 33 | c = rpt.costs.CostL1().fit(signal) 34 | print(c.error(50, 150)) 35 | ``` 36 | 37 | You can also compute the sum of costs for a given list of change points. 38 | 39 | ```python 40 | print(c.sum_of_costs(bkps)) 41 | print(c.sum_of_costs([10, 100, 200, 250, n])) 42 | ``` 43 | 44 | In order to use this cost class in a change point detection algorithm (inheriting from [`BaseEstimator`][ruptures.base.BaseEstimator], either pass a [`CostL1`][ruptures.costs.costl1.CostL1] instance (through the argument `custom_cost`) or set `model="l1"`. 45 | 46 | ```python 47 | c = rpt.costs.CostL1() 48 | algo = rpt.Dynp(custom_cost=c) 49 | # is equivalent to 50 | algo = rpt.Dynp(model="l1") 51 | ``` 52 | 53 | ## References 54 | 55 | [Bai1995] 56 | Bai, J. (1995). Least absolute deviation of a shift. Econometric Theory, 11(3), 403–436. -------------------------------------------------------------------------------- /docs/user-guide/costs/costl2.md: -------------------------------------------------------------------------------- 1 | # Least squared deviation (`CostL2`) 2 | 3 | ## Description 4 | 5 | This cost function detects mean-shifts in a signal. 6 | Formally, for a signal $\{y_t\}_t$ on an interval $I$, 7 | 8 | $$ 9 | c(y_{I}) = \sum_{t\in I} \|y_t - \bar{y}\|_2^2 10 | $$ 11 | 12 | where $\bar{y}$ is the mean of $\{y_t\}_{t\in I}$. 13 | 14 | ## Usage 15 | 16 | Start with the usual imports and create a signal. 
17 | 18 | ```python 19 | import numpy as np 20 | import matplotlib.pylab as plt 21 | import ruptures as rpt 22 | 23 | # creation of data 24 | n, dim = 500, 3 # number of samples, dimension 25 | n_bkps, sigma = 3, 5 # number of change points, noise standart deviation 26 | signal, bkps = rpt.pw_constant(n, dim, n_bkps, noise_std=sigma) 27 | ``` 28 | 29 | Then create a [`CostL2`][ruptures.costs.costl2.CostL2] instance and print the cost of the sub-signal `signal[50:150]`. 30 | 31 | ```python 32 | c = rpt.costs.CostL2().fit(signal) 33 | print(c.error(50, 150)) 34 | ``` 35 | 36 | You can also compute the sum of costs for a given list of change points. 37 | 38 | ```python 39 | print(c.sum_of_costs(bkps)) 40 | print(c.sum_of_costs([10, 100, 200, 250, n])) 41 | ``` 42 | 43 | In order to use this cost class in a change point detection algorithm (inheriting from [`BaseEstimator`][ruptures.base.BaseEstimator]), either pass a [`CostL2`][ruptures.costs.costl2.CostL2] instance (through the argument `custom_cost`) or set `model="l2"`. 44 | 45 | ```python 46 | c = rpt.costs.CostL2() 47 | algo = rpt.Dynp(custom_cost=c) 48 | # is equivalent to 49 | algo = rpt.Dynp(model="l2") 50 | ``` -------------------------------------------------------------------------------- /docs/user-guide/costs/costlinear.md: -------------------------------------------------------------------------------- 1 | # Linear model change (`CostLinear`) 2 | 3 | ## Description 4 | 5 | Let $0 < t_1 < t_2 < \dots < n$ be unknown change points indexes. 6 | Consider the following multiple linear regression model 7 | 8 | $$ 9 | y_t = x_t' \delta_j + \varepsilon_t, \quad \forall t=t_j,\dots,t_{j+1}-1 10 | $$ 11 | 12 | for $j>1$. 13 | Here, the observed dependant variable is $y_t\in\mathbb{R}$, the covariate vector is $x_t \in\mathbb{R}^p$, the disturbance is $\varepsilon_t\in\mathbb{R}$. 14 | The vectors $\delta_j\in\mathbb{R}^p$ are the parameter vectors (or regression coefficients). 
The least-squares estimates of the break dates are obtained by minimizing the sum of squared residuals [[Bai2003]](#Bai2003).
66 | 67 | ```python 68 | c = rpt.costs.CostLinear() 69 | algo = rpt.Dynp(custom_cost=c) 70 | # is equivalent to 71 | algo = rpt.Dynp(model="linear") 72 | ``` 73 | 74 | ## References 75 | 76 | [Bai2003] 77 | J. Bai and P. Perron. Critical values for multiple structural change tests. Econometrics Journal, 6(1):72–78, 2003. 78 | -------------------------------------------------------------------------------- /docs/user-guide/costs/costml.md: -------------------------------------------------------------------------------- 1 | # Change detection with a Mahalanobis-type metric (`CostMl`) 2 | 3 | ## Description 4 | 5 | Given a positive semi-definite matrix $M\in\mathbb{R}^{d\times d}$, 6 | this cost function detects changes in the mean of the embedded signal defined by the pseudo-metric 7 | 8 | $$ 9 | \| x - y \|_M^2 = (x-y)^t M (x-y). 10 | $$ 11 | 12 | Formally, for a signal $\{y_t\}_t$ on an interval $I$, the cost function is equal to 13 | 14 | $$ 15 | c(y_{I}) = \sum_{t\in I} \| y_t - \bar{\mu} \|_{M}^2 16 | $$ 17 | 18 | where $\bar{\mu}$ is the empirical mean of the sub-signal $\{y_t\}_{t\in I}$. 19 | The matrix $M$ can for instance be the result of a similarity learning algorithm [[Xing2003](#Xing2003), [Truong2019](#Truong2019)] or the inverse of the empirical covariance matrix (yielding the Mahalanobis distance). 20 | 21 | ## Usage 22 | 23 | Start with the usual imports and create a signal. 24 | 25 | ```python 26 | import numpy as np 27 | import matplotlib.pylab as plt 28 | import ruptures as rpt 29 | 30 | # creation of data 31 | n, dim = 500, 3 # number of samples, dimension 32 | n_bkps, sigma = 3, 5 # number of change points, noise standart deviation 33 | signal, bkps = rpt.pw_constant(n, dim, n_bkps, noise_std=sigma) 34 | ``` 35 | 36 | Then create a [`CostMl`][ruptures.costs.costml.CostMl] instance and print the cost of the sub-signal `signal[50:150]`. 
37 | 38 | ```python 39 | M = np.eye(dim) 40 | c = rpt.costs.CostMl(metric=M).fit(signal) 41 | print(c.error(50, 150)) 42 | ``` 43 | 44 | You can also compute the sum of costs for a given list of change points. 45 | 46 | ```python 47 | print(c.sum_of_costs(bkps)) 48 | print(c.sum_of_costs([10, 100, 200, 250, n])) 49 | ``` 50 | 51 | In order to use this cost class in a change point detection algorithm (inheriting from [`BaseEstimator`][ruptures.base.BaseEstimator]), either pass a [`CostMl`][ruptures.costs.costml.CostMl] instance (through the argument `custom_cost`) or set `model="mahalanobis"`. 52 | 53 | ```python 54 | c = rpt.costs.CostMl(metric=M) 55 | algo = rpt.Dynp(custom_cost=c) 56 | # is equivalent to 57 | algo = rpt.Dynp(model="mahalanobis", params={"metric": M}) 58 | ``` 59 | 60 | ## References 61 | 62 | [Xing2003] 63 | Xing, E. P., Jordan, M. I., & Russell, S. J. (2003). Distance metric learning, with application to clustering with side-Information. Advances in Neural Information Processing Systems (NIPS), 521–528. 64 | 65 | [Truong2019] 66 | Truong, C., Oudre, L., & Vayatis, N. (2019). Supervised kernel change point detection with partial annotations. Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 1–5. -------------------------------------------------------------------------------- /docs/user-guide/costs/costnormal.md: -------------------------------------------------------------------------------- 1 | # Gaussian process change (`CostNormal`) 2 | 3 | ## Description 4 | 5 | This cost function detects changes in the mean and covariance matrix of a sequence of multivariate Gaussian random variables. 
6 | Formally, for a signal $\{y_t\}_t$ on an interval $I$, 7 | $$ 8 | c(y_{I}) = |I| \log\det(\widehat{\Sigma}_I + \epsilon\text{Id}) 9 | $$ 10 | where $\widehat{\Sigma}_I$ is the empirical covariance matrix of the sub-signal $\{y_t\}_{t\in I}$ and $\epsilon>0$ is a small constant added to cope with badly conditioned covariance matrices (new in version 1.1.5, see [Issue 196](https://github.com/deepcharles/ruptures/issues/196)). 11 | It is robust to strongly dependant processes; for more information, see [[Lavielle1999]](#Lavielle1999) (univariate case) and [[Lavielle2006]](#Lavielle2006) (multivariate case). 12 | 13 | 14 | ## Usage 15 | 16 | Start with the usual imports and create a signal. 17 | 18 | ```python 19 | import numpy as np 20 | import matplotlib.pylab as plt 21 | import ruptures as rpt 22 | 23 | # creation of data 24 | n, dim = 500, 3 # number of samples, dimension 25 | n_bkps, sigma = 3, 5 # number of change points, noise standart deviation 26 | signal, bkps = rpt.pw_constant(n, dim, n_bkps, noise_std=sigma) 27 | ``` 28 | 29 | Then create a [`CostNormal`][ruptures.costs.costnormal.CostNormal] instance and print the cost of the sub-signal `signal[50:150]`. 30 | 31 | ```python 32 | c = rpt.costs.CostNormal().fit(signal) 33 | print(c.error(50, 150)) 34 | ``` 35 | 36 | You can also compute the sum of costs for a given list of change points. 37 | 38 | ```python 39 | print(c.sum_of_costs(bkps)) 40 | print(c.sum_of_costs([10, 100, 200, 250, n])) 41 | ``` 42 | 43 | In order to use this cost class in a change point detection algorithm (inheriting from [`BaseEstimator`][ruptures.base.BaseEstimator]), either pass a [`CostNormal`][ruptures.costs.costnormal.CostNormal] instance (through the argument `custom_cost`) or set `model="normal"`. 
44 | 45 | ```python 46 | c = rpt.costs.CostNormal() 47 | algo = rpt.Dynp(custom_cost=c) 48 | # is equivalent to 49 | algo = rpt.Dynp(model="normal") 50 | ``` 51 | 52 | To set the small diagonal bias to 0 (default behaviour in versions 1.1.4 and before), simply do the following (change `Dynp` by the search method you need). 53 | ```python 54 | c = rpt.costs.CostNormal(add_small_diag=False) 55 | algo = rpt.Dynp(custom_cost=c) 56 | # or, equivalently, 57 | algo = rpt.Dynp(model="normal", params={"add_small_diag": False}) 58 | ``` 59 | 60 | ## References 61 | 62 | [Lavielle1999] 63 | Lavielle, M. (1999). Detection of multiples changes in a sequence of dependant variables. Stochastic Processes and Their Applications, 83(1), 79–102. 64 | 65 | [Lavielle2006] 66 | Lavielle, M., & Teyssière, G. (2006). Detection of multiple change-points in multivariate time series. Lithuanian Mathematical Journal, 46(3). -------------------------------------------------------------------------------- /docs/user-guide/costs/costrank.md: -------------------------------------------------------------------------------- 1 | # Rank-based cost function (`CostRank`) 2 | 3 | ## Description 4 | 5 | This cost function detects general distribution changes in multivariate signals, using a rank transformation [[Lung-Yut-Fong2015]](#Lung-Yut-Fong2015). 6 | Formally, for a signal $\{y_t\}_t$ on an interval $[a, b)$, 7 | 8 | $$ 9 | c_{rank}(a, b) = -(b - a) \bar{r}_{a..b}' \hat{\Sigma}_r^{-1} \bar{r}_{a..b} 10 | $$ 11 | 12 | where $\bar{r}_{a..b}$ is the empirical mean of the sub-signal $\{r_t\}_{t=a+1}^b$, and $\hat{\Sigma}_r$ is the covariance matrix of the complete rank signal $r$. 13 | 14 | ## Usage 15 | 16 | Start with the usual imports and create a signal. 
17 | 18 | ```python 19 | import numpy as np 20 | import matplotlib.pylab as plt 21 | import ruptures as rpt 22 | 23 | # creation of data 24 | n, dim = 500, 3 # number of samples, dimension 25 | n_bkps, sigma = 3, 5 # number of change points, noise standard deviation 26 | signal, bkps = rpt.pw_constant(n, dim, n_bkps, noise_std=sigma) 27 | ``` 28 | 29 | Then create a [`CostRank`][ruptures.costs.costrank.CostRank] instance and print the cost of the sub-signal `signal[50:150]`. 30 | 31 | ```python 32 | c = rpt.costs.CostRank().fit(signal) 33 | print(c.error(50, 150)) 34 | ``` 35 | 36 | You can also compute the sum of costs for a given list of change points. 37 | 38 | ```python 39 | print(c.sum_of_costs(bkps)) 40 | print(c.sum_of_costs([10, 100, 200, 250, n])) 41 | ``` 42 | 43 | In order to use this cost class in a change point detection algorithm (inheriting from [`BaseEstimator`][ruptures.base.BaseEstimator]), either pass a [`CostRank`][ruptures.costs.costrank.CostRank] instance (through the argument `custom_cost`) or set `model="rank"`. 44 | 45 | ```python 46 | c = rpt.costs.CostRank() 47 | algo = rpt.Dynp(custom_cost=c) 48 | # is equivalent to 49 | algo = rpt.Dynp(model="rank") 50 | ``` 51 | 52 | ## References 53 | 54 | [Lung-Yut-Fong2015] 55 | Lung-Yut-Fong, A., Lévy-Leduc, C., & Cappé, O. (2015). Homogeneity and change-point detection tests for multivariate data using rank statistics. Journal de La Société Française de Statistique, 156(4), 133–162. 
-------------------------------------------------------------------------------- /docs/user-guide/costs/costrbf.md: -------------------------------------------------------------------------------- 1 | # Kernelized mean change (`CostRbf`) 2 | 3 | ## Description 4 | 5 | Given a positive semi-definite kernel $k(\cdot, \cdot) : \mathbb{R}^d\times \mathbb{R}^d \mapsto \mathbb{R}$ and its associated feature map $\Phi:\mathbb{R}^d \mapsto \mathcal{H}$ (where $\mathcal{H}$ is an appropriate Hilbert space), this cost function detects changes in the mean of the embedded signal $\{\Phi(y_t)\}_t$ [[Garreau2018](#Garreau2018), [Arlot2019](#Arlot2019)]. 6 | Formally, for a signal $\{y_t\}_t$ on an interval $I$, 7 | 8 | $$ 9 | c(y_{I}) = \sum_{t\in I} \| \Phi(y_t) - \bar{\mu} \|_{\mathcal{H}}^2 10 | $$ 11 | 12 | where $\bar{\mu}$ is the empirical mean of the embedded sub-signal $\{\Phi(y_t)\}_{t\in I}$. 13 | Here the kernel is the radial basis function (rbf): 14 | 15 | $$ 16 | k(x, y) = \exp(-\gamma \| x - y \|^2 ) 17 | $$ 18 | 19 | where $\| \cdot \|$ is the Euclidean norm and $\gamma>0$ is the so-called bandwidth parameter and is determined according to median heuristics (i.e. equal to the inverse of median of all pairwise distances). 20 | 21 | In a nutshell, this cost function is able to detect changes in the distribution of an iid sequence of random variables. 22 | Because it is non-parametric, it is performs reasonably well on a wide range of tasks. 23 | 24 | ## Usage 25 | 26 | Start with the usual imports and create a signal. 
27 | 28 | ```python 29 | import numpy as np 30 | import matplotlib.pylab as plt 31 | import ruptures as rpt 32 | 33 | # creation of data 34 | n, dim = 500, 3 # number of samples, dimension 35 | n_bkps, sigma = 3, 5 # number of change points, noise standart deviation 36 | signal, bkps = rpt.pw_constant(n, dim, n_bkps, noise_std=sigma) 37 | ``` 38 | 39 | Then create a [`CostRbf`][ruptures.costs.costrbf.CostRbf] instance and print the cost of the sub-signal `signal[50:150]`. 40 | 41 | ```python 42 | c = rpt.costs.CostRbf().fit(signal) 43 | print(c.error(50, 150)) 44 | ``` 45 | 46 | You can also compute the sum of costs for a given list of change points. 47 | 48 | ```python 49 | print(c.sum_of_costs(bkps)) 50 | print(c.sum_of_costs([10, 100, 200, 250, n])) 51 | ``` 52 | 53 | In order to use this cost class in a change point detection algorithm (inheriting from [`BaseEstimator`][ruptures.base.BaseEstimator]), either pass a [`CostRbf`][ruptures.costs.costrbf.CostRbf] instance (through the argument `custom_cost`) or set `model="rbf"`. 54 | 55 | ```python 56 | c = rpt.costs.CostRbf() 57 | algo = rpt.Dynp(custom_cost=c) 58 | # is equivalent to 59 | algo = rpt.Dynp(model="rbf") 60 | ``` 61 | 62 | ## References 63 | 64 | [Garreau2018] 65 | Garreau, D., & Arlot, S. (2018). Consistent change-point detection with kernels. Electronic Journal of Statistics, 12(2), 4440–4486. 66 | 67 | [Arlot2019] 68 | Arlot, S., Celisse, A., & Harchaoui, Z. (2019). A kernel multiple change-point algorithm via model selection. Journal of Machine Learning Research, 20(162), 1–56. 
69 | -------------------------------------------------------------------------------- /docs/user-guide/datasets/pw_constant.md: -------------------------------------------------------------------------------- 1 | # Piecewise constant (`pw_constant`) 2 | 3 | ## Description 4 | 5 | For a given number of samples $T$, number $K$ of change points and noise variance $\sigma^2$, the function [`pw_constant`][ruptures.datasets.pw_constant.pw_constant] generates change point dexes $0 < t_1 < \dots < t_K < T$ and a piecewise constant signal $\{y_t\}_t$ with additive Gaussian noise. 6 | 7 | ## Usage 8 | 9 | Start with the usual imports and create a signal. 10 | 11 | ```python 12 | import numpy as np 13 | import matplotlib.pylab as plt 14 | import ruptures as rpt 15 | 16 | # creation of data 17 | n, dim = 500, 3 # number of samples, dimension 18 | n_bkps, sigma = 3, 5 # number of change points, noise standard deviation 19 | signal, bkps = rpt.pw_constant(n, dim, n_bkps, noise_std=sigma) 20 | rpt.display(signal, bkps) 21 | ``` 22 | 23 | The mean shift amplitude is uniformly drawn from an interval that can be changed through the keyword `delta`. 24 | 25 | ```python 26 | signal, bkps = rpt.pw_constant(n, dim, n_bkps, noise_std=sigma, delta=(1, 10)) 27 | ``` 28 | -------------------------------------------------------------------------------- /docs/user-guide/datasets/pw_linear.md: -------------------------------------------------------------------------------- 1 | # Piecewise linear (`pw_linear`) 2 | 3 | ## Description 4 | 5 | This function [`pw_linear`][ruptures.datasets.pw_linear.pw_linear] simulates a piecewise linear model (see [Cost linear](../costs/costlinear.md)). 6 | The covariates are standard Gaussian random variables. 7 | The response variable is a (piecewise) linear combination of the covariates. 8 | 9 | ## Usage 10 | 11 | Start with the usual imports and create a signal. 
12 | 13 | ```python 14 | import numpy as np 15 | import matplotlib.pylab as plt 16 | import ruptures as rpt 17 | 18 | # creation of data 19 | n, dim = 500, 3 # number of samples, dimension of the covariates 20 | n_bkps, sigma = 3, 5 # number of change points, noise standart deviation 21 | signal, bkps = rpt.pw_linear(n, dim, n_bkps, noise_std=sigma) 22 | rpt.display(signal, bkps) 23 | ``` -------------------------------------------------------------------------------- /docs/user-guide/datasets/pw_normal.md: -------------------------------------------------------------------------------- 1 | # Piecewise 2D Gaussian process (`pw_normal`) 2 | 3 | ## Description 4 | 5 | The function [`pw_normal`][ruptures.datasets.pw_normal.pw_normal] simulates a 2D signal of Gaussian i.i.d. random variables with zero mean and covariance matrix alternating between $[[1, 0.9], [0.9, 1]]$ and $[[1, -0.9], [-0.9, 1]]$ at every change point. 6 | 7 | ![](../../images/correlation_shift.png) 8 |
Top and middle: 2D signal example. Bottom: Scatter plot for each regime type
9 | 10 | ## Usage 11 | 12 | Start with the usual imports and create a signal. 13 | 14 | ```python 15 | import numpy as np 16 | import matplotlib.pylab as plt 17 | import ruptures as rpt 18 | 19 | # creation of data 20 | n = 500 # number of samples 21 | n_bkps = 3 # number of change points 22 | signal, bkps = rpt.pw_normal(n, n_bkps) 23 | rpt.display(signal, bkps) 24 | ``` -------------------------------------------------------------------------------- /docs/user-guide/datasets/pw_wavy.md: -------------------------------------------------------------------------------- 1 | # Piecewise sinusoidal signal (`pw_wavy`) 2 | 3 | ## Description 4 | 5 | The function [`pw_wavy`][ruptures.datasets.pw_wavy.pw_wavy] simulates a sum-of-sine signal $y_t=\sin(2\pi f_1 t)+\sin(2\pi f_2 t)$ where $t=0,\dots,T-1$. 6 | The frequency vector $[f_1, f_2]$ alternates between $[0.075, 0.1]$ and $[0.1, 0.125]$ at each change point index. 7 | Gaussian white noise can be added to the signal. 8 | 9 | ![](../../images/sum_of_sines.png) 10 |
Top: signal example. Bottom: associated spectrogram.
11 | 12 | ## Usage 13 | 14 | Start with the usual imports and create a signal. 15 | 16 | ```python 17 | import numpy as np 18 | import matplotlib.pylab as plt 19 | import ruptures as rpt 20 | 21 | # creation of data 22 | n, dim = 500, 3 # number of samples, dimension 23 | n_bkps, sigma = 3, 5 # number of change points, noise standart deviation 24 | signal, bkps = rpt.pw_wavy(n, n_bkps, noise_std=sigma) 25 | rpt.display(signal, bkps) 26 | ``` -------------------------------------------------------------------------------- /docs/user-guide/detection/binseg.md: -------------------------------------------------------------------------------- 1 | # Binary segmentation (`Binseg`) 2 | 3 | ## Description 4 | 5 | Binary change point detection is used to perform fast signal segmentation and is implemented in [`Binseg`][ruptures.detection.binseg.Binseg]. 6 | It is a sequential approach: first, one change point is detected in the complete input signal, then series is split around this change point, then the operation is repeated on the two resulting sub-signals. 7 | For a theoretical and algorithmic analysis of [`Binseg`][ruptures.detection.binseg.Binseg], see for instance [[Bai1997]](#Bai1997) and [[Fryzlewicz2014]](#Fryzlewicz2014). 8 | The benefits of binary segmentation includes low complexity (of the order of $\mathcal{O}(Cn\log n)$, where $n$ is the number of samples and $C$ the complexity of calling the considered cost function on one sub-signal), the fact that it can extend any single change point detection method to detect multiple changes points and that it can work whether the number of regimes is known beforehand or not. 9 | 10 | ![](../../images/schema_binseg.png) 11 |
Schematic view of the binary segmentation algorithm
12 | 13 | 14 | ## Usage 15 | 16 | Start with the usual imports and create a signal. 17 | 18 | ```python 19 | import numpy as np 20 | import matplotlib.pylab as plt 21 | import ruptures as rpt 22 | 23 | # creation of data 24 | n = 500 # number of samples 25 | n_bkps, sigma = 3, 5 # number of change points, noise standard deviation 26 | signal, bkps = rpt.pw_constant(n, dim, n_bkps, noise_std=sigma) 27 | ``` 28 | 29 | To perform a binary segmentation of a signal, initialize a [`BinSeg`][ruptures.detection.binseg.Binseg] instance. 30 | 31 | ```python 32 | # change point detection 33 | model = "l2" # "l1", "rbf", "linear", "normal", "ar",... 34 | algo = rpt.Binseg(model=model).fit(signal) 35 | my_bkps = algo.predict(n_bkps=3) 36 | 37 | # show results 38 | rpt.show.display(signal, bkps, my_bkps, figsize=(10, 6)) 39 | plt.show() 40 | ``` 41 | In the situation in which the number of change points is unknown, one can specify a penalty using 42 | the `pen` parameter or a threshold on the residual norm using `epsilon`. 43 | 44 | ```python 45 | my_bkps = algo.predict(pen=np.log(n) * dim * sigma**2) 46 | # or 47 | my_bkps = algo.predict(epsilon=3 * n * sigma**2) 48 | ``` 49 | 50 | For faster predictions, one can modify the `jump` parameter during initialization. 51 | The higher it is, the faster the prediction is achieved (at the expense of precision). 52 | 53 | ```python 54 | algo = rpt.Binseg(model=model, jump=10).fit(signal) 55 | ``` 56 | 57 | ## References 58 | 59 | [Bai1997] 60 | Bai, J. (1997). Estimating multiple breaks one at a time. Econometric Theory, 13(3), 315–352. 61 | 62 | [Fryzlewicz2014] 63 | Fryzlewicz, P. (2014). Wild binary segmentation for multiple change-point detection. The Annals of Statistics, 42(6), 2243–2281. 
-------------------------------------------------------------------------------- /docs/user-guide/detection/bottomup.md: -------------------------------------------------------------------------------- 1 | # Bottom-up segmentation (`BottomUp`) 2 | 3 | ## Description 4 | 5 | Bottom-up change point detection is used to perform fast signal segmentation and is implemented in 6 | [`BottomUp`][ruptures.detection.bottomup.BottomUp] in a sequential manner. 7 | Contrary to binary segmentation, which is a greedy procedure, bottom-up segmentation is generous: 8 | it starts with many change points and successively deletes the less significant ones. 9 | First, the signal is divided in many sub-signals along a regular grid. 10 | Then contiguous segments are successively merged according to a measure of how similar they are. 11 | See for instance [[Keogh2001]](#Keogh2001) or [[Fryzlewicz2007]](#Fryzlewicz2007) for an algorithmic 12 | analysis of [`BottomUp`][ruptures.detection.bottomup.BottomUp]. 13 | The benefits of bottom-up segmentation includes low complexity (of the order of 14 | $\mathcal{O}(n\log n)$, where $n$ is the number of samples), the fact that it can extend 15 | any single change point detection method to detect multiple changes points and that it can work 16 | whether the number of regimes is known beforehand or not. 17 | 18 | ![](../../images/schema_tree.png) 19 |
Schematic view of the bottom-up segmentation algorithm
20 | 21 | ## Usage 22 | 23 | Start with the usual imports and create a signal. 24 | 25 | ```python 26 | import numpy as np 27 | import matplotlib.pylab as plt 28 | import ruptures as rpt 29 | 30 | # creation of data 31 | n, dim = 500, 3 # number of samples, dimension 32 | n_bkps, sigma = 3, 5 # number of change points, noise standart deviation 33 | signal, bkps = rpt.pw_constant(n, dim, n_bkps, noise_std=sigma) 34 | ``` 35 | 36 | To perform a bottom-up segmentation of a signal, initialize a [`BottomUp`][ruptures.detection.bottomup.BottomUp] 37 | instance. 38 | 39 | ```python 40 | # change point detection 41 | model = "l2" # "l1", "rbf", "linear", "normal", "ar" 42 | algo = rpt.BottomUp(model=model).fit(signal) 43 | my_bkps = algo.predict(n_bkps=3) 44 | 45 | # show results 46 | rpt.show.display(signal, bkps, my_bkps, figsize=(10, 6)) 47 | plt.show() 48 | ``` 49 | 50 | In the situation in which the number of change points is unknown, one can specify a penalty using 51 | the `pen` parameter or a threshold on the residual norm using `epsilon`. 52 | 53 | ```python 54 | my_bkps = algo.predict(pen=np.log(n) * dim * sigma**2) 55 | # or 56 | my_bkps = algo.predict(epsilon=3 * n * sigma**2) 57 | ``` 58 | 59 | For faster predictions, one can modify the `jump` parameter during initialization. 60 | The higher it is, the faster the prediction is achieved (at the expense of precision). 61 | 62 | ```python 63 | algo = rpt.BottomUp(model=model, jump=10).fit(signal) 64 | ``` 65 | 66 | ## References 67 | 68 | 69 | [Keogh2001] 70 | Keogh, E., Chu, S., Hart, D., & Pazzani, M. (2001). An online algorithm for segmenting time series. Proceedings of the IEEE International Conference on Data Mining (ICDM), 289–296. 71 | 72 | [Fryzlewicz2007] 73 | Fryzlewicz, P. (2007). Unbalanced Haar technique for nonparametric function estimation. Journal of the American Statistical Association, 102(480), 1318–1327. 
-------------------------------------------------------------------------------- /docs/user-guide/detection/dynp.md: -------------------------------------------------------------------------------- 1 | # Dynamic programming (`Dynp`) 2 | 3 | ## Description 4 | 5 | The method is implemented in both [`Dynp`][ruptures.detection.dynp.Dynp], which is a full native python implementation for which the user can choose any cost functions defined in `ruptures.costs` 6 | 7 | It finds the (exact) minimum of the sum of costs by computing the cost of all subsequences of a given signal. 8 | It is called "dynamic programming" because the search over all possible segmentations is ordered using a dynamic programming approach. 9 | 10 | In order to work, **the user must specify in advance the number of changes to detect**. 11 | (Consider using penalized methods when this number is unknown.) 12 | 13 | The complexity of the dynamic programming approach is of the order $\mathcal{O}(CKn^2)$, where $K$ is the number of change points to detect, $n$ the number of samples and $C$ the complexity of calling the considered cost function on one sub-signal. 14 | Consequently, piecewise constant models (`model=l2`) are significantly faster than linear or autoregressive models. 15 | 16 | To reduce the computational cost, you can consider only a subsample of possible change point indexes, by changing the `min_size` and `jump` arguments when instantiating [Dynp](#ruptures.detection.Dynp): 17 | 18 | - `min_size` controls the minimum distance between change points; for instance, if `min_size=10`, all change points will be at least 10 samples apart. 19 | - `jump` controls the grid of possible change points; for instance, if `jump=k`, only changes at `k, 2*k, 3*k,...` are considered. 
20 | 21 | ## Usage 22 | 23 | ```python 24 | import numpy as np 25 | import matplotlib.pylab as plt 26 | import ruptures as rpt 27 | 28 | # creation of data 29 | n, dim = 500, 3 30 | n_bkps, sigma = 3, 5 31 | signal, bkps = rpt.pw_constant(n, dim, n_bkps, noise_std=sigma) 32 | 33 | # change point detection 34 | model = "l1" # "l2", "rbf" 35 | algo = rpt.Dynp(model=model, min_size=3, jump=5).fit(signal) 36 | my_bkps = algo.predict(n_bkps=3) 37 | 38 | # show results 39 | rpt.show.display(signal, bkps, my_bkps, figsize=(10, 6)) 40 | plt.show() 41 | ``` -------------------------------------------------------------------------------- /docs/user-guide/detection/kernelcpd.md: -------------------------------------------------------------------------------- 1 | # Kernel change point detection 2 | 3 | ## Problem formulation 4 | 5 | In this section, the kernel change point detection setting is briefly described. 6 | The interested reader can refer to [[Celisse2018](#Celisse2018), [Arlot2019](#Arlot2019)] for a more complete introduction.
7 | Let $y = \{y_0,y_1,\dots,y_{T-1}\}$ denote a $\mathbb{R}^d$-valued signal with $T$ samples. 8 | This signal is mapped onto a [reproducing Hilbert space (rkhs)](https://en.wikipedia.org/wiki/Reproducing_kernel_Hilbert_space) $\mathcal{H}$ associated with a user-defined kernel function $k(\cdot, \cdot):\mathbb{R}^d\times\mathbb{R}^d\rightarrow\mathbb{R}$. 9 | The mapping function $\phi:\mathbb{R}^d\rightarrow\mathcal{H}$ onto this rkhs is implicitly defined by $\phi(y_t) = k(y_t, \cdot)\in\mathcal{H}$ resulting in the following inner-product and norm: 10 | 11 | $$ 12 | \langle\phi(y_s)\mid\phi(y_t)\rangle_{\mathcal{H}} = k(y_s,y_t) 13 | $$ 14 | 15 | and 16 | 17 | $$ 18 | \|\phi(y_t)\|_{\mathcal{H}}^2 = k(y_t,y_t) 19 | $$ 20 | 21 | for any samples $y_s,y_t\in\mathbb{R}^d$. 22 | Kernel change point detection consists in finding mean-shifts in the mapped signal $\phi(y)$ by minimizing $V(\cdot)$ where 23 | 24 | $$ 25 | V(t_1,\dots,t_K) := \sum_{k=0}^K\sum_{t=t_k}^{t_{k+1}-1} \|\phi(y_t)-\bar{\mu}_{t_k..t_{k+1}}\|^2_{\mathcal{H}} 26 | $$ 27 | 28 | where $\bar{\mu}_{t_k..t_{k+1}}$ is the empirical mean of the sub-signal $\phi(y_{t_k}), \phi(y_{t_k+1}),\dots,\phi(y_{t_{k+1}-1})$, and $t_1,t_2,\dots,t_K$ are change point indexes, in increasing order. 29 | (By convention $t_0=0$ and $t_{K+1}=T$.) 30 | 31 | **If the number of changes is known beforehand**, we solve the following optimization problem, over all possible change positions $t_10$ is the smoothing parameter (provided by the user) and $\hat{K}$ is the estimated number of change points. 46 | Higher values of $\beta$ produce lower $\hat{K}$. 47 | The exact optimization procedure is described in [[Killick2012]](#Killick2012). 48 | 49 | ## Available kernels 50 | We list below a number of kernels that are already implemented in `ruptures`. 51 | In the following, $u$ and $v$ are two d-dimensional vectors and $\|\cdot\|$ is the Euclidean norm. 
52 | 53 | | Kernel | Description | Cost function | 54 | | -------------------------- | --------------------------------------------------------------------------------------------------- | ---------------------------------------------------- | 55 | | Linear
`model="linear"` | $k_{\text{linear}}(u, v) = u^T v$. | [`CostL2`](../../user-guide/costs/costl2.md) | 56 | | Gaussian
`model="rbf"` | $k_{\text{Gaussian}}(u,v)=\exp(-\gamma \|u-v\|^2)$
where $\gamma>0$ is a user-defined parameter. | [`CostRbf`](../../user-guide/costs/costrbf.md) | 57 | | Cosine
`model="cosine"` | $k_{\text{cosine}}(u, v) = (u^T v)/(\|u\|\|v\|)$ | [`CostCosine`](../../user-guide/costs/costcosine.md) | 58 | 59 | 60 | ## Implementation and usage 61 | 62 | Kernel change point detection is implemented in the class [`KernelCPD`][ruptures.detection.kernelcpd.KernelCPD], which is a C implementation of dynamic programming and PELT. 63 | To see it in action, please look at the gallery of examples, in particular: 64 | 65 | - [Kernel change point detection: a performance comparison](../../examples/kernel-cpd-performance-comparison.ipynb) 66 | 67 | The exact class API is available [here][ruptures.detection.kernelcpd.KernelCPD]. 68 | 69 | ## References 70 | 71 | [Gretton2012] 72 | Gretton, A., Borgwardt, K. M., Rasch, M. J., Schölkopf, B., & Smola, A. (2012). A kernel two-sample test. The Journal of Machine Learning Research, 13, 723–773. 73 | 74 | [Killick2012] 75 | Killick, R., Fearnhead, P., & Eckley, I. (2012). Optimal detection of changepoints with a linear computational cost. Journal of the American Statistical Association, 107(500), 1590–1598. 76 | 77 | [Celisse2018] 78 | Celisse, A., Marot, G., Pierre-Jean, M., & Rigaill, G. (2018). New efficient algorithms for multiple change-point detection with reproducing kernels. Computational Statistics and Data Analysis, 128, 200–220. 79 | 80 | [Arlot2019] 81 | Arlot, S., Celisse, A., & Harchaoui, Z. (2019). A kernel multiple change-point algorithm via model selection. Journal of Machine Learning Research, 20(162), 1–56. 82 | -------------------------------------------------------------------------------- /docs/user-guide/detection/pelt.md: -------------------------------------------------------------------------------- 1 | # Linearly penalized segmentation (`Pelt`) 2 | 3 | ## Description 4 | 5 | The method is implemented in [`Pelt`][ruptures.detection.pelt.Pelt]. 6 | 7 | Because the enumeration of all possible partitions is impossible, the algorithm relies on a pruning rule. 
8 | Many indexes are discarded, greatly reducing the computational cost while retaining the 9 | ability to find the optimal segmentation. 10 | The implementation follows [[Killick2012]](#Killick2012). 11 | In addition, under certain conditions on the change point distribution, the average computational complexity is of the order of $\mathcal{O}(CKn)$, where $K$ is the number of change points to detect, $n$ the number of samples and $C$ the complexity of calling the considered cost function on one sub-signal. 12 | Consequently, piecewise constant models (`model=l2`) are significantly faster than linear or autoregressive models. 13 | 14 | To reduce the computational cost, you can consider only a subsample of possible change point indexes, by changing the `min_size` and `jump` arguments when instantiating [Pelt](#ruptures.detection.Pelt): 15 | 16 | - `min_size` controls the minimum distance between change points; for instance, if `min_size=10`, all change points will be at least 10 samples apart. 17 | - `jump` controls the grid of possible change points; for instance, if `jump=k`, only changes at `k, 2*k, 3*k,...` are considered. 18 | 19 | 20 | ## Usage 21 | 22 | ```python 23 | import numpy as np 24 | import matplotlib.pylab as plt 25 | import ruptures as rpt 26 | 27 | # creation of data 28 | n, dim = 500, 3 29 | n_bkps, sigma = 3, 1 30 | signal, bkps = rpt.pw_constant(n, dim, n_bkps, noise_std=sigma) 31 | 32 | # change point detection 33 | model = "l1" # "l2", "rbf" 34 | algo = rpt.Pelt(model=model, min_size=3, jump=5).fit(signal) 35 | my_bkps = algo.predict(pen=3) 36 | 37 | # show results 38 | fig, ax_arr = rpt.display(signal, bkps, my_bkps, figsize=(10, 6)) 39 | plt.show() 40 | ``` 41 | 42 | ## References 43 | 44 | [Killick2012] 45 | Killick, R., Fearnhead, P., & Eckley, I. (2012). Optimal detection of changepoints with a linear computational cost. Journal of the American Statistical Association, 107(500), 1590–1598. 
46 | -------------------------------------------------------------------------------- /docs/user-guide/detection/window.md: -------------------------------------------------------------------------------- 1 | # Window-based change point detection (`Window`) 2 | 3 | ## Description 4 | 5 | Window-based change point detection is used to perform fast signal segmentation and is implemented in [`Window`][ruptures.detection.window.Window]. 6 | The algorithm uses two windows which slide along the data stream. 7 | The statistical properties of the signals within each window are compared with a discrepancy measure. 8 | For a given cost function $c(\cdot)$, a discrepancy measure is derived $d(\cdot,\cdot)$ as follows: 9 | 10 | $$ 11 | d(y_{u..v}, y_{v..w}) = c(y_{u..w}) - c(y_{u..v}) - c(y_{v..w}) 12 | $$ 13 | 14 | where $\{y_t\}_t$ is the input signal and $u < v < w$ are indexes. 15 | The discrepancy is the cost gain of splitting the sub-signal $y_{u..w}$ at the index $v$. 16 | If the sliding windows $u..v$ and $v..w$ both fall into a segment, their statistical properties are similar and the discrepancy between the first window and the second window is low. 17 | If the sliding windows fall into two dissimilar segments, the discrepancy is significantly higher, suggesting that the boundary between windows is a change point. 18 | The discrepancy curve is the curve, defined for all indexes $t$ between $w/2$ and $n-w/2$ ($n$ is the number of samples), 19 | 20 | $$ 21 | \big(t, d(y_{t-w/2..t}, y_{t..t+w/2})\big) 22 | $$ 23 | 24 | where $w$ is the window length. 25 | A sequential peak search is performed on the discrepancy curve in order to detect change points. 
26 | 27 | The benefits of window-based segmentation include low complexity (of the order of $\mathcal{O}(n w)$, where $n$ is the number of samples), the fact that it can extend any single change point detection method to detect multiple change points and that it can work whether the number of regimes is known beforehand or not. 28 | 29 | ![](../../images/schema_fenetre.png) 30 |
Schematic view of the window sliding segmentation algorithm
31 | 32 | ## Usage 33 | 34 | Start with the usual imports and create a signal. 35 | 36 | ```python 37 | import numpy as np 38 | import matplotlib.pylab as plt 39 | import ruptures as rpt 40 | 41 | # creation of data 42 | n, dim = 500, 3 # number of samples, dimension 43 | n_bkps, sigma = 3, 5 # number of change points, noise standart deviation 44 | signal, bkps = rpt.pw_constant(n, dim, n_bkps, noise_std=sigma) 45 | ``` 46 | 47 | To perform a binary segmentation of a signal, initialize a [`Window`][ruptures.detection.window.Window] 48 | instance. 49 | 50 | ```python 51 | # change point detection 52 | model = "l2" # "l1", "rbf", "linear", "normal", "ar" 53 | algo = rpt.Window(width=40, model=model).fit(signal) 54 | my_bkps = algo.predict(n_bkps=3) 55 | 56 | # show results 57 | rpt.show.display(signal, bkps, my_bkps, figsize=(10, 6)) 58 | plt.show() 59 | ``` 60 | 61 | The window length (in number of samples) is modified through the argument `width`. 62 | Usual methods assume that the window length is smaller than the smallest regime length. 63 | 64 | In the situation in which the number of change points is unknown, one can specify a penalty using 65 | the `pen` parameter or a threshold on the residual norm using `epsilon`. 66 | 67 | ```python 68 | my_bkps = algo.predict(pen=np.log(n) * dim * sigma**2) 69 | # or 70 | my_bkps = algo.predict(epsilon=3 * n * sigma**2) 71 | ``` 72 | 73 | For faster predictions, one can modify the `jump` parameter during initialization. 74 | The higher it is, the faster the prediction is achieved (at the expense of precision). 
75 | 76 | ```python 77 | algo = rpt.Window(model=model, jump=10).fit(signal) 78 | ``` -------------------------------------------------------------------------------- /docs/user-guide/evaluation.md: -------------------------------------------------------------------------------- 1 | # Evaluation and visualization 2 | 3 | -------------------------------------------------------------------------------- /docs/user-guide/index.md: -------------------------------------------------------------------------------- 1 | # User guide 2 | 3 | This section describes the algorithms and utility functions of `ruptures`. 4 | Each entry of the user guide is linked to a companion entry in the [Code reference](../code-reference/index.md) section, where the API is detailed. -------------------------------------------------------------------------------- /docs/user-guide/metrics/hausdorff.md: -------------------------------------------------------------------------------- 1 | # Hausdorff metric (`hausdorff`) 2 | 3 | ## Description 4 | 5 | The [`hausdorff`][ruptures.metrics.hausdorff.hausdorff] function computes the Hausdorff metric which measures the worst prediction error. 6 | Assume a set of change point indexes $t_1,t_2,\dots$ and their estimates $\hat{t}_1, \hat{t}_2,\dots$. 7 | The Hausdorff metric is then equal to 8 | 9 | $$ 10 | \text{Hausdorff}(\{t_k\}_k, \{\hat{t}_k\}_k) := \max \{ \max_k \min_l |t_k - \hat{t}_l| \, , \max_k \min_l |\hat{t}_k - t_l|\}. 11 | $$ 12 | 13 | ![](../../images/hausdorff.png) 14 |
Schematic example: true segmentation in gray, estimated segmentation in dashed lines. Here, Hausdorff is equal to $\max(\Delta t_1, \Delta t_2, \Delta t_3)$.
15 | 16 | ## Usage 17 | 18 | Start with the usual imports and create two segmentations to compare. 19 | 20 | ```python 21 | from ruptures.metrics import hausdorff 22 | 23 | bkps1, bkps2 = [100, 200, 500], [105, 115, 350, 400, 500] 24 | print(hausdorff(bkps1, bkps2)) 25 | ``` -------------------------------------------------------------------------------- /docs/user-guide/metrics/precisionrecall.md: -------------------------------------------------------------------------------- 1 | # Precision and recall (`precision_recall`) 2 | 3 | ## Description 4 | 5 | The precision and recall of an estimated segmentation is computed by the function [`precision_recall`][ruptures.metrics.precisionrecall.precision_recall] as follows. 6 | A true change point is declared "detected" (or positive) if there is at least one computed change point at less than "margin" points from it. 7 | Formally, assume a set of change point indexes $t_1,t_2,\dots$ and their estimates $\hat{t}_1, \hat{t}_2,\dots$ 8 | In the context of change point detection, precision and recall are defined as follows: 9 | 10 | $$ 11 | \text{precision}:=|\text{TP}|/|\{\hat{t}_l\}_l| \quad \text{and}\quad\text{recall}:=|\text{TP}|/|\{t_k\}_k| 12 | $$ 13 | 14 | where, for a given margin $M$, true positives $\text{TP}$ are true change points for which there is an estimated one at less than $M$ samples, i.e. 15 | 16 | $$ 17 | \text{TP}:= \{t_k\,|\, \exists\, \hat{t}_l\,\, \text{s.t.}\, |\hat{t}_l - t_k|Schematic example: true segmentation in gray, estimated segmentation in dashed lines and margin in dashed areas. Here, precision is 2/3 and recall is 2/2. 22 | 23 | ## Usage 24 | 25 | Start with the usual imports and create two change point sets to compare. 
26 | 27 | ```python 28 | from ruptures.metrics import precision_recall 29 | 30 | bkps1, bkps2 = [100, 200, 500], [105, 115, 350, 400, 500] 31 | p, r = precision_recall(bkps1, bkps2) 32 | print((p, r)) 33 | ``` 34 | 35 | The margin parameter $M$ can be changed through the keyword `margin` (default is 10 samples). 36 | 37 | ```python 38 | p, r = precision_recall(bkps1, bkps2, margin=10) 39 | print((p, r)) 40 | p, r = precision_recall(bkps1, bkps2, margin=20) 41 | print((p, r)) 42 | ``` -------------------------------------------------------------------------------- /docs/user-guide/metrics/randindex.md: -------------------------------------------------------------------------------- 1 | # Rand index (`randindex`) 2 | 3 | ## Description 4 | 5 | The Rand index ($RI$) measures the similarity between two segmentations and is 6 | equal to the proportion of agreement between two partitions. 7 | Formally, for $\mathcal{T}_1$ and $\mathcal{T}_2$ two partitions of $\{1, 2,\dots,T\}$, 8 | 9 | $$ 10 | RI := \frac{N_0 + N_1}{T(T+1)/2} 11 | $$ 12 | 13 | where 14 | 15 | - $N_0$ is the number of pairs of samples that belong to the same segment 16 | according to $\mathcal{T}_1$ and $\mathcal{T}_2$, 17 | - $N_1$ is the number of pairs of samples that belong to different segments 18 | according to $\mathcal{T}_1$ and $\mathcal{T}_2$. 19 | 20 | $RI$ is between 0 (total disagreement) and 1 (total agreement). 21 | It is available in the [`randindex`][ruptures.metrics.randindex.randindex] 22 | function which uses the efficient implementation of [[Prates2021]](#Prates2021). 23 | 24 | ## Usage 25 | 26 | Start with the usual imports and create two segmentations to compare. 27 | 28 | ```python 29 | from ruptures.metrics import randindex 30 | 31 | bkps1, bkps2 = [100, 200, 500], [105, 115, 350, 400, 500] 32 | print(randindex(bkps1, bkps2)) 33 | ``` 34 | 35 | ## References 36 | 37 | [Prates2021] 38 | Prates, L. (2021). 
A more efficient algorithm to compute the Rand Index for 39 | change-point problems. ArXiv:2112.03738. -------------------------------------------------------------------------------- /docs/user-guide/show/display.md: -------------------------------------------------------------------------------- 1 | # Display (`display`) 2 | 3 | ## Description 4 | 5 | The function [`display`][ruptures.show.display.display] displays a signal and the change points provided in alternating colors. 6 | If another set of change point indexes is provided, they are displayed with dashed vertical dashed lines. 7 | 8 | ## Usage 9 | 10 | Start with the usual imports and create a signal. 11 | 12 | ```python 13 | import numpy as np 14 | import matplotlib.pylab as plt 15 | import ruptures as rpt 16 | 17 | # creation of data 18 | n, dim = 500, 2 # number of samples, dimension 19 | n_bkps, sigma = 3, 5 # number of change points, noise standart deviation 20 | signal, bkps = rpt.pw_constant(n, dim, n_bkps, noise_std=sigma) 21 | rpt.display(signal, bkps) 22 | ``` 23 | 24 | If we computed another set of change points, for instance `[110, 150, 320, 500]`, we can easily compare the two segmentations. 25 | 26 | ```python 27 | rpt.display(signal, bkps, [110, 150, 320, 500]) 28 | ``` 29 | 30 | ![](../../images/example-display.png) 31 |
Example output of the function [`display`][ruptures.show.display.display].
-------------------------------------------------------------------------------- /docs/what-is-cpd.md: -------------------------------------------------------------------------------- 1 | # Getting started 2 | 3 | ## What is change point detection? 4 | 5 | Under construction. 6 | In the meantime, you can refer to the associated review of methods [[Truong2020]](#Truong2020). 7 | 8 | ## References 9 | 10 | [Truong2020] 11 | Truong, C., Oudre, L., & Vayatis, N. (2020). Selective review of offline change point detection methods. *Signal Processing*, 167. [[abstract]](https://deepcharles.github.io/publication/sp-review-2020) [[doi]](https://doi.org/10.1016/j.sigpro.2019.107299) [[pdf]](http://deepcharles.github.io/files/sp-review-2020.pdf) -------------------------------------------------------------------------------- /images/example_readme.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepcharles/ruptures/39a837b7e8d3e08d04d33f14749a1a39d86183d2/images/example_readme.png -------------------------------------------------------------------------------- /images/pw_constant.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepcharles/ruptures/39a837b7e8d3e08d04d33f14749a1a39d86183d2/images/pw_constant.png -------------------------------------------------------------------------------- /images/pw_constantdp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepcharles/ruptures/39a837b7e8d3e08d04d33f14749a1a39d86183d2/images/pw_constantdp.png -------------------------------------------------------------------------------- /images/pw_linear.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepcharles/ruptures/39a837b7e8d3e08d04d33f14749a1a39d86183d2/images/pw_linear.png 
-------------------------------------------------------------------------------- /images/pw_linearpelt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepcharles/ruptures/39a837b7e8d3e08d04d33f14749a1a39d86183d2/images/pw_linearpelt.png -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: ruptures 2 | repo_url: https://github.com/deepcharles/ruptures 3 | repo_name: deepcharles/ruptures 4 | docs_dir: docs 5 | site_dir: site 6 | plugins: 7 | - search 8 | - mkdocstrings: 9 | watch: 10 | - ruptures 11 | handlers: 12 | python: 13 | options: 14 | filters: 15 | - "!^_" # exclude all members starting with _ 16 | - "^__init__$" # but always include __init__ modules and methods 17 | - mknotebooks: 18 | execute: false 19 | binder: false 20 | - macros: 21 | module_name: mkdocs_macros 22 | - section-index 23 | markdown_extensions: 24 | - pymdownx.snippets: 25 | - pymdownx.tabbed: 26 | - pymdownx.emoji: 27 | - pymdownx.superfences: 28 | - pymdownx.arithmatex: 29 | generic: true 30 | - admonition: 31 | - toc: 32 | permalink: "#" 33 | - codehilite 34 | nav: 35 | - Home: 36 | - index.md 37 | - Getting started: 38 | - what-is-cpd.md 39 | - 'Installation': install.md 40 | - 'Basic usage': getting-started/basic-usage.ipynb 41 | - 'Fitting and predicting': fit-and-predict.md 42 | - 'Custom cost function': custom-cost-function.md 43 | - User guide: 44 | - user-guide/index.md 45 | - Search methods: 46 | - 'Dynamic programming': user-guide/detection/dynp.md 47 | - 'Pelt': user-guide/detection/pelt.md 48 | - 'Kernel change detection': user-guide/detection/kernelcpd.md 49 | - 'Binary segmentation': user-guide/detection/binseg.md 50 | - 'Bottom-up segmentation': user-guide/detection/bottomup.md 51 | - 'Window sliding segmentation': user-guide/detection/window.md 52 | - Cost functions: 
53 | - 'CostL1': user-guide/costs/costl1.md 54 | - 'CostL2': user-guide/costs/costl2.md 55 | - 'CostNormal': user-guide/costs/costnormal.md 56 | - 'CostRbf': user-guide/costs/costrbf.md 57 | - 'CostCosine': user-guide/costs/costcosine.md 58 | - 'CostLinear': user-guide/costs/costlinear.md 59 | - 'CostCLinear': user-guide/costs/costclinear.md 60 | - 'CostRank': user-guide/costs/costrank.md 61 | - 'CostMl': user-guide/costs/costml.md 62 | - 'CostAR': user-guide/costs/costautoregressive.md 63 | - 'Custom cost': user-guide/costs/costcustom.md 64 | - Generate signals: 65 | - 'Piecewise constant': user-guide/datasets/pw_constant.md 66 | - 'Piecewise linear': user-guide/datasets/pw_linear.md 67 | - 'Piecewise Gaussian': user-guide/datasets/pw_normal.md 68 | - 'Piecewise sinusoidal': user-guide/datasets/pw_wavy.md 69 | - Evaluation metrics: 70 | - 'Precision and recall': user-guide/metrics/precisionrecall.md 71 | - 'Hausdorff metric': user-guide/metrics/hausdorff.md 72 | - 'Rand index': user-guide/metrics/randindex.md 73 | - Display: 74 | - user-guide/show/display.md 75 | - Gallery of examples: 76 | - examples/introduction.md 77 | - 'Simple usages': 78 | - 'Basic usage': examples/basic-usage.ipynb 79 | - 'Advanced usages': 80 | - 'Combining cost functions': examples/merging-cost-functions.ipynb 81 | - 'Kernel change point detection: a performance comparison': examples/kernel-cpd-performance-comparison.ipynb 82 | - 'Music segmentation': examples/music-segmentation.ipynb 83 | - 'Text segmentation': examples/text-segmentation.ipynb 84 | 85 | - Code reference: 86 | - code-reference/index.md 87 | - Base classes: code-reference/base-reference.md 88 | - Search methods: 89 | - Dynp: code-reference/detection/dynp-reference.md 90 | - KernelCPD: code-reference/detection/kernelcpd-reference.md 91 | - Pelt: code-reference/detection/pelt-reference.md 92 | - Binseg: code-reference/detection/binseg-reference.md 93 | - BottomUp: code-reference/detection/bottomup-reference.md 94 | - Window: 
code-reference/detection/window-reference.md 95 | - Cost functions: 96 | - 'CostL1': code-reference/costs/costl1-reference.md 97 | - 'CostL2': code-reference/costs/costl2-reference.md 98 | - 'CostNormal': code-reference/costs/costnormal-reference.md 99 | - 'CostRbf': code-reference/costs/costrbf-reference.md 100 | - 'CostCosine': code-reference/costs/costcosine-reference.md 101 | - 'CostLinear': code-reference/costs/costlinear-reference.md 102 | - 'CostCLinear': code-reference/costs/costclinear-reference.md 103 | - 'CostRank': code-reference/costs/costrank-reference.md 104 | - 'CostMl': code-reference/costs/costml-reference.md 105 | - 'CostAR': code-reference/costs/costautoregressive-reference.md 106 | - Data sets: 107 | - 'Piecewise constant': code-reference/datasets/pw_constant-reference.md 108 | - 'Piecewise linear': code-reference/datasets/pw_linear-reference.md 109 | - 'Piecewise normal': code-reference/datasets/pw_normal-reference.md 110 | - 'Piecewise wavy': code-reference/datasets/pw_wavy-reference.md 111 | - Metrics: 112 | - 'Precision and recall': code-reference/metrics/precisionrecall.md 113 | - 'Hausdorff metric': code-reference/metrics/hausdorff.md 114 | - 'Rand index': code-reference/metrics/randindex.md 115 | - Display: 116 | - 'Display function': code-reference/show/display.md 117 | - About: 118 | - 'Release notes': release-notes.md 119 | - 'Contributing': contributing.md 120 | - 'License': license.md 121 | theme: 122 | name: material 123 | features: 124 | - navigation.tabs 125 | - search.highlight 126 | extra_javascript: 127 | - javascripts/mathjax.js 128 | - https://polyfill.io/v3/polyfill.min.js?features=es6 129 | - https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js 130 | - https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js 131 | -------------------------------------------------------------------------------- /mkdocs_macros.py: -------------------------------------------------------------------------------- 1 | """This module contains 
Mkdocs macros to modify the documentation. The 2 | documentation on mkdocs plugins is available. 3 | 4 | [here](https://mkdocs-macros-plugin.readthedocs.io/en/latest/). 5 | """ 6 | 7 | BINDER_URL = "https://mybinder.org/v2/gh" 8 | BRANCH = "master" 9 | BINDER_TEMPLATE = """ 10 | --> 11 | !!! info 12 | - Try this notebook in an executable environment with [Binder]({binder_link}). 13 | - Download this notebook [here]({download_link}). 14 | D_{s..t} = D[t] - D[s] 78 | // S{s..t} has been stored in S[s] 79 | c_cost = D[t] - D[s] - S[s] / (t - s); 80 | n_bkps_max = int_min(n_bkps, s / min_size); // integer division: s / min_size = floor(s / min_size) 81 | for (k = 1; k <= n_bkps_max; k++) 82 | { 83 | // With k break points on y_{0..t}, sum cost with (k-1) break points on y_{0..s} and cost on y_{s..t} 84 | c_cost_sum = M_V[s * (n_bkps + 1) + (k - 1)] + c_cost; 85 | if (s == k * min_size) 86 | { 87 | // k is the smallest possibility for s in order to have k break points in y_{0..s}. 88 | // It means that y_0, y_1, ..., y_k are break points. 
89 | M_V[t * (n_bkps + 1) + k] = c_cost_sum; 90 | M_path[t * (n_bkps + 1) + k] = s; 91 | continue; 92 | } 93 | // Compare to current min 94 | if (M_V[t * (n_bkps + 1) + k] > c_cost_sum) 95 | { 96 | M_V[t * (n_bkps + 1) + k] = c_cost_sum; 97 | M_path[t * (n_bkps + 1) + k] = s; 98 | } 99 | } 100 | } 101 | } 102 | 103 | // Free memory 104 | free(D); 105 | free(S); 106 | free(M_V); 107 | 108 | return; 109 | } 110 | -------------------------------------------------------------------------------- /src/ruptures/detection/_detection/ekcpd_computation.h: -------------------------------------------------------------------------------- 1 | void ekcpd_compute(double *signal, int n_samples, int n_dims, int n_bkps, int min_size, void *kernelDescObj, int *M_path_res); -------------------------------------------------------------------------------- /src/ruptures/detection/_detection/ekcpd_pelt_computation.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "kernels.h" 6 | static inline int max_int(int a, int b) 7 | { 8 | if (a > b) 9 | return a; 10 | return b; 11 | } 12 | 13 | /** 14 | * @brief Efficient kernel change point detection 15 | * 16 | * @param signal shape (n_samples*n_dims,) 17 | * @param n_samples number of samples 18 | * @param n_dims number of dimensions 19 | * @param beta smoothing parameter 20 | * @param min_size minimum size of a segment 21 | * @param kernelDescObj describe the selected kernel 22 | * @param M_path path matrix of shape (n_samples+1), filled by the function 23 | */ 24 | void ekcpd_pelt_compute(double *signal, int n_samples, int n_dims, double beta, int min_size, void *kernelDescObj, int *M_path) 25 | { 26 | int t, s; 27 | int s_min = 0; 28 | 29 | // Allocate memory 30 | double *D, *S, *M_V, *M_pruning; 31 | double c_cost, c_cost_sum, c_r, diag_element; 32 | 33 | // Initialize and allocate memory 34 | // Allocate memory 35 | D = (double *)malloc((n_samples + 1) * 
sizeof(double)); 36 | S = (double *)malloc((n_samples + 1) * sizeof(double)); 37 | M_V = (double *)malloc((n_samples + 1) * sizeof(double)); 38 | M_pruning = (double *)malloc((n_samples + 1) * sizeof(double)); 39 | 40 | // D, S, M_V and M_path 41 | for (t = 0; t < (n_samples + 1); t++) 42 | { 43 | D[t] = 0.0; 44 | S[t] = 0.0; 45 | M_V[t] = 0.0; 46 | M_path[t] = 0; 47 | M_pruning[t] = 0.0; 48 | } 49 | 50 | // for t<2*min_size, there cannot be any change point. 51 | for (t = 1; t < 2 * min_size; t++) 52 | { 53 | diag_element = kernel_value_by_name(&(signal[(t - 1) * n_dims]), &(signal[(t - 1) * n_dims]), n_dims, kernelDescObj); 54 | D[t] = D[t - 1] + diag_element; 55 | 56 | // Compute S[t-1] = S_{t-1, t}, S[t-2] = S_{t-2, t}, ..., S[0] = S_{0, t} 57 | // S_{t-1, t} can be computed with S_{t-1, t-1}. 58 | // S_{t-1, t-1} was stored in S[t-1] 59 | // S_{t-1, t} will be stored in S[t-1] as well 60 | c_r = 0.0; 61 | for (s = t - 1; s >= 0; s--) 62 | { 63 | c_r += kernel_value_by_name(&(signal[s * n_dims]), &(signal[(t - 1) * n_dims]), n_dims, kernelDescObj); 64 | S[s] += 2 * c_r - diag_element; 65 | } 66 | c_cost = D[t] - D[0] - S[0] / t; 67 | M_V[t] = c_cost + beta; 68 | } 69 | 70 | // Computation loop 71 | // Handle y_{0..t} = {y_0, ..., y_{t-1}} 72 | for (t = 2 * min_size; t < (n_samples + 1); t++) 73 | { 74 | diag_element = kernel_value_by_name(&(signal[(t - 1) * n_dims]), &(signal[(t - 1) * n_dims]), n_dims, kernelDescObj); 75 | D[t] = D[t - 1] + diag_element; 76 | 77 | // Compute S[t-1] = S_{t-1, t}, S[t-2] = S_{t-2, t}, ..., S[0] = S_{0, t} 78 | // S_{t-1, t} can be computed with S_{t-1, t-1}. 
79 | // S_{t-1, t-1} was stored in S[t-1] 80 | // S_{t-1, t} will be stored in S[t-1] as well 81 | c_r = 0.0; 82 | for (s = t - 1; s >= s_min; s--) 83 | { 84 | c_r += kernel_value_by_name(&(signal[s * n_dims]), &(signal[(t - 1) * n_dims]), n_dims, kernelDescObj); 85 | S[s] += 2 * c_r - diag_element; 86 | } 87 | 88 | // Compute segmentations 89 | // Store the total cost on y_{0..t} with 0 break points in M_V[t, 0] 90 | // init 91 | s = s_min; 92 | c_cost = D[t] - D[s] - S[s] / (t - s); 93 | c_cost_sum = M_V[s] + c_cost; 94 | M_pruning[s] = c_cost_sum; 95 | c_cost_sum += beta; 96 | M_V[t] = c_cost_sum; 97 | M_path[t] = s; 98 | // search for minimum (penalized) sum of cost 99 | for (s = max_int(s_min + 1, min_size); s < t - min_size + 1; s++) 100 | { 101 | // Compute cost on y_{s..t} 102 | // D_{s..t} = D_{0..t} - D{0..s} <--> D_{s..t} = D[t] - D[s] 103 | // S{s..t} has been stored in S[s] 104 | c_cost = D[t] - D[s] - S[s] / (t - s); 105 | c_cost_sum = M_V[s] + c_cost; 106 | M_pruning[s] = c_cost_sum; 107 | c_cost_sum += beta; 108 | // Compare to current min 109 | if (M_V[t] > c_cost_sum) 110 | { 111 | M_V[t] = c_cost_sum; 112 | M_path[t] = s; 113 | } 114 | } 115 | // Pruning 116 | while ((M_pruning[s_min] >= M_V[t]) && (s_min < t - min_size + 1)) 117 | { 118 | if (s_min == 0) 119 | { 120 | s_min += min_size; 121 | } 122 | else 123 | { 124 | s_min++; 125 | } 126 | } 127 | } 128 | 129 | // Free memory 130 | free(D); 131 | free(S); 132 | free(M_V); 133 | free(M_pruning); 134 | 135 | return; 136 | } 137 | -------------------------------------------------------------------------------- /src/ruptures/detection/_detection/ekcpd_pelt_computation.h: -------------------------------------------------------------------------------- 1 | void ekcpd_pelt_compute(double *signal, int n_samples, int n_dims, double beta, int min_size, void *kernelDescObj, int *M_path); 2 | -------------------------------------------------------------------------------- 
/src/ruptures/detection/_detection/kernels.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "kernels.h" 5 | 6 | /************************************* 7 | * 8 | * Utils 9 | * 10 | *************************************/ 11 | 12 | static inline float min_f(float a, float b) 13 | { 14 | if (a > b) 15 | return b; 16 | return a; 17 | } 18 | 19 | static inline float max_f(float a, float b) 20 | { 21 | if (a > b) 22 | return a; 23 | return b; 24 | } 25 | 26 | float clip(float n, float lower, float upper) 27 | { 28 | return max_f(lower, min_f(n, upper)); 29 | } 30 | 31 | /************************************* 32 | * 33 | * Kernels 34 | * 35 | *************************************/ 36 | 37 | static inline double linear_kernel(double *x, double *y, int n_dims) 38 | { 39 | double kernel_value = 0.0; 40 | int dim; 41 | for (dim = 0; dim < n_dims; dim++) 42 | { 43 | kernel_value = kernel_value + x[dim] * y[dim]; 44 | } 45 | return (kernel_value); 46 | } 47 | 48 | static inline double gaussian_kernel(double *x, double *y, int n_dims, double gamma) 49 | { 50 | double squared_distance = 0.0; 51 | int t; 52 | for (t = 0; t < n_dims; t++) 53 | { 54 | squared_distance = squared_distance + (x[t] - y[t]) * (x[t] - y[t]); 55 | } 56 | // clipping to avoid exp under/overflow 57 | return exp(-clip(gamma * squared_distance, 0.01, 100)); 58 | } 59 | 60 | static inline double cosine_similarity(double *x, double *y, int n_dims) 61 | { 62 | double dot = 0.0, denom_x = 0.0, denom_y = 0.0 ; 63 | int i; 64 | for(i = 0; i < n_dims; i++) { 65 | dot += x[i] * y[i] ; 66 | denom_x += x[i] * x[i] ; 67 | denom_y += y[i] * y[i] ; 68 | } 69 | return dot / (sqrt(denom_x) * sqrt(denom_y)) ; 70 | } 71 | 72 | 73 | 74 | // Hub function that select proper kernel accoridng kernelObj 75 | double kernel_value_by_name(double *x, double *y, int n_dims, void *kernelObj) 76 | { 77 | if (strcmp(((KernelLinear *)kernelObj)->pBaseObj->name, 
r"""Binary segmentation."""

from functools import lru_cache

import numpy as np
from ruptures.base import BaseCost, BaseEstimator
from ruptures.costs import cost_factory
from ruptures.exceptions import BadSegmentationParameters
from ruptures.utils import pairwise, sanity_check


class Binseg(BaseEstimator):
    """Binary segmentation."""

    def __init__(self, model="l2", custom_cost=None, min_size=2, jump=5, params=None):
        """Initialize a Binseg instance.

        Args:
            model (str, optional): segment model, ["l1", "l2", "rbf",...]. Not used if ``'custom_cost'`` is not None.
            custom_cost (BaseCost, optional): custom cost function. Defaults to None.
            min_size (int, optional): minimum segment length. Defaults to 2 samples.
            jump (int, optional): subsample (one every *jump* points). Defaults to 5 samples.
            params (dict, optional): a dictionary of parameters for the cost instance.
        """
        if custom_cost is not None and isinstance(custom_cost, BaseCost):
            self.cost = custom_cost
        else:
            if params is None:
                self.cost = cost_factory(model=model)
            else:
                self.cost = cost_factory(model=model, **params)
        # the cost function may impose a larger minimum segment length
        self.min_size = max(min_size, self.cost.min_size)
        self.jump = jump
        self.n_samples = None
        self.signal = None

    def _seg(self, n_bkps=None, pen=None, epsilon=None):
        """Compute the binary segmentation.

        The stopping rule depends on the parameter passed to the function.

        Args:
            n_bkps (int): number of breakpoints to find before stopping.
            pen (float): penalty value (>0).
            epsilon (float): reconstruction budget (>0).

        Returns:
            dict: partition dict {(start, end): cost value,...}
        """
        # initialization: a single segment covering the whole signal
        bkps = [self.n_samples]
        stop = False
        while not stop:
            stop = True
            # best split of each current segment
            new_bkps = [
                self.single_bkp(start, end) for start, end in pairwise([0] + bkps)
            ]
            bkp, gain = max(new_bkps, key=lambda x: x[1])

            if bkp is None:  # all possible configurations have been explored
                break

            if n_bkps is not None:
                if len(bkps) - 1 < n_bkps:
                    stop = False
            elif pen is not None:
                if gain > pen:
                    stop = False
            elif epsilon is not None:
                error = self.cost.sum_of_costs(bkps)
                if error > epsilon:
                    stop = False

            if not stop:
                bkps.append(bkp)
                bkps.sort()
        partition = {
            (start, end): self.cost.error(start, end)
            for start, end in pairwise([0] + bkps)
        }
        return partition

    @lru_cache(maxsize=None)
    def single_bkp(self, start, end):
        """Return the optimal breakpoint of [start:end] (if it exists)."""
        segment_cost = self.cost.error(start, end)
        if np.isneginf(segment_cost):  # cost is -inf: splitting cannot help
            return None, 0
        gain_list = list()
        for bkp in range(start, end, self.jump):
            if bkp - start >= self.min_size and end - bkp >= self.min_size:
                gain = (
                    segment_cost
                    - self.cost.error(start, bkp)
                    - self.cost.error(bkp, end)
                )
                gain_list.append((gain, bkp))
        try:
            gain, bkp = max(gain_list)
        except ValueError:  # no admissible breakpoint (empty sub-sampling)
            return None, 0
        return bkp, gain

    def fit(self, signal) -> "Binseg":
        """Compute params to segment signal.

        Args:
            signal (array): signal to segment. Shape (n_samples, n_features) or (n_samples,).

        Returns:
            self
        """
        # update some params
        if signal.ndim == 1:
            self.signal = signal.reshape(-1, 1)
        else:
            self.signal = signal
        self.n_samples, _ = self.signal.shape
        self.cost.fit(signal)
        # cached splits refer to the previous signal: drop them
        self.single_bkp.cache_clear()

        return self

    def predict(self, n_bkps=None, pen=None, epsilon=None):
        """Return the optimal breakpoints.

        Must be called after the fit method. The breakpoints are associated with the
        signal passed to [`fit()`][ruptures.detection.binseg.Binseg.fit].
        The stopping rule depends on the parameter passed to the function.

        Args:
            n_bkps (int): number of breakpoints to find before stopping.
            pen (float): penalty value (>0)
            epsilon (float): reconstruction budget (>0)

        Raises:
            AssertionError: if none of `n_bkps`, `pen`, `epsilon` is set.
            BadSegmentationParameters: in case of impossible segmentation
                configuration

        Returns:
            list: sorted list of breakpoints
        """
        msg = "Give a parameter."
        assert any(param is not None for param in (n_bkps, pen, epsilon)), msg

        # raise an exception in case of impossible segmentation configuration
        if not sanity_check(
            n_samples=self.cost.signal.shape[0],
            n_bkps=0 if n_bkps is None else n_bkps,
            jump=self.jump,
            min_size=self.min_size,
        ):
            raise BadSegmentationParameters

        partition = self._seg(n_bkps=n_bkps, pen=pen, epsilon=epsilon)
        bkps = sorted(e for s, e in partition.keys())
        return bkps

    def fit_predict(self, signal, n_bkps=None, pen=None, epsilon=None):
        """Fit to the signal and return the optimal breakpoints.

        Helper method to call fit and predict once

        Args:
            signal (array): signal. Shape (n_samples, n_features) or (n_samples,).
            n_bkps (int): number of breakpoints.
            pen (float): penalty value (>0)
            epsilon (float): reconstruction budget (>0)

        Returns:
            list: sorted list of breakpoints
        """
        self.fit(signal)
        return self.predict(n_bkps=n_bkps, pen=pen, epsilon=epsilon)
r"""Dynamic programming."""

from functools import lru_cache

from ruptures.utils import sanity_check
from ruptures.costs import cost_factory
from ruptures.base import BaseCost, BaseEstimator
from ruptures.exceptions import BadSegmentationParameters


class Dynp(BaseEstimator):
    """Find optimal change points using dynamic programming.

    Given a segment model, it computes the best partition for which the
    sum of errors is minimum.
    """

    def __init__(self, model="l2", custom_cost=None, min_size=2, jump=5, params=None):
        """Creates a Dynp instance.

        Args:
            model (str, optional): segment model, ["l1", "l2", "rbf"]. Not used if ``'custom_cost'`` is not None.
            custom_cost (BaseCost, optional): custom cost function. Defaults to None.
            min_size (int, optional): minimum segment length.
            jump (int, optional): subsample (one every *jump* points).
            params (dict, optional): a dictionary of parameters for the cost instance.
        """
        if custom_cost is not None and isinstance(custom_cost, BaseCost):
            self.cost = custom_cost
        else:
            # NOTE(review): `model_name` is only set on this branch — when a
            # custom cost is given, the attribute does not exist; confirm no
            # caller reads it in that case.
            self.model_name = model
            if params is None:
                self.cost = cost_factory(model=model)
            else:
                self.cost = cost_factory(model=model, **params)
        # the cost function may impose a larger minimum segment length
        self.min_size = max(min_size, self.cost.min_size)
        self.jump = jump
        self.n_samples = None

    @lru_cache(maxsize=None)
    def seg(self, start, end, n_bkps):
        """Recurrence to find the optimal partition of signal[start:end].

        This method is to be memoized and then used.

        Args:
            start (int): start of the segment (inclusive)
            end (int): end of the segment (exclusive)
            n_bkps (int): number of breakpoints

        Returns:
            dict: {(start, end): cost value, ...}
        """
        jump, min_size = self.jump, self.min_size

        if n_bkps == 0:
            # base case: no change point, the whole-segment cost is the answer
            cost = self.cost.error(start, end)
            return {(start, end): cost}
        elif n_bkps > 0:
            # Let's fill the list of admissible last breakpoints
            multiple_of_jump = (k for k in range(start, end) if k % jump == 0)
            admissible_bkps = list()
            for bkp in multiple_of_jump:
                n_samples = bkp - start
                # first check if left subproblem is possible
                if sanity_check(
                    n_samples=n_samples,
                    n_bkps=n_bkps - 1,
                    jump=jump,
                    min_size=min_size,
                ):
                    # second check if the right subproblem has enough points
                    if end - bkp >= min_size:
                        admissible_bkps.append(bkp)

            assert (
                len(admissible_bkps) > 0
            ), "No admissible last breakpoints found.\
start, end: ({},{}), n_bkps: {}.".format(
                start, end, n_bkps
            )

            # Compute the subproblems: split on each admissible last
            # breakpoint and recurse on the left part (memoized).
            sub_problems = list()
            for bkp in admissible_bkps:
                left_partition = self.seg(start, bkp, n_bkps - 1)
                right_partition = self.seg(bkp, end, 0)
                tmp_partition = dict(left_partition)
                tmp_partition[(bkp, end)] = right_partition[(bkp, end)]
                sub_problems.append(tmp_partition)

            # Find the optimal partition (minimum total cost)
            return min(sub_problems, key=lambda d: sum(d.values()))

    def fit(self, signal) -> "Dynp":
        """Create the cache associated with the signal.

        Dynamic programming is a recurrence; intermediate results are cached to speed up
        computations. This method sets up the cache.

        Args:
            signal (array): signal. Shape (n_samples, n_features) or (n_samples,).

        Returns:
            self
        """
        # clear cache (it refers to the previously fitted signal)
        self.seg.cache_clear()
        # update some params
        self.cost.fit(signal)
        self.n_samples = signal.shape[0]
        return self

    def predict(self, n_bkps):
        """Return the optimal breakpoints.

        Must be called after the fit method. The breakpoints are associated with the signal passed
        to [`fit()`][ruptures.detection.dynp.Dynp.fit].

        Args:
            n_bkps (int): number of breakpoints.

        Raises:
            BadSegmentationParameters: in case of impossible segmentation
                configuration

        Returns:
            list: sorted list of breakpoints
        """
        # raise an exception in case of impossible segmentation configuration
        if not sanity_check(
            n_samples=self.cost.signal.shape[0],
            n_bkps=n_bkps,
            jump=self.jump,
            min_size=self.min_size,
        ):
            raise BadSegmentationParameters
        partition = self.seg(0, self.n_samples, n_bkps)
        bkps = sorted(e for s, e in partition.keys())
        return bkps

    def fit_predict(self, signal, n_bkps):
        """Fit to the signal and return the optimal breakpoints.

        Helper method to call fit and predict once

        Args:
            signal (array): signal. Shape (n_samples, n_features) or (n_samples,).
            n_bkps (int): number of breakpoints.

        Returns:
            list: sorted list of breakpoints
        """
        self.fit(signal)
        return self.predict(n_bkps)
r"""Pelt."""

from math import floor

from ruptures.costs import cost_factory
from ruptures.base import BaseCost, BaseEstimator
from ruptures.exceptions import BadSegmentationParameters
from ruptures.utils import sanity_check


class Pelt(BaseEstimator):
    """Penalized change point detection.

    For a given model and penalty level, computes the segmentation which
    minimizes the constrained sum of approximation errors.
    """

    def __init__(self, model="l2", custom_cost=None, min_size=2, jump=5, params=None):
        """Initialize a Pelt instance.

        Args:
            model (str, optional): segment model, ["l1", "l2", "rbf"]. Not used if ``'custom_cost'`` is not None.
            custom_cost (BaseCost, optional): custom cost function. Defaults to None.
            min_size (int, optional): minimum segment length.
            jump (int, optional): subsample (one every *jump* points).
            params (dict, optional): a dictionary of parameters for the cost instance.
        """
        if custom_cost is not None and isinstance(custom_cost, BaseCost):
            self.cost = custom_cost
        else:
            if params is None:
                self.cost = cost_factory(model=model)
            else:
                self.cost = cost_factory(model=model, **params)
        # the cost function may impose a larger minimum segment length
        self.min_size = max(min_size, self.cost.min_size)
        self.jump = jump
        self.n_samples = None

    def _seg(self, pen):
        """Compute the segmentation for a given penalty using PELT.

        Args:
            pen (float): penalty value

        Returns:
            dict: partition dict {(start, end): cost value,...}
        """
        # initialization
        # partitions[t] contains the optimal partition of signal[0:t]
        partitions = dict()  # this dict will be recursively filled
        partitions[0] = {(0, 0): 0}
        admissible = []

        # Recursion: consider each candidate segmentation end point in turn
        ind = [k for k in range(0, self.n_samples, self.jump) if k >= self.min_size]
        ind += [self.n_samples]
        for bkp in ind:
            # adding a point to the admissible set from the previous loop.
            new_adm_pt = floor((bkp - self.min_size) / self.jump)
            new_adm_pt *= self.jump
            admissible.append(new_adm_pt)

            subproblems = list()
            for t in admissible:
                # left partition
                try:
                    tmp_partition = partitions[t].copy()
                except KeyError:  # no partition of 0:t exists
                    continue
                # we update with the right partition
                tmp_partition.update({(t, bkp): self.cost.error(t, bkp) + pen})
                subproblems.append(tmp_partition)

            # finding the optimal partition
            partitions[bkp] = min(subproblems, key=lambda d: sum(d.values()))
            # trimming the admissible set (PELT pruning rule): a candidate
            # start point whose best cost already exceeds the current optimum
            # plus the penalty can never become optimal later and is dropped.
            admissible = [
                t
                for t, partition in zip(admissible, subproblems)
                if sum(partition.values()) <= sum(partitions[bkp].values()) + pen
            ]

        best_partition = partitions[self.n_samples]
        # drop the artificial empty segment used to seed the recursion
        del best_partition[(0, 0)]
        return best_partition

    def fit(self, signal) -> "Pelt":
        """Set params.

        Args:
            signal (array): signal to segment. Shape (n_samples, n_features) or (n_samples,).

        Returns:
            self
        """
        # update params
        self.cost.fit(signal)
        if signal.ndim == 1:
            (n_samples,) = signal.shape
        else:
            n_samples, _ = signal.shape
        self.n_samples = n_samples
        return self

    def predict(self, pen):
        """Return the optimal breakpoints.

        Must be called after the fit method. The breakpoints are associated with the signal passed
        to [`fit()`][ruptures.detection.pelt.Pelt.fit].

        Args:
            pen (float): penalty value (>0)

        Raises:
            BadSegmentationParameters: in case of impossible segmentation
                configuration

        Returns:
            list: sorted list of breakpoints
        """
        # raise an exception in case of impossible segmentation configuration
        if not sanity_check(
            n_samples=self.cost.signal.shape[0],
            n_bkps=0,
            jump=self.jump,
            min_size=self.min_size,
        ):
            raise BadSegmentationParameters

        partition = self._seg(pen)
        bkps = sorted(e for s, e in partition.keys())
        return bkps

    def fit_predict(self, signal, pen):
        """Fit to the signal and return the optimal breakpoints.

        Helper method to call fit and predict once

        Args:
            signal (array): signal. Shape (n_samples, n_features) or (n_samples,).
            pen (float): penalty value (>0)

        Returns:
            list: sorted list of breakpoints
        """
        self.fit(signal)
        return self.predict(pen)


# ----- file: src/ruptures/exceptions.py -----

"""The `ruptures.exceptions` module includes all custom warnings and error
classes used across ruptures."""


class NotEnoughPoints(Exception):
    """Raise this exception when there are not enough points to calculate a
    cost function."""


class BadSegmentationParameters(Exception):
    """Raise this exception when a segmentation is not possible given the
    parameters."""
"""Hamming metric for segmentation."""

from ruptures.metrics.randindex import randindex


def hamming(bkps1, bkps2):
    """Modified Hamming distance for partitions.

    For every pair of distinct points (x, y), counts how often the two
    partitions disagree on whether x and y belong to the same regime; the
    result is scaled to lie within 0 and 1.

    Args:
        bkps1 (list): list of the last index of each regime.
        bkps2 (list): list of the last index of each regime.

    Returns:
        float: Hamming distance.
    """
    # The normalized Hamming distance is the complement of the Rand index.
    agreement = randindex(bkps1=bkps1, bkps2=bkps2)
    return 1 - agreement
r"""Hausdorff metric."""

import numpy as np
from scipy.spatial.distance import cdist
from ruptures.metrics.sanity_check import sanity_check


def hausdorff(bkps1, bkps2):
    """Compute the Hausdorff distance between changepoints.

    Args:
        bkps1 (list): list of the last index of each regime.
        bkps2 (list): list of the last index of each regime.

    Returns:
        float: Hausdorff distance.
    """
    sanity_check(bkps1, bkps2)
    # The trailing index (signal length) is common to both partitions and
    # is excluded from the distance computation.
    first = np.array(bkps1[:-1]).reshape(-1, 1)
    second = np.array(bkps2[:-1]).reshape(-1, 1)
    dist_matrix = cdist(first, second)
    # Directed distances in both directions; the Hausdorff distance is the
    # larger of the two.
    d_first_to_second = dist_matrix.min(axis=1).max()
    d_second_to_first = dist_matrix.min(axis=0).max()
    return max(d_second_to_first, d_first_to_second)
r"""Precision and recall."""

from itertools import product

from ruptures.metrics.sanity_check import sanity_check


def precision_recall(true_bkps, my_bkps, margin=10):
    """Calculate the precision/recall of an estimated segmentation compared
    with the true segmentation.

    Args:
        true_bkps (list): list of the last index of each regime (true
            partition).
        my_bkps (list): list of the last index of each regime (computed
            partition).
        margin (int, optional): allowed error (in points).

    Returns:
        tuple: (precision, recall)
    """
    sanity_check(true_bkps, my_bkps)
    assert margin > 0, "Margin of error must be positive (margin = {})".format(margin)

    # no estimated breakpoint at all (only the trailing index)
    if len(my_bkps) == 1:
        return 0, 0

    # Greedy matching: scan all (true, predicted) pairs in order; a predicted
    # breakpoint may be matched at most once, and a true breakpoint counts as
    # found when at least one prediction falls strictly within the margin.
    matched_preds = set()
    true_pos = set()
    for true_b, my_b in product(true_bkps[:-1], my_bkps[:-1]):
        if my_b - margin < true_b < my_b + margin and my_b not in matched_preds:
            matched_preds.add(my_b)
            true_pos.add(true_b)

    tp_ = len(true_pos)
    precision = tp_ / (len(my_bkps) - 1)
    recall = tp_ / (len(true_bkps) - 1)
    return precision, recall
r"""Rand index (`randindex`)"""

from ruptures.metrics.sanity_check import sanity_check


def randindex(bkps1, bkps2):
    """Computes the Rand index (between 0 and 1) between two segmentations.

    The Rand index (RI) measures the similarity between two segmentations and
    is equal to the proportion of aggreement between two partitions.

    RI is between 0 (total disagreement) and 1 (total agreement).
    This function uses the efficient implementation of [1].

    [1] Prates, L. (2021). A more efficient algorithm to compute the Rand Index for
    change-point problems. ArXiv:2112.03738.

    Args:
        bkps1 (list): sorted list of the last index of each regime.
        bkps2 (list): sorted list of the last index of each regime.

    Returns:
        float: Rand index
    """
    sanity_check(bkps1, bkps2)
    n_samples = bkps1[-1]
    # prepend 0 so that each segment is (bkps_with_0[i], bkps_with_0[i + 1])
    bkps1_with_0 = [0] + bkps1
    bkps2_with_0 = [0] + bkps2
    n_bkps1 = len(bkps1)
    n_bkps2 = len(bkps2)

    # Each pair of overlapping segments (one per partition) contributes
    # nij * |end1 - end2| disagreeing sample pairs, where nij is the overlap
    # length (Prates 2021, Thm. 2). Only overlapping segment pairs are
    # visited, giving O(n_bkps1 + n_bkps2) inner iterations in total.
    disagreement = 0
    beginj: int = 0  # avoids unnecessary computations
    for index_bkps1 in range(n_bkps1):
        start1: int = bkps1_with_0[index_bkps1]
        end1: int = bkps1_with_0[index_bkps1 + 1]
        for index_bkps2 in range(beginj, n_bkps2):
            start2: int = bkps2_with_0[index_bkps2]
            end2: int = bkps2_with_0[index_bkps2 + 1]
            nij = max(min(end1, end2) - max(start1, start2), 0)
            disagreement += nij * abs(end1 - end2)

            # we can skip the rest of the iteration, nij will be 0
            if end1 < end2:
                break
            else:
                # segment index_bkps2 ends at or before end1: it cannot
                # overlap any later segment of partition 1 either
                beginj = index_bkps2 + 1

    # normalize by the total number of sample pairs
    disagreement /= n_samples * (n_samples - 1) / 2
    return 1.0 - disagreement
"""Helper function to check if two breakpoints list are comparable."""


class BadPartitions(Exception):
    """Exception raised when the partition is bad."""

    pass


def sanity_check(bkps1, bkps2):
    """Checks if two partitions are indeed partitions of the same signal.

    Args:
        bkps1 (list): list of the last index of each regime.
        bkps2 (list): list of the last index of each regime.

    Raises:
        BadPartitions: whenever a partition does not respect some conditions.

    Returns:
        None:
    """
    # checks if empty.
    for position, bkps in (("first", bkps1), ("second", bkps2)):
        if len(bkps) == 0:
            raise BadPartitions("The {} partition is empty.".format(position))
    # checks if both ends with the same index.
    if max(bkps1) != max(bkps2):
        raise BadPartitions(
            "The end of the last regime is not the same for each of the "
            "partitions:\n{}\n{}".format(bkps1, bkps2)
        )
    # checks if there is repetition.
    for bkps in (bkps1, bkps2):
        if len(set(bkps)) != len(bkps):
            raise BadPartitions("Some indexes are repeated: {}".format(bkps))
"""Mean time error."""

import numpy as np
from scipy.spatial.distance import cdist

from ruptures.metrics.sanity_check import sanity_check


def meantime(true_bkps, my_bkps):
    """For each computed changepoint, the mean time error is the average number
    of points to the closest true changepoint. Not a symetric funtion.

    Args:
        true_bkps (list): list of the last index of each regime (true
            partition).
        my_bkps (list): list of the last index of each regime (computed
            partition)

    Returns:
        float: mean time error.
    """
    sanity_check(true_bkps, my_bkps)
    # the trailing index (signal length) is shared and excluded
    true_arr = np.array(true_bkps[:-1]).reshape(-1, 1)
    pred_arr = np.array(my_bkps[:-1]).reshape(-1, 1)
    # distance of each computed breakpoint to its closest true breakpoint
    dist_from_true = cdist(true_arr, pred_arr).min(axis=0)
    assert len(dist_from_true) == len(my_bkps) - 1

    return dist_from_true.mean()
r"""Display."""

from itertools import cycle

import numpy as np

from ruptures.utils import pairwise

COLOR_CYCLE = ["#4286f4", "#f44174"]


class MatplotlibMissingError(RuntimeError):
    """Raised when matplotlib (an optional dependency) is not installed."""


def display(
    signal,
    true_chg_pts,
    computed_chg_pts=None,
    computed_chg_pts_color="k",
    computed_chg_pts_linewidth=3,
    computed_chg_pts_linestyle="--",
    computed_chg_pts_alpha=1.0,
    **kwargs
):
    """Display a signal and the change points provided in alternating colors.
    If another set of change point is provided, they are displayed with dashed
    vertical dashed lines. The following matplotlib subplots options is set by
    default, but can be changed when calling `display`):

    - figure size `figsize`, defaults to `(10, 2 * n_features)`.

    Args:
        signal (array): signal array, shape (n_samples,) or (n_samples, n_features).
        true_chg_pts (list): list of change point indexes.
        computed_chg_pts (list, optional): list of change point indexes.
        computed_chg_pts_color (str, optional): color of the lines indicating
            the computed_chg_pts. Defaults to "k".
        computed_chg_pts_linewidth (int, optional): linewidth of the lines
            indicating the computed_chg_pts. Defaults to 3.
        computed_chg_pts_linestyle (str, optional): linestyle of the lines
            indicating the computed_chg_pts. Defaults to "--".
        computed_chg_pts_alpha (float, optional): alpha of the lines indicating
            the computed_chg_pts. Defaults to "1.0".
        **kwargs : all additional keyword arguments are passed to the plt.subplots call.

    Raises:
        MatplotlibMissingError: if matplotlib is not installed.

    Returns:
        tuple: (figure, axarr) with a :class:`matplotlib.figure.Figure` object and an array of Axes objects.
    """
    try:
        import matplotlib.pyplot as plt
    except ImportError:
        # fixed typo in the user-facing message ("matpotlib" -> "matplotlib")
        raise MatplotlibMissingError(
            "This feature requires the optional dependency matplotlib, you can install it using `pip install matplotlib`."
        )

    # fixed type check: use isinstance instead of `type(signal) != np.ndarray`
    if not isinstance(signal, np.ndarray):
        # Try to get array from Pandas dataframe
        signal = signal.values

    if signal.ndim == 1:
        signal = signal.reshape(-1, 1)
    n_samples, n_features = signal.shape

    # let's set a sensible default size for the subplots
    matplotlib_options = {
        "figsize": (10, 2 * n_features),  # figure size
    }
    # add/update the options given by the user
    matplotlib_options.update(kwargs)

    # create plots
    fig, axarr = plt.subplots(n_features, sharex=True, **matplotlib_options)
    if n_features == 1:
        # a single Axes object is returned; normalize to an iterable
        axarr = [axarr]

    for axe, sig in zip(axarr, signal.T):
        color_cycle = cycle(COLOR_CYCLE)
        # plot s
        axe.plot(range(n_samples), sig)

        # color each (true) regime
        bkps = [0] + sorted(true_chg_pts)
        alpha = 0.2  # transparency of the colored background

        for (start, end), col in zip(pairwise(bkps), color_cycle):
            axe.axvspan(max(0, start - 0.5), end - 0.5, facecolor=col, alpha=alpha)
        # vertical lines to mark the computed_chg_pts
        if computed_chg_pts is not None:
            for bkp in computed_chg_pts:
                if bkp != 0 and bkp < n_samples:
                    axe.axvline(
                        x=bkp - 0.5,
                        color=computed_chg_pts_color,
                        linewidth=computed_chg_pts_linewidth,
                        linestyle=computed_chg_pts_linestyle,
                        alpha=computed_chg_pts_alpha,
                    )

    fig.tight_layout()

    return fig, axarr
10 | convert_pm.convert_path_matrix_c(&path_matrix_flat[0], n_bkps, n_samples, n_bkps_max, jump, &bkps_list[0]) 11 | except: 12 | print("An exception occurred.") 13 | return np.asarray(bkps_list).tolist() -------------------------------------------------------------------------------- /src/ruptures/utils/_utils/convert_path_matrix_c.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void convert_path_matrix_c(int *path_matrix, int n_bkps, int n_samples, int n_bkps_max, int jump, int *bkps_list) 4 | { 5 | int q = (int)ceil((float)n_samples / (float)jump); 6 | bkps_list[n_bkps] = q; 7 | int k = 0; 8 | while (k++ < n_bkps) 9 | { 10 | bkps_list[n_bkps - k] = path_matrix[bkps_list[n_bkps - k + 1] * (n_bkps_max + 1) + (n_bkps - k + 1)]; 11 | } 12 | for (k = 0 ; k < n_bkps + 1 ; k++) 13 | { 14 | bkps_list[k] = bkps_list[k] * jump; 15 | } 16 | bkps_list[n_bkps] = n_samples; 17 | return; 18 | } 19 | -------------------------------------------------------------------------------- /src/ruptures/utils/_utils/convert_path_matrix_c.h: -------------------------------------------------------------------------------- 1 | void convert_path_matrix_c(int *path_matrix, int n_bkps, int n_samples, int n_bkps_max, int jump, int *bkps_list); 2 | -------------------------------------------------------------------------------- /src/ruptures/utils/bnode.py: -------------------------------------------------------------------------------- 1 | """Binary node.""" 2 | 3 | import functools 4 | import numpy as np 5 | 6 | 7 | @functools.total_ordering 8 | class Bnode: 9 | """Binary node. 10 | 11 | In binary segmentation, each segment [start, end) is a binary node. 
12 | """ 13 | 14 | def __init__(self, start, end, val, left=None, right=None, parent=None): 15 | self.start = start 16 | self.end = end 17 | self.val = val 18 | self.left = left 19 | self.right = right 20 | self.parent = parent 21 | 22 | @property 23 | def gain(self): 24 | """Return the cost decrease when splitting this node.""" 25 | if self.left is None or self.right is None: 26 | return 0 27 | elif np.isinf(self.val) and self.val < 0: 28 | return 0 29 | return self.val - (self.left.val + self.right.val) 30 | 31 | def __lt__(self, other): 32 | return self.start < other.start 33 | 34 | def __eq__(self, other): 35 | return ( 36 | isinstance(other, self.__class__) 37 | and self.start == other.start 38 | and self.end == other.end 39 | ) 40 | 41 | def __hash__(self): 42 | return hash((self.__class__, self.start, self.end)) 43 | -------------------------------------------------------------------------------- /src/ruptures/utils/drawbkps.py: -------------------------------------------------------------------------------- 1 | r"""Draw a random partition.""" 2 | 3 | import numpy as np 4 | 5 | 6 | def draw_bkps(n_samples=100, n_bkps=3, seed=None): 7 | """Draw a random partition with specified number of samples and specified 8 | number of changes.""" 9 | rng = np.random.default_rng(seed=seed) 10 | alpha = np.ones(n_bkps + 1) / (n_bkps + 1) * 2000 11 | bkps = np.cumsum(rng.dirichlet(alpha) * n_samples).astype(int).tolist() 12 | bkps[-1] = n_samples 13 | return bkps 14 | -------------------------------------------------------------------------------- /src/ruptures/utils/utils.py: -------------------------------------------------------------------------------- 1 | """Miscellaneous functions for ruptures.""" 2 | 3 | from itertools import tee 4 | from math import ceil 5 | 6 | 7 | def pairwise(iterable): 8 | """S -> (s0,s1), (s1,s2), (s2, s3), ...""" 9 | a, b = tee(iterable) 10 | next(b, None) 11 | return zip(a, b) 12 | 13 | 14 | def unzip(seq): 15 | """Reverse zip.""" 16 | return 
zip(*seq) 17 | 18 | 19 | def sanity_check(n_samples, n_bkps, jump, min_size): 20 | """Check if a partition if possible given some segmentation parameters. 21 | 22 | Args: 23 | n_samples (int): number of point in the signal 24 | n_bkps (int): number of breakpoints 25 | jump (int): the start index of each regime can only be a multiple of 26 | "jump" (and the end index = -1 modulo "jump"). 27 | min_size (int): minimum size of a segment. 28 | 29 | Returns: 30 | bool: True if there exists a potential configuration of 31 | breakpoints for the given parameters. False if it does not. 32 | """ 33 | n_adm_bkps = n_samples // jump # number of admissible breakpoints 34 | 35 | # Are there enough points for the given number of regimes? 36 | if n_bkps > n_adm_bkps: 37 | return False 38 | if n_bkps * ceil(min_size / jump) * jump + min_size > n_samples: 39 | return False 40 | return True 41 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepcharles/ruptures/39a837b7e8d3e08d04d33f14749a1a39d86183d2/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_bnode.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ruptures.utils import Bnode 3 | 4 | 5 | def test_bnode(): 6 | left = Bnode(start=100, end=120, val=1) 7 | right = Bnode(start=120, end=200, val=1) 8 | 9 | # bad merging, no right leaf 10 | merged_node = Bnode(start=left.start, end=right.end, left=left, right=None, val=3) 11 | assert merged_node.gain == 0 12 | 13 | # bad merging, no left leaf 14 | merged_node = Bnode(start=left.start, end=right.end, left=None, right=right, val=3) 15 | assert merged_node.gain == 0 16 | 17 | # bad merging, negative infinit val 18 | merged_node = Bnode( 19 | start=left.start, end=right.end, left=left, 
right=right, val=-np.inf 20 | ) 21 | assert merged_node.gain == 0 22 | 23 | # normal merging 24 | merged_node = Bnode( 25 | start=left.start, 26 | end=right.end, 27 | left=left, 28 | right=right, 29 | val=left.val + right.val + 1, 30 | ) 31 | assert merged_node.gain == merged_node.val - (left.val + right.val) 32 | -------------------------------------------------------------------------------- /tests/test_datasets.py: -------------------------------------------------------------------------------- 1 | from itertools import product 2 | 3 | import pytest 4 | import numpy as np 5 | 6 | from ruptures.datasets import pw_constant, pw_linear, pw_normal, pw_wavy 7 | 8 | 9 | @pytest.mark.parametrize("func", [pw_constant, pw_linear, pw_normal, pw_wavy]) 10 | def test_empty_arg(func): 11 | func() 12 | 13 | 14 | @pytest.mark.parametrize( 15 | "func, n_samples, n_features, n_bkps, noise_std", 16 | product([pw_constant], range(20, 1000, 200), range(1, 4), [2, 5, 3], [None, 1, 2]), 17 | ) 18 | def test_constant(func, n_samples, n_features, n_bkps, noise_std): 19 | signal, bkps = func( 20 | n_samples=n_samples, n_features=n_features, n_bkps=n_bkps, noise_std=noise_std 21 | ) 22 | assert signal.shape == (n_samples, n_features) 23 | assert len(bkps) == n_bkps + 1 24 | assert bkps[-1] == n_samples 25 | 26 | 27 | def test_seed(n_samples=200, n_features=3, n_bkps=5, noise_std=1, seed=12345): 28 | # pw_constant 29 | signal1, bkps1 = pw_constant( 30 | n_samples=n_samples, 31 | n_features=n_features, 32 | n_bkps=n_bkps, 33 | noise_std=noise_std, 34 | seed=seed, 35 | ) 36 | signal2, bkps2 = pw_constant( 37 | n_samples=n_samples, 38 | n_features=n_features, 39 | n_bkps=n_bkps, 40 | noise_std=noise_std, 41 | seed=seed, 42 | ) 43 | assert np.allclose(signal1, signal2) 44 | assert bkps1 == bkps2 45 | 46 | # pw_normal 47 | signal1, bkps1 = pw_normal(n_samples=n_samples, n_bkps=n_bkps, seed=seed) 48 | signal2, bkps2 = pw_normal(n_samples=n_samples, n_bkps=n_bkps, seed=seed) 49 | assert 
np.allclose(signal1, signal2) 50 | assert bkps1 == bkps2 51 | 52 | # pw_linear 53 | signal1, bkps1 = pw_linear( 54 | n_samples=n_samples, 55 | n_features=n_features, 56 | n_bkps=n_bkps, 57 | noise_std=noise_std, 58 | seed=seed, 59 | ) 60 | signal2, bkps2 = pw_linear( 61 | n_samples=n_samples, 62 | n_features=n_features, 63 | n_bkps=n_bkps, 64 | noise_std=noise_std, 65 | seed=seed, 66 | ) 67 | assert np.allclose(signal1, signal2) 68 | assert bkps1 == bkps2 69 | 70 | # pw_wavy 71 | signal1, bkps1 = pw_wavy( 72 | n_samples=n_samples, n_bkps=n_bkps, noise_std=noise_std, seed=seed 73 | ) 74 | signal2, bkps2 = pw_wavy( 75 | n_samples=n_samples, n_bkps=n_bkps, noise_std=noise_std, seed=seed 76 | ) 77 | assert np.allclose(signal1, signal2) 78 | assert bkps1 == bkps2 79 | 80 | 81 | @pytest.mark.parametrize( 82 | "func, n_samples, n_features, n_bkps, noise_std", 83 | product([pw_linear], range(20, 1000, 200), range(1, 4), [2, 5, 3], [None, 1, 2]), 84 | ) 85 | def test_linear(func, n_samples, n_features, n_bkps, noise_std): 86 | signal, bkps = func( 87 | n_samples=n_samples, n_features=n_features, n_bkps=n_bkps, noise_std=noise_std 88 | ) 89 | assert signal.shape == (n_samples, n_features + 1) 90 | assert len(bkps) == n_bkps + 1 91 | assert bkps[-1] == n_samples 92 | 93 | 94 | @pytest.mark.parametrize( 95 | "func, n_samples, n_bkps, noise_std", 96 | product([pw_wavy], range(20, 1000, 200), [2, 5, 3], [None, 1, 2]), 97 | ) 98 | def test_wavy(func, n_samples, n_bkps, noise_std): 99 | signal, bkps = func(n_samples=n_samples, n_bkps=n_bkps, noise_std=noise_std) 100 | assert signal.shape == (n_samples,) 101 | assert len(bkps) == n_bkps + 1 102 | assert bkps[-1] == n_samples 103 | 104 | 105 | @pytest.mark.parametrize( 106 | "func, n_samples, n_bkps", product([pw_normal], range(20, 1000, 200), [2, 5, 3]) 107 | ) 108 | def test_normal(func, n_samples, n_bkps): 109 | signal, bkps = func(n_samples=n_samples, n_bkps=n_bkps) 110 | assert signal.shape == (n_samples, 2) 111 | assert 
len(bkps) == n_bkps + 1 112 | assert bkps[-1] == n_samples 113 | -------------------------------------------------------------------------------- /tests/test_display.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from ruptures.datasets import pw_constant 4 | from ruptures.show import display 5 | from ruptures.show.display import MatplotlibMissingError 6 | 7 | 8 | @pytest.fixture(scope="module") 9 | def signal_bkps(): 10 | signal, bkps = pw_constant() 11 | return signal, bkps 12 | 13 | 14 | def test_display_with_options(signal_bkps): 15 | try: 16 | signal, bkps = signal_bkps 17 | fig, axarr = display(signal, bkps) 18 | fig, axarr = display(signal, bkps, bkps) 19 | figsize = (20, 10) # figure size 20 | fig, axarr = display( 21 | signal, 22 | bkps, 23 | figsize=figsize, 24 | ) 25 | fig, axarr = display( 26 | signal[:, 0], 27 | bkps, 28 | figsize=figsize, 29 | ) 30 | except MatplotlibMissingError: 31 | pytest.skip("matplotlib is not installed") 32 | 33 | 34 | def test_display_without_options(signal_bkps): 35 | try: 36 | signal, bkps = signal_bkps 37 | fig, axarr = display(signal, bkps) 38 | fig, axarr = display(signal, bkps, bkps) 39 | figsize = (20, 10) # figure size 40 | fig, axarr = display(signal, bkps) 41 | fig, axarr = display(signal[:, 0], bkps) 42 | except MatplotlibMissingError: 43 | pytest.skip("matplotlib is not installed") 44 | 45 | 46 | def test_display_with_new_options(signal_bkps): 47 | try: 48 | signal, bkps = signal_bkps 49 | fig, axarr = display(signal, bkps) 50 | fig, axarr = display(signal, bkps, bkps) 51 | 52 | fig, axarr = display(signal, bkps, facecolor="k", edgecolor="b") 53 | fig, axarr = display(signal[:, 0], bkps, facecolor="k", edgecolor="b") 54 | except MatplotlibMissingError: 55 | pytest.skip("matplotlib is not installed") 56 | 57 | 58 | def test_display_with_computed_chg_pts_options(signal_bkps): 59 | try: 60 | signal, bkps = signal_bkps 61 | fig, axarr = display(signal, 
bkps) 62 | fig, axarr = display(signal, bkps, bkps) 63 | 64 | fig, axarr = display(signal, bkps, bkps, computed_chg_pts_color="k") 65 | fig, axarr = display( 66 | signal, bkps, bkps, computed_chg_pts_color="k", computed_chg_pts_linewidth=3 67 | ) 68 | fig, axarr = display( 69 | signal, 70 | bkps, 71 | bkps, 72 | computed_chg_pts_color="k", 73 | computed_chg_pts_linewidth=3, 74 | computed_chg_pts_linestyle="--", 75 | ) 76 | fig, axarr = display( 77 | signal, 78 | bkps, 79 | bkps, 80 | computed_chg_pts_color="k", 81 | computed_chg_pts_linewidth=3, 82 | computed_chg_pts_linestyle="--", 83 | computed_chg_pts_alpha=1.0, 84 | ) 85 | except MatplotlibMissingError: 86 | pytest.skip("matplotlib is not installed") 87 | -------------------------------------------------------------------------------- /tests/test_metrics.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from ruptures.metrics import ( 4 | hamming, 5 | hausdorff, 6 | meantime, 7 | precision_recall, 8 | randindex, 9 | ) 10 | from ruptures.metrics.sanity_check import BadPartitions 11 | 12 | 13 | @pytest.fixture(scope="module") 14 | def b_mb(): 15 | return [100, 200, 350, 400, 500], [101, 201, 301, 401, 500] 16 | 17 | 18 | def test_hausdorff(b_mb): 19 | b, mb = b_mb 20 | m = hausdorff(b, mb) 21 | assert m > 0 22 | m = hausdorff(b, b) 23 | assert m == 0 24 | 25 | 26 | def test_randindex(b_mb): 27 | b, mb = b_mb 28 | m = randindex(b, mb) 29 | assert 1 > m > 0 30 | m = randindex(b, b) 31 | assert m == 1 32 | 33 | 34 | def test_meantime(b_mb): 35 | b, mb = b_mb 36 | m = meantime(b, mb) 37 | assert m > 0 38 | m = meantime(b, b) 39 | assert m == 0 40 | 41 | 42 | @pytest.mark.parametrize("margin", range(1, 20, 2)) 43 | def test_precision_recall(b_mb, margin): 44 | b, mb = b_mb 45 | p, r = precision_recall(b, mb, margin=margin) 46 | assert 0 <= p < 1 47 | assert 0 <= r < 1 48 | p, r = precision_recall(b, b, margin=margin) 49 | assert (p, r) == (1, 1) 50 | p, r = 
precision_recall(b, [b[-1]], margin=margin) 51 | 52 | 53 | @pytest.mark.parametrize( 54 | "metric", [hamming, hausdorff, meantime, precision_recall, randindex] 55 | ) 56 | def test_exception(b_mb, metric): 57 | true_bkps, my_bkps = b_mb 58 | with pytest.raises(BadPartitions): 59 | m = metric(true_bkps, []) 60 | with pytest.raises(BadPartitions): 61 | m = metric([], my_bkps) 62 | with pytest.raises(BadPartitions): 63 | m = metric([10, 10, 500], [10, 500]) 64 | with pytest.raises(BadPartitions): 65 | m = metric([10, 500], [10, 501]) 66 | --------------------------------------------------------------------------------